twitter/twitter-cldr-rb

View on GitHub
lib/twitter_cldr/parsers/unicode_regex/character_range.rb

Summary

Maintainability
A
0 mins
Test Coverage
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Parsers
    class UnicodeRegexParser

      # Represents a range inside a regex character class, written as two
      # endpoints joined by a dash (for example a-z or 0-9). Ranges are only
      # valid inside character classes.
      class CharacterRange < Component

        # The two endpoint components of the range, in the order given.
        attr_reader :initial, :final

        # initial - component that starts the range
        # final   - component that ends the range
        def initialize(initial, final)
          @initial, @final = initial, final
        end

        # Character ranges and set operations share the same syntax (both
        # rely on the "-" operator and square brackets), so the parser
        # cannot tell them apart up front. A CharacterRange therefore has to
        # act as an operand and as a token; exposing a type lets it play the
        # token role.
        def type
          :character_range
        end

        # Builds a RangeSet holding a single range. Each bound is the first
        # element of the corresponding endpoint's fully expanded set.
        def to_set
          lower_bound = initial.to_set.to_full_a.first
          upper_bound = final.to_set.to_full_a.first

          TwitterCldr::Utils::RangeSet.new([lower_bound..upper_bound])
        end

        # Returns the fully expanded array form of this range's set.
        def codepoints
          range_set = to_set
          range_set.to_full_a
        end

        # Hands this range's set to set_to_regex, which is not defined in
        # this class (presumably inherited from Component -- confirm).
        def to_regexp_str
          range_set = to_set
          set_to_regex(range_set)
        end

        # Renders the range as "<initial>-<final>" using each endpoint's
        # own string form.
        def to_s
          start_text = initial.to_s
          finish_text = final.to_s

          "#{start_text}-#{finish_text}"
        end

      end
    end
  end
end