twitter/twitter-cldr-rb

View on GitHub
lib/twitter_cldr/transforms/conversions/side.rb

Summary

Maintainability
A
1 hr
Test Coverage
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Transforms
    module Conversions

      SideMatch = Struct.new(:before_offset, :key_offset, :after_offset, :captures) do
        def start
          key_offset.first
        end

        def stop
          key_offset.last
        end
      end

      class Side
        attr_reader :before_context, :key
        attr_reader :after_context, :cursor_offset

        def initialize(before_context, key, after_context, cursor_offset)
          @before_context = before_context
          @key = key
          @after_context = after_context
          @cursor_offset = cursor_offset
        end

        def match(cursor)
          if before_match = match_before(cursor)
            if key_match = match_key(cursor, before_match)
              if after_match = match_after(cursor, key_match)
                SideMatch.new(
                  before_match.offset(0),
                  key_match.offset(0),
                  after_match.offset(0),
                  before_match.captures +
                    key_match.captures +
                    after_match.captures
                )
              end
            end
          end
        end

        def match_before(cursor)
          cursor.text.scan(before_context_regexp) do
            match = Regexp.last_match
            if match.end(0) >= cursor.position && match.begin(0) <= cursor.position
              return match
            end
          end
          nil
        end

        def match_key(cursor, before_match)
          if match = key_regexp.match(cursor.text, before_match.end(0))
            match if match.begin(0) == before_match.end(0)
          end
        end

        def match_after(cursor, key_match)
          if match = after_context_regexp.match(cursor.text, key_match.end(0))
            match if match.begin(0) == key_match.end(0)
          end
        end

        def has_codepoints?
          if first_elem = key_u_regexp.elements.first
            first_elem.respond_to?(:codepoints) &&
              !first_elem.codepoints.empty?
          else
            false
          end
        end

        def codepoints
          if first_elem = key_u_regexp.elements.first
            first_elem.codepoints
          else
            []
          end
        end

        private

        def before_context_regexp
          @before_context_regexp ||= compile_regexp(before_context).to_regexp
        end

        def key_u_regexp
          @key_u_regexp ||= compile_regexp(Rule.regexp_token_string(key))
        end

        def key_regexp
          @key_regexp ||= key_u_regexp.to_regexp
        end

        def after_context_regexp
          @after_context_regexp ||= compile_regexp(after_context).to_regexp
        end

        # This is a pretty big hack. The problem we're trying to solve here
        # is that regular negated character classes don't match the ends of
        # strings. CLDR's transform rules expect the after context to match
        # the end of a string, since, in a sense, "nothing" is always part
        # of a negated character class. In other words, "I want to match on
        # anything but these specific characters" should also include _no_
        # characters. Accordingly, this function adds "\z" to the ends of
        # negated character classes. Hopefully this works for all cases.
        def compile_regexp(regexp_str)
          TwitterCldr::Shared::UnicodeRegex.compile(regexp_str).tap do |re|
            re.elements.replace(
              re.elements.flat_map do |element|
                new_elem = case element.type
                when :character_class
                  if element.negated?
                    repl = TwitterCldr::Shared::UnicodeRegex.compile(
                      "(?:#{element.to_regexp_str[3..-2]}|\\z)"
                    )

                    repl.elements
                  end
                end

                new_elem || element
              end
            )
          end
        end

      end

    end
  end
end