twitter/twitter-cldr-rb

View on GitHub
lib/twitter_cldr/transforms/conversion_rule_set.rb

Summary

Maintainability
B
5 hrs
Test Coverage
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

# http://unicode.org/reports/tr35/tr35-general.html#Transforms
# http://unicode.org/cldr/utility/transform.jsp

module TwitterCldr
  module Transforms

    class ConversionRuleSet
      attr_reader :filter_rule, :inverse_filter_rule
      attr_reader :rules, :rule_index

      def initialize(filter_rule, inverse_filter_rule, rules)
        @rules = rules
        @filter_rule = filter_rule
        @inverse_filter_rule = inverse_filter_rule
        @rule_index = build_rule_index(rules)
      end

      def forward?
        true
      end

      def backward?
        false
      end

      def is_filter_rule?
        false
      end

      def is_transform_rule?
        false
      end

      def is_conversion_rule?
        false
      end

      def is_conversion_rule_set?
        true
      end

      def invert
        ConversionRuleSet.new(
          inverse_filter_rule, filter_rule, inverted_rules
        )
      end

      def apply_to(cursor)
        until cursor.eos?
          if filter_rule.matches?(cursor)
            rule_match = find_matching_rule_at(cursor)

            if rule_match
              start = rule_match.start
              stop = rule_match.stop
              replacement = rule_match.replacement
              puts "#{cursor.text[start...stop]} -> #{replacement}" if $debug
              cursor.text[start...stop] = replacement

              cursor.advance(
                replacement.size + rule_match.cursor_offset
              )
            else
              cursor.advance
            end
          else
            cursor.advance
          end
        end
      end

      private

      def inverted_rules
        @inverted_rules ||= begin
          rules.each_with_object([]) do |rule, ret|
            if rule.can_invert?
              ret << rule.invert
            end
          end
        end
      end

      def find_matching_rule_at(cursor)
        indexed_match = find_matching_indexed_rule_at(cursor)
        blank_key_match = find_matching_blank_key_rule_at(cursor)

        if indexed_match
          if blank_key_match
            if blank_key_match < indexed_match
              blank_key_match
            else
              indexed_match
            end
          else
            indexed_match
          end
        else
          blank_key_match
        end
      end

      def find_matching_indexed_rule_at(cursor)
        if rules = rule_index.get(cursor.index_values)
          rules.each do |rule|
            if side_match = rule.match(cursor)
              return RuleMatch.new(rule, side_match)
            end
          end
        end

        nil
      end

      def find_matching_blank_key_rule_at(cursor)
        if rules = rule_index.get([0])
          rules.each do |rule|
            if side_match = rule.match(cursor)
              return RuleMatch.new(rule, side_match)
            end
          end
        end

        nil
      end

      def build_rule_index(rules)
        TwitterCldr::Utils::Trie.new.tap do |trie|
          rules.each_with_index do |rule, idx|
            next unless rule.forward?

            if rule.has_codepoints?
              codepoints = rule.codepoints

              if codepoints.size > 0
                rule.codepoints.each do |codepoint|
                  bytes = codepoint.chr('UTF-8').bytes

                  if entry = trie.get(bytes)
                    entry << rule
                  else
                    trie.add(bytes, [rule])
                  end
                end
              end
            else
              if entry = trie.get([0])
                entry << rule
              else
                trie.add([0], [rule])
              end
            end
          end
        end
      end
    end

  end
end