# lib/twitter_cldr/transforms/conversion_rule_set.rb
# encoding: UTF-8
# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0
# http://unicode.org/reports/tr35/tr35-general.html#Transforms
# http://unicode.org/cldr/utility/transform.jsp
module TwitterCldr
module Transforms
# A group of conversion rules that is applied to a cursor as a unit.
#
# On construction the rules are indexed in a trie so that, while walking
# the text, only the rules filed under the cursor's current index values
# (plus the rules filed under the "blank" key) have to be tried.
class ConversionRuleSet
  attr_reader :filter_rule, :inverse_filter_rule
  attr_reader :rules, :rule_index

  # filter_rule         - consulted via #matches?(cursor) before any rule is
  #                       tried at a position.
  # inverse_filter_rule - becomes the filter rule of the set returned by
  #                       #invert.
  # rules               - the conversion rules; each one must respond to
  #                       forward?, has_codepoints?, codepoints, match,
  #                       can_invert? and invert.
  def initialize(filter_rule, inverse_filter_rule, rules)
    @rules = rules
    @filter_rule = filter_rule
    @inverse_filter_rule = inverse_filter_rule
    @rule_index = build_rule_index(rules)
  end

  def forward?
    true
  end

  def backward?
    false
  end

  def is_filter_rule?
    false
  end

  def is_transform_rule?
    false
  end

  def is_conversion_rule?
    false
  end

  def is_conversion_rule_set?
    true
  end

  # Returns a new rule set with the two filter rules swapped and every
  # invertible rule replaced by its inverse.
  def invert
    ConversionRuleSet.new(
      inverse_filter_rule, filter_rule, inverted_rules
    )
  end

  # Walks the cursor to the end of its text, rewriting cursor.text in place.
  #
  # At each position where the filter rule matches and some rule matches,
  # the matched range is replaced and the cursor moves forward by the
  # replacement's size plus the match's cursor offset. Everywhere else the
  # cursor advances with its default step. Writes nothing to stdout.
  def apply_to(cursor)
    until cursor.eos?
      rule_match = filter_rule.matches?(cursor) && find_matching_rule_at(cursor)

      if rule_match
        start = rule_match.start
        stop = rule_match.stop
        replacement = rule_match.replacement

        cursor.text[start...stop] = replacement
        cursor.advance(replacement.size + rule_match.cursor_offset)
      else
        cursor.advance
      end
    end
  end

  private

  # Trie key under which rules that report no codepoints are filed.
  # A fresh array is returned on every call, exactly like a literal would be.
  def blank_key
    [0]
  end

  # Memoized inverses of all rules that report can_invert?, in rule order.
  def inverted_rules
    @inverted_rules ||= rules.select(&:can_invert?).map(&:invert)
  end

  # Picks between the first matching indexed rule and the first matching
  # blank-key rule. When both exist, the blank-key match wins only if it
  # compares strictly lower (`<`) than the indexed one; ties go to the
  # indexed match. Returns nil when nothing matches.
  def find_matching_rule_at(cursor)
    indexed_match = find_matching_indexed_rule_at(cursor)
    blank_key_match = find_matching_blank_key_rule_at(cursor)

    return blank_key_match unless indexed_match
    return indexed_match unless blank_key_match

    blank_key_match < indexed_match ? blank_key_match : indexed_match
  end

  # First match among the rules filed under the cursor's index values.
  def find_matching_indexed_rule_at(cursor)
    first_match_among(rule_index.get(cursor.index_values), cursor)
  end

  # First match among the rules filed under the blank key.
  def find_matching_blank_key_rule_at(cursor)
    first_match_among(rule_index.get(blank_key), cursor)
  end

  # Tries each candidate in order and wraps the first successful side match
  # in a RuleMatch. Returns nil when candidates is nil/false or none match.
  def first_match_among(candidates, cursor)
    return nil unless candidates

    candidates.each do |rule|
      side_match = rule.match(cursor)
      return RuleMatch.new(rule, side_match) if side_match
    end

    nil
  end

  # Builds the lookup trie. Only forward rules are indexed. A rule with
  # codepoints is filed once per codepoint, keyed by that codepoint's UTF-8
  # bytes; a rule without codepoints is filed under the blank key. A rule
  # that reports has_codepoints? but yields no codepoints is not indexed.
  def build_rule_index(rules)
    TwitterCldr::Utils::Trie.new.tap do |trie|
      rules.each do |rule|
        next unless rule.forward?

        if rule.has_codepoints?
          rule.codepoints.each do |codepoint|
            add_to_index(trie, codepoint.chr('UTF-8').bytes, rule)
          end
        else
          add_to_index(trie, blank_key, rule)
        end
      end
    end
  end

  # Appends rule to the bucket stored at key, creating the bucket if the
  # trie has no entry for that key yet.
  def add_to_index(trie, key, rule)
    bucket = trie.get(key)

    if bucket
      bucket << rule
    else
      trie.add(key, [rule])
    end
  end
end
end
end