lib/twitter_cldr/parsers/unicode_regex/character_set.rb
# encoding: UTF-8
# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0
module TwitterCldr
module Parsers
class UnicodeRegexParser
# Can exist inside and outside of character classes
class CharacterSet < Component
include TwitterCldr::Shared
attr_reader :property_name, :property_value
# Builds a character set from the text found between "[:" and ":]".
#
# When +text+ splits on "=" into exactly two pieces they become the
# property name and property value respectively. Any other shape (no "=",
# several "=", or nothing after the "=") is stored whole as the property
# value and the property name is left unset.
def initialize(text)
  pieces = text.split("=")

  if pieces.size != 2
    @property_value = text
  else
    @property_name = pieces.first
    @property_value = pieces.last
  end
end
# Returns the regexp source for this set: the result of to_set handed to
# set_to_regex (not defined in this class; presumably supplied by the
# Component superclass).
def to_regexp_str
  char_set = to_set
  set_to_regex(char_set)
end
# Returns the code points of the referenced property with
# UnicodeRegex.invalid_regexp_chars subtracted from them.
def to_set
  matched = codepoints
  excluded = TwitterCldr::Shared::UnicodeRegex.invalid_regexp_chars
  matched.subtract(excluded)
end
# Renders the set back into bracketed property notation.
#
# Returns "[:name=value:]" when a property name was given and "[:value:]"
# otherwise.
def to_s
  # initialize always assigns the property value and only sets the property
  # name for "name=value" input, so the name is the attribute that tells
  # the two forms apart.
  if property_name
    "[:#{property_name}=#{property_value}:]"
  else
    "[:#{property_value}:]"
  end
end
# Identifies this parser component; always :character_set.
def type
  :character_set
end
private
# Looks up the code points for the normalized property name/value pair.
#
# Returns whatever CodePoint.code_points_for_property yields when it is
# non-empty; raises UnicodeRegexParserError when the lookup comes back
# empty.
def codepoints
  matches = CodePoint.code_points_for_property(*normalized_property)
  return matches unless matches.empty?

  raise UnicodeRegexParserError,
    "Couldn't find property '#{property_name}' containing " \
    "property value '#{property_value}'"
end
private
# Tries each candidate property value in order against the candidate
# property names and returns the first [name, value] pair whose name is
# truthy. Returns [nil, nil] when no candidate resolves.
def normalized_property
  property_value_candidates.each do |value_candidate|
    name, value = normalized_property_name(
      value_candidate, property_name_candidates
    )

    return [name, value] if name
  end

  [nil, nil]
end
# Asks CodePoint.properties.normalize about each candidate name paired
# with the given property value, in order, and returns the first
# [name, value] result whose name is truthy. Returns [nil, nil] when none
# of the candidate names is accepted.
def normalized_property_name(property_value, property_name_candidates)
  property_name_candidates.each do |name_candidate|
    name, value = CodePoint.properties.normalize(
      name_candidate, property_value
    )

    return [name, value] if name
  end

  [nil, nil]
end
# Property names to try during normalization. An explicit property name is
# the only candidate; without one, the bare text itself is tried as a
# name, followed by 'General_Category' and 'Script'.
def property_name_candidates
  return [property_name] if property_name

  [property_value, 'General_Category', 'Script']
end
# Property values to try during normalization. With both a name and a
# value only that value is tried; otherwise the value is tried first and
# then nil (collapsed to a single entry when the value is already nil).
def property_value_candidates
  return [property_value] if property_name && property_value

  [property_value, nil].uniq
end
end
end
end
end