twitter/twitter-cldr-rb

View on GitHub
lib/twitter_cldr/shared/properties_database.rb

Summary

Maintainability
A
2 hrs
Test Coverage
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'set'

module TwitterCldr
  module Shared

    class PropertiesDatabase
      include TwitterCldr::Utils

      DEFAULT_ROOT_PATH = File.join(
        TwitterCldr::RESOURCES_DIR, 'unicode_data', 'properties'
      )

      attr_reader :root_path, :trie

      def initialize(root_path = DEFAULT_ROOT_PATH)
        @root_path = root_path
        @trie = FileSystemTrie.new(root_path)
      end

      def store(property_name, property_value, data)
        trie.add(key_for(property_name, property_value), data)
      end

      def code_points_for_property(property_name, property_value = nil)
        key = key_for(property_name, property_value)
        node = trie.get_node(key)

        if node
          if node.value
            node.value
          elsif name_indicates_value_prefix?(property_name)
            concat_children(key)
          else
            RangeSet.new([])
          end
        else
          RangeSet.new([])
        end
      end

      def include?(property_name, property_value = nil)
        values = property_values_for(property_name)

        if property_value
          property_names.include?(property_name) &&
            values && values.include?(property_value)
        else
          property_names.include?(property_name)
        end
      end

      def properties_for_code_point(code_point)
        code_point_cache[code_point] ||=
          PropertySet.new(lookup_code_point(code_point))
      end

      # List of property names
      # @return [Array<String>] Array of property names string
      # @example
      #   TwitterCldr::Shared::CodePoint.properties.property_names
      #   # => ["ASCII_Hex_Digit", "Age", "Alphabetic", … ]
      def property_names
        glob = File.join(root_path, '*')
        @property_names ||= Dir.glob(glob).map do |path|
          File.basename(path)
        end
      end

      # Return possible values for a given property
      # @param property_name [String] Property name
      # @return [Array<String>|nil] List of values Values
      # @example
      #   TwitterCldr::Shared::CodePoint.properties.property_values_for('Script')
      #   # => ["Adlam", "Ahom", "Anatolian_Hieroglyphs", … ]
      # TwitterCldr::Shared::CodePoint.properties.property_values_for('Alphabetic') # => nil
      def property_values_for(property_name)
        if property_values.include?(property_name)
          return property_values[property_name]
        end

        prefix = File.join(root_path, property_name)
        glob = File.join(prefix, "**/**/#{FileSystemTrie::VALUE_FILE}")

        values = Dir.glob(glob).map do |path|
          path = File.dirname(path)[(prefix.length + 1)..-1]
          path.split(File::SEPARATOR).join if path
        end.compact

        if name_indicates_value_prefix?(property_name)
          values += values.map { |v| v[0] }
        end

        property_values[property_name] = if values.length == 0
          nil
        else
          values.uniq
        end
      end

      def each_property_pair
        if block_given?
          property_names.each do |property_name|
            if property_values = property_values_for(property_name)
              property_values.each do |property_value|
                yield property_name, property_value
              end
            else
              yield property_name, nil
            end
          end
        else
          to_enum(__method__)
        end
      end

      def normalize(property_name, property_value = nil)
        normalizer.normalize(property_name, property_value)
      end

      private

      def normalizer
        @normalizer ||= PropertyNormalizer.new(self)
      end

      def lookup_code_point(code_point)
        {}.tap do |properties|
          each_property_pair do |property_name, property_value|
            code_points = code_points_for_property(
              property_name, property_value
            )

            if code_points.include?(code_point)
              if property_value
                properties[property_name] ||= Set.new
                properties[property_name] << property_value
              else
                properties[property_name] = nil
              end
            end
          end
        end
      end

      def property_values
        @property_values ||= {}
      end

      def code_point_cache
        @code_point_cache ||= {}
      end

      def concat_children(key)
        glob = File.join(root_path, *key, "**/**/#{FileSystemTrie::VALUE_FILE}")

        Dir.glob(glob).inject(RangeSet.new([])) do |ret, entry|
          key = File.dirname(entry)
          key = key[(root_path.length + 1)..-1].split(File::SEPARATOR)

          if value = trie.get(key)
            ret.union(value)
          else
            ret
          end
        end
      end

      def key_for(property_name, property_value)
        if property_value
          if name_indicates_value_prefix?(property_name)
            [property_name] + property_value.chars.to_a
          else
            [property_name, property_value]
          end
        else
          [property_name]
        end
      end

      def name_indicates_value_prefix?(property_name)
        property_name == 'General_Category' ||
          property_name == 'gc'
      end
    end

  end
end