twitter/twitter-cldr-rb

View on GitHub
lib/twitter_cldr/shared/locale.rb

Summary

Maintainability
C
1 day
Test Coverage
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Shared
    class Locale

      class << self
        # http://unicode.org/reports/tr35/tr35-9.html#Likely_Subtags
        #
        # 1. Make sure the input locale is in canonical form: uses the right
        #    separator, and has the right casing.
        #
        # 2. Replace any deprecated subtags with their canonical values using
        #    the <alias> data in supplemental metadata. Use the first value in
        #    the replacement list, if it exists.
        #
        # 3. If the tag is grandfathered (see <variable id="$grandfathered"
        #    type="choice"> in the supplemental data), then return it.
        #    (NOTE: grandfathered subtags are no longer part of CLDR)
        #
        # 4. Remove the script code 'Zzzz' and the region code 'ZZ' if they
        #    occur; change an empty language subtag to 'und'.
        #
        # 5. Get the components of the cleaned-up tag (language¹, script¹, and
        #    region¹), plus any variants if they exist (including keywords).
        def parse(locale_text)
          locale_text = locale_text.to_s.strip

          normalize(locale_text).tap do |locale|
            replace_aliased_subtags(locale)
            remove_placeholder_tags(locale)
          end
        end

        def valid?(locale_text)
          # make sure all subtags have at least one identity, i.e. they exist
          # in one of the language/script/region/variant lists
          identify_subtags(locale_text.strip).all? do |subtag|
            !subtag.last.empty?
          end
        end

        def parse_likely(locale_text)
          LikelySubtags.locale_for(locale_text)
        end

        def split(locale_text)
          locale_text.strip.split(/[-_ ]/)
        end

        private

        def normalize(locale_text)
          Locale.new(nil).tap do |locale|
            subtags = identify_subtags(locale_text)

            until subtags.empty?
              subtag, identities = subtags.shift
              next if identities.empty?

              identities.each do |identity|
                unless subtag_set?(locale, identity)
                  set_subtag(locale, identity, subtag)
                  break
                end
              end
            end
          end
        end

        def subtag_set?(locale, identity)
          case identity
            when :variant
              false
            else
              !!locale.send(identity)
          end
        end

        def set_subtag(locale, identity, subtag)
          case identity
            when :variant
              locale.variants << normalize_subtag(subtag, identity)
            else
              locale.send(
                :"#{identity}=", normalize_subtag(subtag, identity)
              )
          end
        end

        def identify_subtags(locale_text)
          split(locale_text).map do |subtag|
            identities = identify_subtag(subtag)
            [subtag, identities]
          end
        end

        def identify_subtag(subtag)
          [].tap do |types|
            types << :language if language?(subtag)
            types << :script   if script?(subtag)
            types << :region   if region?(subtag)
            types << :variant  if variant?(subtag)

            types << :language if language?(normalize_subtag(subtag, :language))
            types << :script   if script?(normalize_subtag(subtag, :script))
            types << :region   if region?(normalize_subtag(subtag, :region))
            types << :variant  if variant?(normalize_subtag(subtag, :variant))
          end
        end

        def language?(subtag)
          languages.include?(subtag) || language_aliases.include?(subtag.to_sym)
        end

        def script?(subtag)
          scripts.include?(subtag) ||
            !!PropertyValueAliases.long_alias_for('sc', subtag)
        end

        def region?(subtag)
          territories.include?(subtag) || region_aliases.include?(subtag.to_sym)
        end

        def variant?(subtag)
          subtag = normalize_subtag(subtag, :variant)
          variants.include?(subtag)
        end

        def region_aliases
          @region_aliases ||= aliases_resource[:territory].each_with_object({}) do |(_, aliases), ret|
            ret.merge!(aliases)
          end
        end

        def language_aliases
          @language_aliases ||= aliases_resource[:language].each_with_object({}) do |(_, aliases), ret|
            ret.merge!(aliases)
          end
        end

        def normalize_subtag(subtag, identity)
          case identity
            when :language
              subtag.downcase
            when :script
              subtag.capitalize
            when :region, :variant
              subtag.upcase
          end
        end

        def replace_aliased_subtags(locale)
          replace_aliased_language_subtags(locale)
          replace_aliased_region_subtags(locale)
        end

        def replace_aliased_language_subtags(locale)
          language = locale.language ? locale.language.to_sym : nil
          if found_alias = language_aliases[language]
            locale.language = found_alias
          end
        end

        def replace_aliased_region_subtags(locale)
          region = locale.region ? locale.region.to_sym : nil
          if found_alias = region_aliases[region]
            locale.region = found_alias
          end
        end

        def remove_placeholder_tags(locale)
          locale.script = nil if locale.script == 'Zzzz'
          locale.region = nil if locale.region == 'ZZ'
          locale.language ||= 'und'
        end

        def languages
          @languages ||= [:regular, :special].flat_map do |type|
            validity_resource[:languages][type]
          end
        end

        def scripts
          @scripts ||= [:regular, :special].flat_map do |type|
            validity_resource[:scripts][type]
          end
        end

        def territories
          @territories ||= [:regular, :special, :macroregion].flat_map do |type|
            validity_resource[:regions][type]
          end
        end

        def variants
          validity_resource[:variants][:regular]
        end

        def aliases_resource
          @aliases_resource ||=
            TwitterCldr.get_resource('shared', 'aliases')[:aliases]
        end

        def validity_resource
          @validity_resource ||=
            TwitterCldr.get_resource('shared', 'validity_data')[:validity_data]
        end

        def parent_locales
          @parent_locales ||= TwitterCldr.get_resource('shared', 'parent_locales')
        end
      end

      attr_accessor :language, :script, :region, :variants

      def initialize(language, script = nil, region = nil, variants = [])
        @language = language ? language.to_s : nil
        @script = script ? script.to_s : nil
        @region = region ? region.to_s : nil
        @variants = Array(variants)
      end

      def full_script
        # fall back to abbreviated script if long alias can't be found
        @full_script ||= PropertyValueAliases.long_alias_for('sc', script) || script
      end

      def abbreviated_script
        @short_script ||= PropertyValueAliases.abbreviated_alias_for('sc', script) || script
      end

      def maximize
        LikelySubtags.locale_for(to_s)
      end

      def max_supported
        @max_supported ||= maximize.supported
      end

      def supported
        @supported ||= begin
          ancestor_chain.sort.find do |loc|
            TwitterCldr.supported_locale?(loc.dasherized)
          end
        end
      end

      def dasherized
        join('-')
      end

      def join(delimiter = '_')
        to_a.join(delimiter)
      end

      alias :underscored :join
      alias :to_s :join

      def to_a
        ([language, script, region] + variants).compact
      end

      def permutations(delimiter = '_')
        perms = [
          [language, script, region].compact.join(delimiter),
          [language, script].compact.join(delimiter),
          [language, region].compact.join(delimiter),
          language,
        ]

        perms.uniq
      end

      def ==(other)
        language == other.language &&
          script == other.script &&
          region == other.region &&
          variants == other.variants
      end

      alias eql? ==

      def hash
        to_a.hash
      end

      def sort_key
        k = 0
        k += 3 if language
        k += 2 if script
        k += 1 if region
        k
      end

      def <=>(other)
        other.sort_key <=> sort_key
      end

      def ancestor_chain
        ancestry = [self]
        remaining = [self]

        until remaining.empty?
          locale = remaining.pop

          if parent = self.class.send(:parent_locales)[locale.to_s]
            parent = self.class.parse(parent)
            ancestry << parent
            remaining << parent
          else
            parents = locale.permutations.map { |p| self.class.parse(p) }
            remaining += parents - ancestry
            ancestry += parents - ancestry
          end
        end

        ancestry
      end

    end
  end
end