twitter/twitter-cldr-rb

View on GitHub
lib/twitter_cldr/resources/number_formats_importer.rb

Summary

Maintainability
C
1 day
Test Coverage
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'nokogiri'
require 'fileutils'
require 'parallel'
require 'etc'
require 'set'

module TwitterCldr
  module Resources

    # Imports number data (symbols, default numbering systems and format
    # patterns) for each configured locale and writes it to
    # <output_path>/<locale>/numbers.yml.
    class NumberFormatsImporter < Importer

      requirement :cldr, Versions.cldr_version
      output_path 'locales'
      locales TwitterCldr.supported_locales
      ruby_engine :mri

      private

      # Imports every locale in params[:locales], spreading the work over one
      # worker process per processor and printing a progress line as each
      # locale completes.
      def execute
        imported = Set.new
        total = params[:locales].size

        # Progress is tracked only in the :finish callback. The worker block
        # runs in a separate process (in_processes), so mutating the set there
        # would not be visible here.
        # NOTE(review): relies on Parallel invoking :finish in the parent
        # process with (item, index, result) - confirm against the gem docs.
        report_progress = ->(locale, *) do
          imported.add(locale)
          STDOUT.write "\rImported #{locale}, #{imported.size} of #{total} total"
        end

        Parallel.each(params[:locales], in_processes: Etc.nprocessors, finish: report_progress) do |locale|
          import_locale(locale)
        end
      end

      # Builds the number data for +locale+ (merging in ancestor locales via
      # the CLDR requirement's build_data) and dumps it as YAML.
      def import_locale(locale)
        data = requirements[:cldr].build_data(locale) do |ancestor_locale|
          NumberFormats.new(ancestor_locale, requirements[:cldr]).to_h
        end

        output_file = File.join(output_path, locale.to_s, 'numbers.yml')

        # A newly supported locale may not have a directory yet; without this
        # File.open below raises Errno::ENOENT.
        FileUtils.mkdir_p(File.dirname(output_file))

        yaml = TwitterCldr::Utils::YAML.dump(
          TwitterCldr::Utils.deep_symbolize_keys(locale => data),
          use_natural_symbols: true
        )

        File.open(output_file, 'w:utf-8') { |output| output.write(yaml) }
      end

      # Root directory under which per-locale directories are written.
      # Raises KeyError when the :output_path param is missing.
      def output_path
        params.fetch(:output_path)
      end

    end


    # Reads the number-related sections of a single locale's CLDR main XML
    # file (<common_path>/main/<locale>.xml) and converts them to plain hashes.
    class NumberFormats

      TYPES = %w(decimal scientific percent currency).freeze

      attr_reader :locale, :cldr_req

      # locale   - locale whose XML file is read (dashes become underscores in
      #            the file name).
      # cldr_req - object responding to #common_path and #draft?(node).
      def initialize(locale, cldr_req)
        @locale = locale
        @cldr_req = cldr_req
      end

      # Returns { numbers: { symbols:, default_number_systems:, formats: } },
      # with one :formats entry per element of TYPES.
      def to_h
        {
          numbers: {
            symbols: symbols,
            default_number_systems: default_number_systems,
            formats: TYPES.each_with_object({}) do |type, ret|
              ret[type.to_sym] = formats_for_type(type)
            end
          }
        }
      end

      # Returns a hash keyed by number system (or :default when the node has
      # no numberSystem attribute). Each value is either a hash of
      # snake_cased symbol names to their text, or a redirect symbol of the
      # form :"numbers.symbols.<system>" when the node is an alias.
      def symbols
        doc.xpath('//ldml/numbers/symbols').each_with_object({}) do |symbols_node, symbols_result|
          number_system = number_system_for(symbols_node)

          if (alias_node = symbols_node.xpath('alias').first)
            symbols_result[number_system] = :"numbers.symbols.#{aliased_number_system(alias_node)}"
            next
          end

          symbols_result[number_system] = symbols_node.elements.each_with_object({}) do |symbol, symbol_result|
            next if cldr_req.draft?(symbol)

            # camelCase element name -> snake_case key, e.g. perMille -> per_mille
            symbol_name = symbol.name.gsub(/([a-z])([A-Z])/) { "#{$1}_#{$2.downcase}" }
            symbol_result[symbol_name] = symbol.content
          end
        end
      end

      # Returns { alternatives: { <alt> => <system> }, default: <system> }.
      # The :default key is only present when a defaultNumberingSystem node
      # without an alt attribute exists.
      def default_number_systems
        { alternatives: {} }.tap do |result|
          doc.xpath('//ldml/numbers/defaultNumberingSystem').each do |default_ns_node|
            alt_attr = default_ns_node.attribute('alt')

            if alt_attr
              result[:alternatives][alt_attr.value] = default_ns_node.content
            else
              result[:default] = default_ns_node.content
            end
          end
        end
      end

      # Returns the formats of the given type ("decimal", "currency", ...)
      # keyed by number system. Aliased sections become redirect symbols of
      # the form :"numbers.formats.<type>.<system>".
      def formats_for_type(type)
        doc.xpath("//ldml/numbers/#{type}Formats").each_with_object({}) do |formats_node, ret|
          number_system = number_system_for(formats_node)

          if (alias_node = formats_node.xpath('alias').first)
            ret[number_system] = :"numbers.formats.#{type}.#{aliased_number_system(alias_node)}"
            next
          end

          formats = formats_from_node(formats_node, type, number_system)

          # Flatten { default: { default: pattern } } to { default: pattern }.
          # The default length may also be an alias redirect (a Symbol), which
          # must be left as-is; indexing it would raise TypeError.
          default_formats = formats[:default]
          formats[:default] = default_formats[:default] if default_formats.is_a?(Hash)

          unit = unit_for(formats_node)
          formats[:unit] = unit unless unit.empty?

          ret[number_system] = formats
        end
      end

      # Returns a hash keyed by format length (the <type>FormatLength node's
      # type attribute, or :default). Values are the patterns of the first
      # <type>Format child, or a redirect symbol when the length node holds
      # only an alias. Length nodes with neither are skipped.
      def formats_from_node(formats_node, type, number_system)
        formats_node.xpath("#{type}FormatLength").each_with_object({}) do |format_length_node, format_result|
          format_nodes = format_length_node.xpath("#{type}Format")

          length_attr = format_length_node.attribute('type')
          format_key = length_attr ? length_attr.value : :default

          if format_nodes.empty?
            alias_node = format_length_node.xpath('alias').first
            next unless alias_node

            format_result[format_key] = pattern_xpath_to_redirect(
              alias_node.attribute('path').value, number_system
            )
          else
            format_nodes.each do |format_node|
              format_result[format_key] ||= patterns_from(format_node)
            end
          end
        end
      end

      # Returns the non-draft, non-alt patterns of a format node keyed by the
      # pattern's type attribute (or :default). Patterns carrying a count
      # attribute are grouped into a nested hash keyed by count.
      #
      # Raises RuntimeError when a counted pattern follows an uncounted one
      # for the same key.
      def patterns_from(format_node)
        format_node.xpath('pattern').each_with_object({}) do |pattern_node, pattern_result|
          # CLDR v42 added a few new alt patterns, alphaNextToNumber and noCurrency.
          # See: https://cldr.unicode.org/index/downloads/cldr-42#h.ocxunccgtf28
          next if pattern_node.attribute('alt')
          next if cldr_req.draft?(pattern_node)

          type_attr = pattern_node.attribute('type')
          count_attr = pattern_node.attribute('count')
          pattern_key = type_attr ? type_attr.value : :default

          unless count_attr
            pattern_result[pattern_key] = pattern_node.content
            next
          end

          by_count = (pattern_result[pattern_key] ||= {})

          unless by_count.is_a?(Hash)
            raise "can't parse patterns with and without 'count' attribute in the same section"
          end

          by_count[count_attr.value] = pattern_node.content
        end
      end

      # Converts an alias xpath such as "../decimalFormatLength[@type='long']"
      # into the redirect symbol :"numbers.formats.decimal.<number_system>.long".
      def pattern_xpath_to_redirect(xpath, number_system)
        format_type   = xpath[/(\w+)FormatLength/, 1]
        format_length = xpath[/@type='(\w+)'/, 1]

        :"numbers.formats.#{format_type}.#{number_system}.#{format_length}"
      end

      # Returns the node's unitPattern children as { count => pattern text }.
      # A unitPattern without a count attribute is stored under 'one'.
      def unit_for(format_length_node)
        format_length_node.xpath('unitPattern').each_with_object({}) do |unit_node, result|
          # Explicit nil check instead of an inline rescue, so unrelated
          # errors are not silently turned into the 'one' fallback.
          count_attr = unit_node.attribute('count')
          count = count_attr ? count_attr.value : 'one'
          result[count] = unit_node.content
        end
      end

      # Lazily parsed XML document for the locale.
      def doc
        @doc ||= begin
          locale_fs = locale.to_s.tr('-', '_')
          Nokogiri.XML(File.read(File.join(cldr_main_path, "#{locale_fs}.xml")))
        end
      end

      # Directory holding the per-locale main XML files.
      def cldr_main_path
        @cldr_main_path ||= File.join(cldr_req.common_path, 'main')
      end

      private

      # Value of the node's numberSystem attribute, or :default when absent.
      def number_system_for(node)
        ns_attr = node.attribute('numberSystem')
        ns_attr ? ns_attr.value : :default
      end

      # Number system named in an alias node's path attribute, or nil when the
      # path carries no @numberSystem predicate.
      def aliased_number_system(alias_node)
        alias_node.attribute('path').value[/@numberSystem='(\w+)'/, 1]
      end

    end

  end
end