twitter/twitter-cldr-rb

View on GitHub
lib/twitter_cldr/resources/bidi_test_importer.rb

Summary

Maintainability
A
2 hrs
Test Coverage
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Resources
    # This class should be used with JRuby in 1.9 mode

    class BidiTestImporter < Importer
      # Path of the bidi conformance test inside the Unicode data set.
      BIDI_TEST_FILE = 'ucd/BidiTest.txt'  # this file is about 3.4 MB

      # NOTE(review): OUT_FILE is not referenced by the code visible in this
      # file; #output_file derives its name from BIDI_TEST_FILE instead.
      # Confirm which file name is actually intended.
      OUT_FILE = 'classpath_bidi_test.txt'

      # Paragraph direction for each bit of a bitset read from the source file:
      # bit N set means the input is run with DIRECTIONS[N] (nil falls through
      # to the default direction in #get_java_direction).
      DIRECTIONS = [nil, :LTR, :RTL].freeze

      requirement :unicode, '6.1.0', [BIDI_TEST_FILE]
      output_path File.join(TwitterCldr::SPEC_DIR, 'bidi')
      ruby_engine :jruby

      # The only public method of this class: delegates to #generate_test,
      # which reads #source_file and writes the condensed fixture to
      # #output_file.
      def execute
        generate_test
      end

      private

      # Loads JRuby's Java bridge and imports the Java classes used below:
      # java.lang.Character (queried in #types_to_string) and classpath.Bidi
      # (used to compute embedding levels and visual ordering).
      # NOTE(review): presumably a hook called by Importer before #execute --
      # confirm against the Importer base class.
      def before_prepare
        require 'java'
        java_import 'java.lang.Character'
        java_import 'classpath.Bidi'
      end

      # Location of BIDI_TEST_FILE as resolved by the :unicode requirement.
      def source_file
        unicode_requirement = requirements[:unicode]
        unicode_requirement.source_path_for(BIDI_TEST_FILE)
      end

      # Destination of the generated fixture: the base name of BIDI_TEST_FILE
      # placed inside the configured :output_path.
      # NOTE(review): the OUT_FILE constant is not used here -- confirm that
      # the BidiTest.txt base name is the intended output name.
      def output_file
        destination_dir = params.fetch(:output_path)
        file_name = File.basename(BIDI_TEST_FILE)
        File.join(destination_dir, file_name)
      end

      # Condenses #source_file into a fixture grouped by the embedding levels
      # and visual ordering that classpath.Bidi computes, and writes the
      # result to #output_file.
      def generate_test
        write_runs(collect_runs)
      end

      # Reads #source_file and returns a hash shaped like
      #   { "<levels> | <reorder>" => { "<input types>" => direction_mask } }
      # where bit N of direction_mask is set when the input was run with
      # DIRECTIONS[N].
      def collect_runs
        run_hash = {}

        # File.foreach closes the file when iteration finishes or raises; a
        # bare File.open(...).each_line would leave the handle open.
        File.foreach(source_file) do |ln|
          cur_line = ln.strip

          # lines starting with '#' or '@' carry no input; blank lines neither
          next if cur_line.empty? || cur_line.start_with?('#', '@')

          input, bitset = cur_line.split('; ')
          next if bitset.nil?  # no "; <bitset>" part, so nothing to run

          # identical for every direction, so split once per line
          types = input.split(' ')

          expand_bitset_str(bitset).each_with_index do |check, index|
            next unless check

            key = levels_and_reorder_key(types, DIRECTIONS[index])
            run_hash[key] ||= {}
            run_hash[key][input] ||= 0
            run_hash[key][input] |= (1 << index)
          end
        end

        run_hash
      end

      # Runs classpath.Bidi over the given bidi class names in the given
      # direction and returns "<levels> | <reorder>", both space-separated.
      def levels_and_reorder_key(types, direction)
        java_direction = get_java_direction(direction)
        bidi = Java::Classpath::Bidi.new(types_to_string(types), java_direction)
        levels = types.each_with_index.map { |_, idx| bidi.getLevelAt(idx) }
        reorder_arr = Java::Classpath::Bidi.reorderVisually(levels.dup, 0, (0...types.size).to_a, 0, types.size).to_a

        "#{levels.join(' ')} | #{reorder_arr.join(' ')}"
      end

      # Writes the collected runs to #output_file: an "@Levels:" and an
      # "@Reorder:" line per key, followed by one "<input>; <mask>" line for
      # every input that produced that result.
      def write_runs(run_hash)
        File.open(output_file, 'w+') do |out|
          run_hash.each_pair do |levels_and_reorders, inputs|
            levels, reorders = levels_and_reorders.split(' | ')
            out.write("@Levels: #{levels}\n")
            out.write("@Reorder: #{reorders}\n")
            inputs.each_pair do |input, bitset|
              out.write("#{input}; #{bitset}\n")
            end
          end
        end
      end

      # Turns a decimal bitset string into an array of booleans, least
      # significant bit first and at least three entries long
      # (e.g. "1" => [true, false, false]).
      def expand_bitset_str(bitset)
        binary = bitset.to_i.to_s(2)
        padded = binary.rjust(3, '0')

        # reverse first so that index 0 corresponds to the lowest bit
        padded.reverse.each_char.map { |digit| digit == '1' }
      end

      # Maps :RTL / :LTR to the matching classpath.Bidi direction constant;
      # anything else (including nil) yields DIRECTION_DEFAULT_LEFT_TO_RIGHT.
      def get_java_direction(dir)
        bidi_class = Java::Classpath::Bidi

        if dir == :RTL
          bidi_class::DIRECTION_RIGHT_TO_LEFT
        elsif dir == :LTR
          bidi_class::DIRECTION_LEFT_TO_RIGHT
        else
          bidi_class::DIRECTION_DEFAULT_LEFT_TO_RIGHT
        end
      end

      # Builds a string containing one representative character for each bidi
      # class name in +types+ (e.g. ['L', 'R'] => "A\u05be").
      #
      # Raises ArgumentError when a name has no representative character.
      def types_to_string(types)
        utf_map = bidi_utf_map

        chars = types.map do |type|
          utf_map.fetch(type) do
            raise ArgumentError, "no representative character for bidi class #{type.inspect}"
          end
        end

        chars.join
      end

      # Memoized map from bidi class name to a representative character.
      # The 'ES' entry is resolved once, when the map is first built.
      def bidi_utf_map
        @utf_map ||= begin
          # java 1.6 and 1.7 report different representative characters for the "ES" bidi class
          plus_is_es = Character.getDirectionality(0x002b) == Character::DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR

          {
            'L'   => "\u0041",
            'LRE' => "\u202a",
            'LRO' => "\u202d",
            'R'   => "\u05be",
            'AL'  => "\u0626",
            'RLE' => "\u202b",
            'RLO' => "\u202e",
            'PDF' => "\u202c",
            'EN'  => "\u0030",
            'ET'  => "\u0023",
            'AN'  => "\u0667",
            'CS'  => "\u002c",
            'NSM' => "\u0300",
            'BN'  => "\u0000",
            'B'   => "\u0085",
            'S'   => "\u0009",
            'WS'  => "\u000c",
            'ON'  => "\u0021",
            'ES'  => plus_is_es ? "\u002b" : "\u002f"
          }
        end
      end

    end
  end
end