twitter/twitter-cldr-rb

View on GitHub
lib/twitter_cldr/formatters/numbers/rbnf/post_processors/chinese.rb

Summary

Maintainability
C
7 hrs
Test Coverage
# This code was ported from the java version available here:
# http://grepcode.com/file/repo1.maven.org/maven2/com.ibm.icu/icu4j/51.2/com/ibm/icu/text/RBNFChinesePostProcessor.java

# This code is incomplete, untested, and unused. It should remain here until
# I can figure out why it's necessary in ICU and whether to make use of it here or not.

# Rule set names recognised by `process`. The position of a name in this list
# is the "format" index: it selects the matching row of MARKERS, and indexes
# 1 and 3 are handled as long forms.
RULE_SET_NAMES = %w[traditional simplified accounting time].freeze

# Code point of the decimal point character (U+9EDE, "dian").
DIAN = 40670

# Marker code points per format, one row per entry of RULE_SET_NAMES (there is
# no row for "time"). The first three entries of a row are the 10,000-chunk
# markers (wan, yi, zhao); the last entry is the 'zero' (ling) character.
MARKERS = [
  [33836, 20740, 20806, 12295].freeze, # U+842C, U+5104, U+5146, zero = U+3007
  [19975, 20159, 20806, 12295].freeze, # U+4E07, U+4EBF, U+5146, zero = U+3007
  [33836, 20740, 20806, 38646].freeze  # U+842C, U+5104, U+5146, zero = U+96F6
  # need markers for time?
].freeze

# Post-processes an RBNF-formatted Chinese number, removing the helper
# characters left in the text and dropping redundant optional zeros.
#
# In the input, a '*' immediately before a 'zero' (ling) character marks that
# ling as optional. For the long forms (format indexes 1 and 3 of
# RULE_SET_NAMES) every '*' is simply removed. For the other forms the integer
# part (everything before DIAN, or the whole text when DIAN is absent) is split
# into 10,000 "chunks" at the marker characters and scanned right to left so
# that optional lings do not survive in adjacent chunks.
#
# @param str [String] UTF-8 text to clean up
# @param rule_set [#name] object whose `name` is one of RULE_SET_NAMES
# @return [String] the cleaned text, with all '*' characters removed
# @raise [ArgumentError] if `rule_set.name` is not in RULE_SET_NAMES
def process(str, rule_set)
  star = "*".ord
  bar = "|".ord

  # markers depend on what rule set we are using
  format = RULE_SET_NAMES.index(rule_set.name)
  raise ArgumentError, "Unknown rule set: #{rule_set.name.inspect}" unless format

  buf = str.unpack("U*")

  if format == 1 || format == 3
    # long form: every ling is kept, only the '*' flags are stripped
    buf.delete(star)
    return buf.pack("U*")
  end

  # remove unwanted lings
  # a '0' (ling) with * might be removed
  # mark off 10,000 'chunks', markers are Z, Y, W (zhao, yii, and wan)
  # already, we avoid two lings in the same chunk -- ling without * wins
  # now, just need to avoid optional lings in adjacent chunks
  # process right to left

  # decision matrix:
  # state, situation
  #     state         none       opt.          req.
  #     -----         ----       ----          ----
  # none to right     none       opt.          req.
  # opt. to right     none   clear, none  clear right, req.
  # req. to right     none   clear, none       req.

  markers = MARKERS[format]
  ling = markers.last

  # mark chunks with '|' (right after the first occurrence of each marker)
  markers[0...-1].each do |marker|
    at = buf.index(marker)
    buf.insert(at + 1, bar) if at
  end

  # Index of the last `codepoint` at or before `from`, or -1 when absent.
  # The -1 sentinel keeps the `nn > m` and `x = m - 1` arithmetic below simple.
  last_index = ->(codepoint, from) { buf[0..from].rindex(codepoint) || -1 }

  # only the part left of the decimal point is examined
  x = buf.index(DIAN) || buf.length

  s = 0   # 0 = none to right, 1 = opt. to right, 2 = req. to right
  n = -1  # index of the optional ling remembered from the chunk to the right

  while x >= 0
    m = last_index.call(bar, x)
    nn = last_index.call(ling, x)

    # A ling belongs to the current chunk only if it sits right of the chunk's
    # '|'. It is optional only when directly preceded by '*'; a ling at index 0
    # has no predecessor and counts as required, so the two-element deletes
    # below never start at a negative index.
    ns = 0
    if nn > m
      ns = nn > 0 && buf[nn - 1] == star ? 1 : 2
    end

    # Deletions below happen right of `m`, so `x` stays valid.
    x = m - 1

    case ns
    when 1 # optional ling in this chunk
      if s.zero?
        s = 1
        n = nn # remember optional ling to right
      else
        buf.slice!(nn - 1, 2) # delete current optional ling and its '*'
        s = 0
        n = -1
      end
    when 2 # required ling in this chunk
      buf.slice!(n - 1, 2) if s == 1 # delete previous optional ling and its '*'
      s = 2
      n = -1
    else # no ling in this chunk
      s = 0
      n = -1
    end
  end

  # strip the remaining '*' flags and the '|' chunk marks
  buf.reject! { |c| c == star || c == bar }

  buf.pack("U*")
end