twitter/twitter-cldr-rb

View on GitHub
lib/twitter_cldr/segmentation/suppressions.rb

Summary

Maintainability
A
2 hrs
Test Coverage
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'singleton'

module TwitterCldr
  module Segmentation
    # Pairs a forward trie with a backward trie of suppressions and uses them
    # to decide whether a candidate boundary should be kept (see #should_break?).
    #
    # Objects are obtained through Suppressions.instance(boundary_type, locale),
    # which memoizes one object per resource path. Including Singleton makes
    # `new` private, so outside callers cannot construct instances directly.
    class Suppressions
      include Singleton

      class << self
        # Returns the suppressions object for +boundary_type+ and +locale+.
        #
        # When no suppressions resource exists for that combination, the
        # NullSuppressions instance is returned instead. Otherwise the resource
        # is loaded on first use and the same object is handed back afterwards.
        def instance(boundary_type, locale)
          path = find_resource(boundary_type, locale)

          if path
            cache[path] ||= load_from(path)
          else
            NullSuppressions.instance
          end
        end

        private

        # Builds a new object from the marshalled tries stored under the
        # :forwards_trie and :backwards_trie keys of the resource at +path+.
        #
        # NOTE(review): Marshal.load is only safe on trusted data; this
        # presumably reads resources shipped with the library -- confirm.
        def load_from(path)
          resource = TwitterCldr.get_resource(path)
          forward = Marshal.load(resource[:forwards_trie])
          backward = Marshal.load(resource[:backwards_trie])

          new(forward, backward)
        end

        # Returns the resource path for the given boundary type and locale,
        # or nil when TwitterCldr reports that no such resource exists.
        def find_resource(boundary_type, locale)
          segments = ['shared', 'segments', 'suppressions', locale, boundary_type]
          candidate = TwitterCldr.resource_file_path(segments)

          TwitterCldr.resource_exists?(candidate) ? candidate : nil
        end

        # Memo table mapping resource path => Suppressions object.
        def cache
          @cache ||= {}
        end
      end

      attr_reader :forward_trie, :backward_trie

      def initialize(forward_trie, backward_trie)
        @forward_trie, @backward_trie = forward_trie, backward_trie
      end

      # Returns false when the text around the cursor's position matches a
      # suppression (the break should be suppressed), and true otherwise.
      #
      # The check has two phases: walk backward through the codepoints along
      # the backward trie until a node carrying a value is reached, then walk
      # forward from that index along the forward trie. Only when both walks
      # reach a node with a value is the break suppressed.
      def should_break?(cursor)
        start = cursor.position

        # When a space (codepoint 32) sits just before the position, skip over
        # it and begin at the character preceding it, e.g. the '.' in
        # "Mr. Brown".
        start -= 2 if cursor.codepoint(start - 1) == 32

        anchor = backward_match_index(cursor, start)
        return true if anchor.nil?

        !forward_match?(cursor, anchor)
      end

      private

      # Walks backward from +pos+ along the backward trie. Returns the index
      # at which a node with a value is reached, or nil when the walk leaves
      # the text or the trie has no child for the current codepoint.
      def backward_match_index(cursor, pos)
        node = backward_trie.root

        while pos >= 0 && pos < cursor.length
          node = node.child(cursor.codepoint(pos))
          return nil unless node
          return pos if node.value

          pos -= 1
        end

        nil
      end

      # Walks forward from +pos+ along the forward trie. Returns true once a
      # node with a value is reached; false when the text ends first or the
      # trie has no child for the current codepoint.
      def forward_match?(cursor, pos)
        node = forward_trie.root

        while pos < cursor.length
          node = node.child(cursor.codepoint(pos))
          return false unless node
          return true if node.value

          pos += 1
        end

        false
      end
    end
  end
end