twitter/twitter-cldr-rb

View on GitHub
lib/twitter_cldr/tokenizers/calendars/date_time_tokenizer.rb

Summary

Maintainability
A
0 mins
Test Coverage
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Tokenizers
    # Tokenizes date-time patterns built from `{{date}}` and `{{time}}`
    # placeholders. After tokenizing, each placeholder token is replaced by
    # the tokens of the data reader's own date or time pattern.
    #
    # The data reader passed to the constructor must respond to
    # `date_reader` and `time_reader`; each of those must respond to
    # `tokenizer` and `pattern`.
    class DateTimeTokenizer

      class << self
        # Lazily builds the tokenizer for placeholder patterns and caches it
        # on the class, so every instance shares the same object.
        #
        # Recognizers, in order: the `{{date}}` placeholder, the `{{time}}`
        # placeholder, single-quoted text, and an empty regex tagged as
        # plaintext (presumably the catch-all for any remaining text; the
        # exact matching rules live in Tokenizer).
        #
        # NOTE(review): /'.*'/ is greedy, so a pattern with two quoted
        # sections may be matched from the first quote to the last one.
        # Confirm against Tokenizer's splitting behaviour before changing it.
        def tokenizer
          @tokenizer ||= Tokenizer.new([
            TokenRecognizer.new(:date, /\{\{date\}\}/),
            TokenRecognizer.new(:time, /\{\{time\}\}/),
            TokenRecognizer.new(:plaintext, /'.*'/),
            TokenRecognizer.new(:plaintext, //)
          ])
        end

        # Lazily builds the tokenizer used for mixed date and time patterns
        # and caches it in a class-level instance variable, mirroring how
        # `tokenizer` is cached.
        #
        # The tokenizer is the union of the date and time tokenizers reachable
        # from +data_reader+, with a single plaintext recognizer appended. The
        # block passed to Tokenizer.union is true for every recognizer whose
        # token type is not :plaintext (presumably union keeps only those;
        # confirm against Tokenizer.union).
        #
        # Because the result is cached, only the first +data_reader+ seen is
        # ever consulted. NOTE(review): this assumes the date and time
        # tokenizers do not differ between data readers.
        def full_tokenizer(data_reader)
          @full_tokenizer ||= begin
            combined = Tokenizer.union(
              data_reader.date_reader.tokenizer.tokenizer,
              data_reader.time_reader.tokenizer.tokenizer
            ) do |recognizer|
              recognizer.token_type != :plaintext
            end

            combined.recognizers << TokenRecognizer.new(:plaintext, //)
            combined
          end
        end
      end

      attr_reader :data_reader

      # data_reader - object giving access to the date and time readers
      #               (see the class comment for the methods it must provide).
      def initialize(data_reader)
        @data_reader = data_reader
      end

      # Tokenizes a placeholder pattern and substitutes every :date and :time
      # token with the tokens of the corresponding date or time pattern.
      # Returns a flat array of tokens.
      def tokenize(pattern)
        expand_tokens(
          PatternTokenizer.new(data_reader, tokenizer).tokenize(pattern)
        )
      end

      # Tokenizes mixed date and time pattern strings,
      # used to tokenize the additional date format patterns.
      # Unlike #tokenize, no placeholder expansion is performed.
      def full_tokenize(pattern)
        PatternTokenizer.new(data_reader, full_tokenizer).tokenize(pattern)
      end

      protected

      # Replaces each :date / :time token with its expansion and keeps every
      # other token as is, returning one flat array.
      def expand_tokens(tokens)
        tokens.flat_map do |token|
          case token.type
            when :date
              expand_date(token)
            when :time
              expand_time(token)
            else
              [token]
          end
        end
      end

      # Returns the tokens of the date reader's pattern. The placeholder token
      # itself is not inspected; it is accepted only to keep the call shape.
      def expand_date(_token)
        date_reader = data_reader.date_reader
        date_reader.tokenizer.tokenize(date_reader.pattern)
      end

      # Returns the tokens of the time reader's pattern. The placeholder token
      # itself is not inspected; it is accepted only to keep the call shape.
      def expand_time(_token)
        time_reader = data_reader.time_reader
        time_reader.tokenizer.tokenize(time_reader.pattern)
      end

      # Tokenizer for mixed date and time patterns, cached on the class.
      def full_tokenizer
        self.class.full_tokenizer(data_reader)
      end

      # Tokenizer for placeholder patterns, cached on the class.
      def tokenizer
        self.class.tokenizer
      end

    end
  end
end