# smugglys/translatomatic
#
# View on GitHub
# lib/translatomatic/translation/collection.rb
#
# Summary
#
# Maintainability
# A
# 0 mins
# Test Coverage
require 'set'

module Translatomatic
  module Translation
    # Stores results of translations.
    # For each original text, there may be zero or more translations from
    # one or more providers.
    #
    # Results are indexed twice: by provider and by original text. Both
    # indexes use the string form (+to_s+) of their key.
    class Collection
      include Translatomatic::Util

      # Create an empty translation collection
      def initialize
        # by_provider[provider] = [Result, ...]
        @by_provider = {}
        # by_original[text] = [Result, ...]
        @by_original = {}
      end

      # @param string [String,Text] Original string
      # @return [Array<Result>, nil] All translations for the given string,
      #   or nil if no translation has been added for it.
      def [](string)
        @by_original[string.to_s]
      end

      # @return [Boolean] True if no translations have been added
      def empty?
        @by_original.empty?
      end

      # Find the best translation of a string into the given locale.
      # Candidates are ordered by how common their result is, then (when
      # the string is a Text with a context) by how well they match the
      # translated context. The first candidate in the requested locale wins.
      # @param string [String,Text] Original string
      # @param locale [Locale] Target locale
      # @return [Result, nil] The best translation for the given string,
      #   or nil if there is no translation in the target locale.
      def get(string, locale)
        locale = build_locale(locale)
        list = sort_by_best_match(@by_original[string.to_s] || [])
        if string.is_a?(Translatomatic::Text) && string.context
          # string has a context
          list = sort_by_context_match(list, string.context, locale)
        end
        list.find { |tr| tr.result.locale == locale }
      end

      # Add a list of translations to the collection.
      # Entries whose result is nil are skipped.
      # @param translations [Array<Result>,Result] Translation result(s)
      # @return [void]
      def add(translations)
        translations = [translations] unless translations.is_a?(Array)
        translations.each do |tr|
          next if tr.result.nil?
          add_to_list(@by_provider, tr.provider, tr)
          add_to_list(@by_original, tr.original, tr)
        end
      end

      # @return [Number] The number of translations
      def count
        translations.length
      end

      # @return [Boolean] True if there is one or more translations
      def present?
        # every stored result is in both indexes, so this matches count > 0
        !empty?
      end

      # Get translations from this collection. If provider is specified,
      # returns only translations from the given provider, otherwise all
      # translations are returned.
      # @param provider [String] Optional name of a provider.
      # @return [Array<Result>] Translation results
      def translations(provider = nil)
        return @by_provider.values.flatten if provider.nil?
        @by_provider[provider.to_s] || []
      end

      # Get a list of the best sentence translations for the given
      # parent string. Sentences without a translation are omitted.
      # @param parent [Text] Parent text
      # @param locale [Locale] Target locale
      # @return [Array<Result>] Substring translation results
      def sentences(parent, locale)
        parent.sentences.collect do |sentence|
          # get translation for sentence
          translation = get(sentence, locale)
          next unless translation
          # create a new translation with the sentence as the original
          # string, so that we can rely on the offset value.
          Result.new(sentence, translation.result, translation.provider,
                     from_database: translation.from_database)
        end.compact
      end

      # Return a new collection with only the translations that came from
      # providers (not from the database).
      # @return [Collection] The collection result
      def from_providers
        result = self.class.new
        result.add(translations.reject(&:from_database))
        result
      end

      # @param locale [Locale] Target locale
      # @return [Array<Result, nil>] Best translation for each original
      #   string. An entry is nil when that string has no translation in
      #   the target locale.
      def best_translations(locale)
        @by_original.keys.collect { |text| get(text, locale) }
      end

      # @return [Array<String>] A list of providers that translations were
      #   sourced from.
      def providers
        @by_provider.keys
      end

      # Combine this collection with another
      # @param other [Collection] Another collection
      # @return [Collection] A new collection; neither operand is modified
      def +(other)
        result = self.class.new
        @by_provider.each_value { |list| result.add(list) }
        result.add(other.translations)
        result
      end

      # @param string [String,Text] Original string
      # @param provider [String] Provider name
      # @return [boolean] True if the given provider has a translation for
      #   the given string.
      def translated?(string, provider)
        wanted = string.to_s
        translations(provider).any? { |tr| tr.original.to_s == wanted }
      end

      # @return [String] String description of all translations
      def description
        translations.collect(&:description).join("\n")
      end

      private

      # sort the list of translations by comparing to the translation for
      # the string context.
      # for example:
      #   context is 'go right' with a translation of 'Geh rechts'.
      #   translations of 'right' are 'recht', 'rechts' and 'richtig'.
      #   translation 'rechts' will be ordered first, as 'rechts' is in
      #     the translated context string (and is longer than 'recht').
      # translations with the same rank keep their incoming order.
      def sort_by_context_match(list, context, locale)
        return list if list.blank?
        context_results = context_translation_results(list, context, locale)
        # log.debug("context translations: #{context_results}")
        # put translations that include the context string(s) first.
        # also put longer translations that include the context string first.
        # (fixes matching 'rechts' before 'recht')
        # the index is a tie-breaker: ruby's sort is not guaranteed stable,
        # and the incoming (best match) order must survive for equal ranks.
        ranked = list.each_with_index.sort_by do |tr, index|
          tr_result = tr.result.downcase
          ctx_match = context_results.any? { |ctx| ctx.include?(tr_result) }
          [ctx_match ? -tr_result.length : 1, index]
        end
        ranked.map(&:first)
      end

      # sort the list of translations so that the most common translation
      # (compared case-insensitively) comes first. if there is an equal
      # number of different translations the original order is kept, so
      # the first most common translation (first provider) wins.
      def sort_by_best_match(list)
        by_count = Hash.new(0)
        list.each { |tr| by_count[tr.result.downcase] += 1 }

        # highest count first. the index is a tie-breaker because ruby's
        # sort is not guaranteed stable.
        ranked = list.each_with_index.sort_by do |tr, index|
          [-by_count[tr.result.downcase], index]
        end
        ranked.map(&:first)
      end

      # Translate each context string via #get. Contexts are taken to be
      # in the locale of the first entry's original text; contexts with no
      # translation in the target locale are dropped.
      # @param list [Array<Result>] translations (must not be empty)
      # @param context [Array<String>,String] context(s)
      # @param locale [Locale] Target locale
      # @return [Array<String>] lowercased context translation strings
      def context_translation_results(list, context, locale)
        context_locale = list.first.original.locale
        context = [context] unless context.is_a?(Array)
        context.collect do |ctx|
          context_tr = get(Text[ctx, context_locale], locale)
          context_tr.result.downcase if context_tr
        end.compact
      end

      # Append value to the list stored under key.to_s, creating the list
      # if it does not exist yet.
      def add_to_list(hash, key, value)
        (hash[key.to_s] ||= []) << value
      end
    end
  end
end