# smugglys/translatomatic
#
# View on GitHub
# lib/translatomatic/provider/microsoft.rb
#
# Summary
#
# Maintainability
# A
# 0 mins
# Test Coverage
require 'builder'

module Translatomatic
  module Provider
    # Interface to the Microsoft translation API
    # @see https://www.microsoft.com/en-us/translator/translatorapi.aspx
    # @see http://docs.microsofttranslator.com/text-translate.html
    class Microsoft < Base
      define_option :microsoft_api_key,
                    desc: t('provider.microsoft.api_key'), use_env: true

      # (see Base.supports_alternative_translations?)
      def self.supports_alternative_translations?
        true
      end

      # (see Base.supports_no_translate_html?)
      def self.supports_no_translate_html?
        true
      end

      # Create a new Microsoft provider instance.
      # The API key is taken from +options[:microsoft_api_key]+, falling back
      # to the +MICROSOFT_API_KEY+ environment variable.
      # @param options [Hash] provider options
      # @raise when no API key is found in either place (the message comes
      #   from the 'provider.microsoft.key_required' translation)
      def initialize(options = {})
        super(options)
        @key = options[:microsoft_api_key] || ENV['MICROSOFT_API_KEY']
        raise t('provider.microsoft.key_required') if @key.nil?
      end

      # (see Base#languages)
      # The list is fetched once and memoized for later calls.
      def languages
        @languages ||= fetch_languages
      end

      private

      # NOTE: `private` only affects methods. The constants below remain
      # readable from outside the class.
      BASE_URL = 'https://api.microsofttranslator.com/V2/Http.svc'.freeze
      # this endpoint returns one translation per source text
      TRANSLATE_URL1 = "#{BASE_URL}/TranslateArray".freeze
      LIMITS_URL1 = [2000, 10_000].freeze # strings, characters per request
      # this url returns multiple translations
      TRANSLATE_URL2 = "#{BASE_URL}/GetTranslationsArray".freeze
      LIMITS_URL2 = [10, 10_000].freeze # strings, characters per request
      MAX_TRANSLATIONS = 10 # for URL2
      LANGUAGES_URL = "#{BASE_URL}/GetLanguagesForTranslate".freeze
      ARRAYS_NS = 'http://schemas.microsoft.com/2003/10/Serialization/Arrays'.freeze
      WEB_SERVICE_NS = 'http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2'.freeze

      # Translate the given strings.
      # Strings with context are sent to the endpoint that returns multiple
      # translations, so the best one can be chosen; all other strings are
      # sent to the endpoint that returns one translation per string.
      # @param strings [Array] strings (or Translatomatic::Text) to translate
      # @param from source language, passed through to the request body
      # @param to target language, passed through to the request body
      def perform_translate(strings, from, to)
        # a single pass splits the input, so context? runs once per string
        with_context, without_context = strings.partition { |i| context?(i) }

        fetch_translation_array(with_context, from, to, true)
        fetch_translation_array(without_context, from, to, false)
      end

      # Fetch translations for the given strings, batching the requests to
      # stay within the string-count and character limits of the endpoint.
      # @param multiple [Boolean] true to request multiple translations
      #   per string
      def fetch_translation_array(strings, from, to, multiple)
        max_count, max_length = multiple ? LIMITS_URL2 : LIMITS_URL1
        batcher(strings, max_count: max_count, max_length: max_length)
          .each_batch do |texts|
          translate_texts(texts, from, to, multiple)
        end
      end

      # POST one batch of texts to the translation endpoint and record the
      # translations found in the response.
      def translate_texts(texts, from, to, multiple)
        url = multiple ? TRANSLATE_URL2 : TRANSLATE_URL1
        headers = { 'Ocp-Apim-Subscription-Key' => @key }
        body = build_body_xml(texts, from, to, multiple)
        response = http_client.post(url, body,
                                    headers: headers,
                                    content_type: 'application/xml')
        add_translations_from_response(response, texts, multiple)
      end

      # Parse the XML response body and pair each source text with its
      # translation(s), in request order.
      def add_translations_from_response(response, texts, multiple)
        doc = Nokogiri::XML(response.body)
        if multiple
          add_translations_from_response_multiple(doc, texts)
        else
          results = doc.search('//xmlns:TranslatedText').map(&:content)
          texts.zip(results).each do |original, tr|
            add_translations(original, tr)
          end
        end
      end

      # Record every alternative translation for each source text.
      # Texts without a matching response element are skipped.
      def add_translations_from_response_multiple(doc, texts)
        # there should be one GetTranslationsResponse for each string
        responses = doc.search('//xmlns:GetTranslationsResponse')
        texts.zip(responses).each do |original, tr|
          # zip pads with nil when fewer response elements than texts came
          # back; skip those rather than failing on nil with NoMethodError
          if tr.nil?
            log.debug("#{name}: response has no translations for a text")
            next
          end
          results = tr.search('TranslatedText').map(&:content)
          add_translations(original, results)
        end
      end

      # Request the list of languages from the languages endpoint.
      # @return [Array<String>] content of each string element in the response
      def fetch_languages
        # this request redirects to a html page
        headers = { 'Ocp-Apim-Subscription-Key' => @key }
        query = { 'appid' => '' }
        response = http_client.get(LANGUAGES_URL, query, headers: headers)
        log.debug("#{name} response: #{response.body}")
        doc = Nokogiri::XML(response.body)
        doc.search('//xmlns:string').map(&:content)
      end

      # Name of the API operation; also the prefix of the request root tag.
      def xml_root(multiple)
        multiple ? 'GetTranslationsArray' : 'TranslateArray'
      end

      # Build the XML request body for a batch of strings.
      # NOTE(review): the child element order is kept exactly as in the
      # existing implementation; the service may be order sensitive, so
      # confirm against the API reference before reordering.
      def build_body_xml(strings, from, to, multiple)
        root = "#{xml_root(multiple)}Request"
        xml = Builder::XmlMarkup.new
        xml.tag!(root, 'xmlns:a' => ARRAYS_NS) do
          xml.AppId
          xml.From(from)
          build_options_xml(xml) if multiple
          build_texts_xml(xml, strings)
          xml.To(to)
          xml.MaxTranslations MAX_TRANSLATIONS if multiple
        end
      end

      # Append the Texts element, with one a:string child per source string.
      def build_texts_xml(xml, strings)
        xml.Texts do
          strings.each do |string|
            xml.tag!('a:string', string)
          end
        end
      end

      # Append the Options element that sets IncludeMultipleMTAlternatives
      # to 'true'.
      def build_options_xml(xml)
        xml.tag!('Options', 'xmlns:o' => WEB_SERVICE_NS) do
          xml.tag!('o:IncludeMultipleMTAlternatives', 'true')
        end
      end

      # @return [Boolean] true if text is a Translatomatic::Text with context
      def context?(text)
        text.is_a?(Translatomatic::Text) && text.context.present?
      end
    end
  end
end