rosette-proj/rosette-core

View on GitHub
lib/rosette/core/extractor/commit_processor.rb

Summary

Maintainability
A
1 hr
Test Coverage
# encoding: UTF-8

java_import 'org.eclipse.jgit.revwalk.RevWalk'

module Rosette
  module Core

    # Extracts phrases from a git commit. Should be thread-safe.
    #
    # @!attribute [r] config
    #   @return [Configurator] the Rosette config to use.
    # @!attribute [r] error_reporter
    #   @return [ErrorReporter] the error reporter to report syntax
    #     errors, etc to.
    #
    # @example
    #   processor = CommitProcessor.new(configuration)
    #   processor.process_each_phrase('my_repo', 'master') do |phrase|
    #     puts phrase.key
    #   end
    class CommitProcessor
      attr_reader :config, :error_reporter

      # Creates a new processor.
      #
      # @param [Configurator] config The Rosette config to use.
      # @param [ErrorReporter] error_reporter The error reporter to report
      #   syntax errors, etc to.
      def initialize(config, error_reporter = NilErrorReporter.instance)
        @config = config
        @error_reporter = error_reporter
      end

      # Extracts translatable phrases from the given ref and yields them
      # sequentially to the given block. If no block is given, this method
      # returns an +Enumerator+.
      #
      # @param [String] repo_name The name of the repository to extract
      #   translatable phrases from. Must be configured in +config+.
      # @param [String] commit_ref The git ref or commit id to extract
      #   translatable phrases from.
      # @raise [Java::OrgEclipseJgitErrors::MissingObjectException]
      # @return [void, Enumerator] either nothing if a block is given or
      #   an instance of +Enumerator+ if no block is given.
      # @yield [phrase] a single extracted phrase.
      # @yieldparam phrase [Phrase]
      def process_each_phrase(repo_name, commit_ref)
        if block_given?
          repo_config = config.get_repo(repo_name)
          rev_walk = RevWalk.new(repo_config.repo.jgit_repo)
          diff_finder = DiffFinder.new(repo_config.repo.jgit_repo, rev_walk)
          commit = repo_config.repo.get_rev_commit(commit_ref, rev_walk)

          diff_finder.diff_with_parents(commit).each_pair do |_, diff_entries|
            diff_entries.each do |diff_entry|
              if diff_entry.getNewPath != '/dev/null'
                process_diff_entry(diff_entry, repo_config, commit) do |phrase|
                  yield phrase
                end
              end
            end
          end
        else
          to_enum(__method__, repo_name, commit_ref)
        end
      end

      protected

      def process_diff_entry(diff_entry, repo_config, commit)
        repo_config.get_extractor_configs(diff_entry.getNewPath).each do |extractor_config|
          source_code = read_object_from_entry(diff_entry, repo_config, extractor_config)
          line_numbers_to_author = repo_config.repo.blame(diff_entry.getNewPath, commit.getId.name)

          begin
            extractor_config.extractor.extract_each_from(source_code) do |phrase, line_number|
              phrase.file = diff_entry.getNewPath
              phrase.commit_id = commit.getId.name

              if extractor_config.extractor.supports_line_numbers?
                if author_identity = line_numbers_to_author[line_number - 1]
                  phrase.author_name = author_identity.getName
                  phrase.author_email = author_identity.getEmailAddress
                  phrase.line_number = line_number
                end
              end

              yield phrase
            end
          rescue SyntaxError => e
            error_reporter.report_error(
              ExtractionSyntaxError.new(
                e.message, e.original_exception, e.language,
                diff_entry.getNewPath, commit.getId.name
              )
            )
          end
        end
      end

      def read_object_from_entry(diff_entry, repo_config, extractor_config)
        object_reader = repo_config.repo.jgit_repo.newObjectReader
        bytes = object_reader.open(diff_entry.getNewId.toObjectId).getBytes
        Java::JavaLang::String.new(bytes, extractor_config.encoding.to_s).to_s
      end
    end

  end
end