ruby-git/ruby-git

View on GitHub
lib/git/command_line.rb

Summary

Maintainability
A
1 hr
Test Coverage
# frozen_string_literal: true

require 'git/base'
require 'git/command_line_result'
require 'git/errors'
require 'stringio'

module Git
  # Runs a git command and returns the result
  #
  # @api public
  #
  class CommandLine
    # Create a Git::CommandLine object
    #
    # @example
    #   env = { 'GIT_DIR' => '/path/to/git/dir' }
    #   binary_path = '/usr/bin/git'
    #   global_opts = %w[--git-dir /path/to/git/dir]
    #   logger = Logger.new(STDOUT)
    #   cli = CommandLine.new(env, binary_path, global_opts, logger)
    #   cli.run('version') #=> #<Git::CommandLineResult:0x00007f9b0c0b0e00
    #
    # @param env [Hash<String, String>] environment variables to set
    # @param binary_path [String] the path to the git binary to run
    # @param global_opts [Array<String>] global options to pass to git
    # @param logger [Logger] the logger to use
    #
    def initialize(env, binary_path, global_opts, logger)
      # Keep each collaborator as given; the attr_readers below expose them read-only
      @env, @binary_path, @global_opts = env, binary_path, global_opts
      @logger = logger
    end

    # @attribute [r] env
    #
    # Variables to set (or unset) in the git command's environment
    #
    # This hash is passed as the environment argument each time a git command is
    # spawned by this object.
    #
    # @example
    #   env = { 'GIT_DIR' => '/path/to/git/dir' }
    #   command_line = Git::CommandLine.new(env, '/usr/bin/git', [], Logger.new(STDOUT))
    #   command_line.env #=> { 'GIT_DIR' => '/path/to/git/dir' }
    #
    # @return [Hash<String, String>]
    #
    # @see https://ruby-doc.org/3.2.1/Process.html#method-c-spawn Process.spawn
    #   for details on how to set environment variables using the `env` parameter
    #
    attr_reader :env

    # @attribute [r] binary_path
    #
    # The path to the command line binary to run
    #
    # This is the first element of every command line built by this object.
    #
    # @example
    #   binary_path = '/usr/bin/git'
    #   command_line = Git::CommandLine.new({}, binary_path, [], Logger.new(STDOUT))
    #   command_line.binary_path #=> '/usr/bin/git'
    #
    # @return [String]
    #
    attr_reader :binary_path

    # @attribute [r] global_opts
    #
    # The global options to pass to git
    #
    # These are options that are passed to git before the command name and
    # arguments. For example, in `git --git-dir /path/to/git/dir version`, the
    # global options are %w[--git-dir /path/to/git/dir].
    #
    # Each option is converted to a String with `#to_s` when the command line is
    # built.
    #
    # @example
    #   env = {}
    #   global_opts = %w[--git-dir /path/to/git/dir]
    #   logger = Logger.new(nil)
    #   cli = CommandLine.new(env, '/usr/bin/git', global_opts, logger)
    #   cli.global_opts #=> %w[--git-dir /path/to/git/dir]
    #
    # @return [Array<String>]
    #
    attr_reader :global_opts

    # @attribute [r] logger
    #
    # The logger to use for logging git commands and results
    #
    # The exit status of each command is logged at the info level and the
    # collected stdout and stderr are logged at the debug level.
    #
    # @example
    #   env = {}
    #   global_opts = %w[]
    #   logger = Logger.new(STDOUT)
    #   cli = CommandLine.new(env, '/usr/bin/git', global_opts, logger)
    #   cli.logger == logger #=> true
    #
    # @return [Logger]
    #
    attr_reader :logger

    # Execute a git command, wait for it to finish, and return the result
    #
    # NORMALIZATION
    #
    # The command output is returned as a Unicode string containing the binary output
    # from the command. If the binary output is not valid UTF-8, the output will
    # cause problems because the encoding will be invalid.
    #
    # Normalization is a process that tries to convert the binary output to a valid
    # UTF-8 string. It uses the `rchardet` gem to detect the encoding of the binary
    # output and then converts it to UTF-8.
    #
    # Normalization is not enabled by default. Pass `normalize: true` to Git::CommandLine#run
    # to enable it. Normalization will only be performed on stdout and only if the `out:` option
    # is nil or is a StringIO object. If the `out:` option is set to a file or other IO object,
    # the normalize option will be ignored.
    #
    # @example Run a command and return the output
    #   cli.run('version') #=> "git version 2.39.1\n"
    #
    # @example The args array should be splatted into the parameter list
    #   args = %w[log -n 1 --oneline]
    #   cli.run(*args) #=> "f5baa11 beginning of Ruby/Git project\n"
    #
    # @example Run a command and return the chomped output
    #   cli.run('version', chomp: true) #=> "git version 2.39.1"
    #
    # @example Run a command without normalizing the output
    #   cli.run('version', normalize: false) #=> "git version 2.39.1\n"
    #
    # @example Capture stdout in a temporary file
    #   require 'tempfile'
    #   tempfile = Tempfile.create('git') do |file|
    #     cli.run('version', out: file)
    #     file.rewind
    #     file.read #=> "git version 2.39.1\n"
    #   end
    #
    # @example Capture stderr in a StringIO object
    #   require 'stringio'
    #   stderr = StringIO.new
    #   begin
    #     cli.run('log', 'nonexistent-branch', err: stderr)
    #   rescue Git::FailedError => e
    #     stderr.string #=> "unknown revision or path not in the working tree.\n"
    #   end
    #
    # @param args [Array<String>] the command line arguments to pass to git
    #
    #   This array should be splatted into the parameter list.
    #
    # @param out [#write, nil] the object to write stdout to or nil to capture stdout
    #
    #   If this is nil, stdout is collected in a new `StringIO` object. Whenever stdout
    #   is collected in a `StringIO` object, its contents are returned in the result.
    #
    #   In general, only specify `out` when you want to redirect stdout to a file or
    #   some other object that responds to `#write`. The default behavior will return
    #   the output of the command.
    #
    # @param err [#write, nil] the object to write stderr to or nil to capture stderr
    #
    #   If this is nil and `merge` is `true`, stderr is written to the same object as
    #   stdout so that both are merged into the output returned by this method.
    #
    # @param normalize [Boolean] whether to normalize the output to a valid encoding
    #
    # @param chomp [Boolean] whether to chomp the output
    #
    # @param merge [Boolean] whether to merge stdout and stderr in the string returned
    #
    # @param chdir [String] the directory to run the command in
    #
    # @param timeout [Numeric, nil] the maximum seconds to wait for the command to complete
    #
    #   If timeout is zero or nil, the timeout will not be enforced.
    #
    #   If the command times out, it is killed via a `SIGKILL` signal and `Git::TimeoutError` is raised.
    #
    #   If the command does not respond to SIGKILL, it will hang this method.
    #
    # @return [Git::CommandLineResult] the result of running the command
    #
    # @raise [ArgumentError] if `args` contains an array (the args must be splatted)
    #
    # @raise [Git::SignaledError] if the command was terminated because of an uncaught signal
    #
    # @raise [Git::FailedError] if the command returned a non-zero exitstatus
    #
    # @raise [Git::ProcessIOError] if an exception was raised while collecting subprocess output
    #
    # @raise [Git::TimeoutError] if the command times out
    #
    def run(*args, out: nil, err: nil, normalize: false, chomp: false, merge: false, chdir: nil, timeout: nil)
      # Every keyword is optional so that the plain `cli.run('version')` form works:
      # output is captured in memory and returned without normalizing or chomping.
      git_cmd = build_git_cmd(args)

      # With no stdout writer given, capture stdout in memory so it can be returned
      out ||= StringIO.new

      # With no stderr writer given, either share the stdout writer (when merging) or
      # capture stderr in its own buffer. An explicit `err` is always used as given.
      err ||= (merge ? out : StringIO.new)

      # `:not_set` stands in for a missing chdir so that nil is never passed down
      status = execute(git_cmd, out, err, chdir: (chdir || :not_set), timeout: timeout)

      # Builds the result, logs it, and raises if the command timed out or failed
      process_result(git_cmd, status, out, err, normalize, chomp, timeout)
    end

    private

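    # Build the git command line from the available sources to send to `Process.spawn`
    #
    # @param args [Array<#to_s>] the command and its arguments; must not contain an array
    #
    # @return [Array<String>] the binary path, the global options, and `args`, each converted to a String
    #
    # @raise [ArgumentError] if `args` contains an array
    #
    # @api private
    #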
    def build_git_cmd(args)
      # A nested array means the caller forgot to splat the argument list
      raise ArgumentError, 'The args array can not contain an array' if args.any?(Array)

      # Command line order: binary, then global options, then the command and its
      # arguments. Every element is stringified so symbols and numbers are accepted.
      [binary_path, *global_opts, *args].map(&:to_s)
    end

    # Determine the output to return in the `CommandLineResult`
    #
    # If the writer can return the output by calling `#string` (such as a StringIO),
    # then return the result of normalizing the encoding and chomping the output
    # as requested.
    #
    # If the writer does not support `#string`, then return nil. The output is
    # assumed to be collected by the writer itself such as when the writer
    # is a file instead of a StringIO.
    #
    # @param writer [#string] the writer to post-process
    #
    # @return [String, nil]
    #
    # @api private
    #
    def post_process(writer, normalize, chomp)
      # Writers without #string (a File, for example) hold their own output
      return nil unless writer.respond_to?(:string)

      # Work on a copy so the writer's own buffer is left untouched
      text = writer.string.dup
      if normalize
        # Encoding is normalized one line at a time and the lines are re-joined
        text = text.lines.map { |line| Git::EncodingUtils.normalize_encoding(line) }.join
      end
      text.chomp! if chomp
      text
    end

    # Post-process all writers and return an array of the results
    #
    # @param writers [Array<#write>] the writers to post-process
    # @param normalize [Boolean] whether to normalize the output of each writer
    # @param chomp [Boolean] whether to chomp the output of each writer
    #
    # @return [Array<String, nil>] the output of each writer that supports `#string`
    #
    # @api private
    #
    def post_process_all(writers, normalize, chomp)
      # One result per writer, in the same order; writers that do not support
      # #string contribute nil
      writers.map { |writer| post_process(writer, normalize, chomp) }
    end

    # Raise an error when there was an exception while collecting the subprocess output
    #
    # @param git_cmd [Array<String>] the git command that was executed
    # @param pipe_name [Symbol] the name of the pipe that raised the exception
    # @param pipe [ProcessExecuter::MonitoredPipe] the pipe that raised the exception
    #
    # @raise [Git::ProcessIOError]
    #
    # @return [void] this method always raises an error
    #
    # @api private
    #
    def raise_pipe_error(git_cmd, pipe_name, pipe)
      # Name the command and the failing pipe; the pipe's own exception is attached
      # as the cause so the underlying failure is not lost
      message = "Pipe Exception for #{git_cmd}: #{pipe_name}"
      raise Git::ProcessIOError, message, cause: pipe.exception
    end

    # Execute the git command and collect the output
    #
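    # @param cmd [Array<String>] the git command to execute
    # @param out_writers [Array<#write>] the writers that receive the command's stdout
    # @param err_writers [Array<#write>] the writers that receive the command's stderr
    # @param chdir [String] the directory to run the command in
    # @param timeout [Numeric, nil] the maximum seconds to wait for the command to complete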
    #
    #   If timeout is zero or nil, the command will not time out. If the command
    #   times out, it is killed via a SIGKILL signal and `Git::TimeoutError` is raised.
    #
    #   If the command does not respond to SIGKILL, it will hang this method.
    #
    # @raise [Git::ProcessIOError] if an exception was raised while collecting subprocess output
    # @raise [Git::TimeoutError] if the command times out
    #
    # @return [ProcessExecuter::Status] the status of the completed subprocess
    #
    # @api private
    #
    def spawn(cmd, out_writers, err_writers, chdir:, timeout:)
      # Each pipe forwards what the subprocess writes to the given writers
      out_pipe = ProcessExecuter::MonitoredPipe.new(*out_writers, chunk_size: 10_000)
      err_pipe = ProcessExecuter::MonitoredPipe.new(*err_writers, chunk_size: 10_000)
      ProcessExecuter.spawn(env, *cmd, out: out_pipe, err: err_pipe, chdir: chdir, timeout: timeout)
    ensure
      # Either pipe is still nil if its construction raised. Safe navigation keeps
      # that original error from being masked by a NoMethodError raised here.
      out_pipe&.close
      err_pipe&.close

      # An exception recorded by a pipe while collecting output is surfaced as a
      # Git::ProcessIOError, checking stdout before stderr
      raise_pipe_error(cmd, :stdout, out_pipe) if out_pipe&.exception
      raise_pipe_error(cmd, :stderr, err_pipe) if err_pipe&.exception
    end

    # The writers that will be used to collect stdout and stderr
    #
    # Additional writers could be added here if you wanted to tee output
    # or send output to the terminal.
    #
    # @param out [#write] the object to write stdout to
    # @param err [#write] the object to write stderr to
    #
    # @return [Array<Array<#write>, Array<#write>>] the writers for stdout and stderr
    #
    # @api private
    #
    def writers(out, err)
      # One single-element list per stream: stdout writers first, stderr writers second
      [[out], [err]]
    end

    # Process the result of the command and return a Git::CommandLineResult
    #
    # Post process output, log the command and result, and raise an error if the
    # command failed.
    #
    # @param git_cmd [Array<String>] the git command that was executed
    # @param status [ProcessExecuter::Status] the status of the completed subprocess
    # @param out [#write] the object that stdout was written to
    # @param err [#write] the object that stderr was written to
    # @param normalize [Boolean] whether to normalize the output of each writer
    # @param chomp [Boolean] whether to chomp the output of each writer
    # @param timeout [Numeric, nil] the maximum seconds to wait for the command to complete
    #
    # @return [Git::CommandLineResult] the result of the command to return to the caller
    #
    # @raise [Git::FailedError] if the command failed
    # @raise [Git::SignaledError] if the command was signaled
    # @raise [Git::TimeoutError] if the command times out
    # @raise [Git::ProcessIOError] if an exception was raised while collecting subprocess output
    #
    # @api private
    #
    def process_result(git_cmd, status, out, err, normalize, chomp, timeout)
      stdout_text, stderr_text = post_process_all([out, err], normalize, chomp)

      # Blocks are used so the messages are only built when the level is enabled
      logger.info { "#{git_cmd} exited with status #{status}" }
      logger.debug { "stdout:\n#{stdout_text.inspect}\nstderr:\n#{stderr_text.inspect}" }

      result = Git::CommandLineResult.new(git_cmd, status, stdout_text, stderr_text)

      # Order matters: a timeout is reported first, then a signal, then any
      # other unsuccessful exit
      raise Git::TimeoutError.new(result, timeout) if status.timeout?
      raise Git::SignaledError.new(result) if status.signaled?
      raise Git::FailedError.new(result) unless status.success?

      result
    end

    # Execute the git command and write the command output to out and err
    #
    # @param git_cmd [Array<String>] the git command to execute
    # @param out [#write] the object to write stdout to
    # @param err [#write] the object to write stderr to
    # @param chdir [String] the directory to run the command in
    # @param timeout [Numeric, nil] the maximum seconds to wait for the command to complete
    #
    #   If timeout is zero or nil, the command will not time out. If the command
    #   times out, it is killed via a SIGKILL signal and `Git::TimeoutError` is raised.
    #
    #   If the command does not respond to SIGKILL, it will hang this method.
    #
    # @raise [Git::ProcessIOError] if an exception was raised while collecting subprocess output
    # @raise [Git::TimeoutError] if the command times out
    #
    # @return [ProcessExecuter::Status] the status of the completed subprocess
    #
    # @api private
    #
    def execute(git_cmd, out, err, chdir:, timeout:)
      # `writers` yields the stdout list and the stderr list, splatted here into
      # the two positional writer arguments of `spawn`
      spawn(git_cmd, *writers(out, err), chdir: chdir, timeout: timeout)
    end
  end
end