gitlabhq/grit

View on GitHub
lib/grit/git-ruby/repository.rb

Summary

Maintainability
F
1 wk
Test Coverage
#
# converted from the gitrb project
#
# authors:
#    Matthias Lederhofer <matled@gmx.net>
#    Simon 'corecode' Schubert <corecode@fs.ei.tum.de>
#    Scott Chacon <schacon@gmail.com>
#
# provides native ruby access to git objects and pack files
#
require 'grit/git-ruby/internal/raw_object'
require 'grit/git-ruby/internal/pack'
require 'grit/git-ruby/internal/loose'
require 'grit/git-ruby/git_object'

require 'rubygems'
require 'diff/lcs'
require 'diff/lcs/hunk'

# have to do this so it doesn't interfere with Grit::Diff
module Difference
  include Diff
end

module Grit
  module GitRuby
    class Repository

      class NoSuchShaFound < StandardError
      end

      class NoSuchPath < StandardError
      end

      attr_accessor :git_dir, :options

      def initialize(git_dir, options = {})
        @git_dir = git_dir
        @options = options
        @packs = []
      end

      # returns the loose objects object lazily
      def loose
        @loose ||= initloose
      end

      # returns the array of pack list objects
      def packs
        @packs ||= initpacks
      end


      # prints out the type, shas and content of all of the pack files
      def show
        packs.each do |p|
          puts p.name
          puts
          p.each_sha1 do |s|
            puts "**#{p[s].type}**"
            if p[s].type.to_s == 'commit'
              puts s.unpack('H*')
              puts p[s].content
            end
          end
          puts
        end
      end


      # returns a raw object given a SHA1
      def get_raw_object_by_sha1(sha1o)
        raise NoSuchShaFound if sha1o.nil? || sha1o.empty? || !sha1o.is_a?(String)

        sha1 = [sha1o.chomp].pack("H*")
        # try packs
        packs.each do |pack|
          o = pack[sha1]
          return pack[sha1] if o
        end

        # try loose storage
        loose.each do |lsobj|
          o = lsobj[sha1]
          return o if o
        end

        # try packs again, maybe the object got packed in the meantime
        initpacks
        packs.each do |pack|
          o = pack[sha1]
          return o if o
        end

#        puts "*#{sha1o}*"
        raise NoSuchShaFound
      end

      def cached(key, object, do_cache = true)
        object
      end

      # returns GitRuby object of any type given a SHA1
      def get_object_by_sha1(sha1)
        r = get_raw_object_by_sha1(sha1)
        return nil if !r
        GitObject.from_raw(r)
      end

      # writes a raw object into the git repo
      def put_raw_object(content, type)
        loose.first.put_raw_object(content, type)
      end

      # returns true or false if that sha exists in the db
      def object_exists?(sha1)
        sha_hex = [sha1].pack("H*")
        return true if in_packs?(sha_hex)
        return true if in_loose?(sha_hex)
        initpacks
        return true if in_packs?(sha_hex) #maybe the object got packed in the meantime
        false
      end

      # returns true if the hex-packed sha is in the packfiles
      def in_packs?(sha_hex)
        # try packs
        packs.each do |pack|
          return true if pack[sha_hex]
        end
        false
      end

      # returns true if the hex-packed sha is in the loose objects
      def in_loose?(sha_hex)
        loose.each do |lsobj|
          return true if lsobj[sha_hex]
        end
        false
      end


      # returns the file type (as a symbol) of this sha
      def cat_file_type(sha)
        get_raw_object_by_sha1(sha).type
      end

      # returns the file size (as an int) of this sha
      def cat_file_size(sha)
        get_raw_object_by_sha1(sha).content.size
      end

      # returns the raw file contents of this sha
      def cat_file(sha)
        get_object_by_sha1(sha).raw_content
      end

      # returns a 2-d hash of the tree
      # ['blob']['FILENAME'] = {:mode => '100644', :sha => SHA}
      # ['tree']['DIRNAME'] = {:mode => '040000', :sha => SHA}
      def list_tree(sha)
        data = {'blob' => {}, 'tree' => {}, 'link' => {}, 'commit' => {}}
        get_object_by_sha1(sha).entry.each do |e|
          data[e.format_type][e.name] = {:mode => e.format_mode, :sha => e.sha1}
        end
        data
      end

      # returns the raw (cat-file) output for a tree
      # if given a commit sha, it will print the tree of that commit
      # if given a path limiter array, it will limit the output to those
      # if asked for recrusive trees, will traverse trees
      def ls_tree(sha, paths = [], recursive = false)
        if paths.size > 0
          # pathing
          part = []
          paths.each do |path|
            part += ls_tree_path(sha, path)
          end
          return part.join("\n")
        else
          get_raw_tree(sha, recursive)
        end
      end

      def get_raw_tree(sha, recursive = false)
        o = get_raw_object_by_sha1(sha)
        if o.type == :commit
          tree = get_object_by_sha1(sha).tree
        elsif o.type == :tag
          commit_sha = get_object_by_sha1(sha).object
          tree = get_object_by_sha1(commit_sha).tree
        elsif o.type == :tree
          tree = sha
        else
          return nil
        end

        recursive ? get_raw_trees(tree) : cat_file(tree)
      end

      # Grabs tree contents recursively,
      #   e.g. `git ls-tree -r sha`
      def get_raw_trees(sha, path = '')
        out = ''
        cat_file(sha).split("\n").each do |line|
          mode, type, sha, name = line.split(/\s/)

          if type == 'tree'
            full_name = path.empty? ? name : "#{path}/#{name}"
            out << get_raw_trees(sha, full_name)
          elsif path.empty?
            out << line + "\n"
          else
            out << line.gsub(name, "#{path}/#{name}") + "\n"
          end
        end

        out
      end

      # return array of tree entries
      ## TODO : refactor this to remove the fugly
      def ls_tree_path(sha, path, append = nil)
        tree = get_raw_tree(sha)
        if path =~ /\//
          paths = path.split('/')
          last = path[path.size - 1, 1]
          if (last == '/') && (paths.size == 1)
            append = append ? File.join(append, paths.first) : paths.first
            dir_name = tree.split("\n").select { |p| p.split("\t")[1] == paths.first }.first
            raise NoSuchPath if !dir_name
            next_sha = dir_name.split(' ')[2]
            tree = get_raw_tree(next_sha)
            tree = tree.split("\n")
            if append
              mod_tree = []
              tree.each do |ent|
                (info, fpath) = ent.split("\t")
                mod_tree << [info, File.join(append, fpath)].join("\t")
              end
              mod_tree
            else
              tree
            end
          else
            raise NoSuchPath if tree.nil?
            next_path = paths.shift
            dir_name = tree.split("\n").select { |p| p.split("\t")[1] == next_path }.first
            raise NoSuchPath if !dir_name
            next_sha = dir_name.split(' ')[2]
            next_path = append ? File.join(append, next_path) : next_path
            if (last == '/')
              ls_tree_path(next_sha, paths.join("/") + '/', next_path)
            else
              ls_tree_path(next_sha, paths.join("/"), next_path)
            end
          end
        else
          raise NoSuchPath if tree.nil?
          tree = tree.split("\n")
          tree = tree.select { |p| p.split("\t")[1] == path }
          if append
            mod_tree = []
            tree.each do |ent|
              (info, fpath) = ent.split("\t")
              mod_tree << [info, File.join(append, fpath)].join("\t")
            end
            mod_tree
          else
            tree
          end
        end
      end

      # returns an array of GitRuby Commit objects
      # [ [sha, raw_output], [sha, raw_output], [sha, raw_output] ... ]
      #
      # takes the following options:
      #  :since - Time object specifying that you don't want commits BEFORE this
      #  :until - Time object specifying that you don't want commit AFTER this
      #  :first_parent - tells log to only walk first parent
      #  :path_limiter - string or array of strings to limit path
      #  :max_count - number to limit the output
      def log(sha, options = {})
        @already_searched = {}
        walk_log(sha, options)
      end

      def truncate_arr(arr, sha)
        new_arr = []
        arr.each do |a|
          if a[0] == sha
            return new_arr
          end
          new_arr << a
        end
        return new_arr
      end

      def rev_list(sha, options)
        if sha.is_a? Array
          (end_sha, sha) = sha
        end

        log = log(sha, options)
        log = log.sort { |a, b| a[2] <=> b[2] }.reverse

        if end_sha
          log = truncate_arr(log, end_sha)
        end

        # shorten the list if it's longer than max_count (had to get everything in branches)
        if options[:max_count]
          if (opt_len = options[:max_count].to_i) < log.size
            log = log[0, opt_len]
          end
        end

        if options[:pretty] == 'raw'
          log.map {|k, v| v }.join('')
        else
          log.map {|k, v| k }.join("\n")
        end
      end

      # called by log() to recursively walk the tree
      def walk_log(sha, opts, total_size = 0)
        return [] if @already_searched[sha] # to prevent rechecking branches
        @already_searched[sha] = true

        array = []
        if (sha)
          o = get_raw_object_by_sha1(sha)
          if o.type == :tag
            commit_sha = get_object_by_sha1(sha).object
            c = get_object_by_sha1(commit_sha)
          else
            c = GitObject.from_raw(o)
          end

          return [] if c.type != :commit

          add_sha = true

          if opts[:since] && opts[:since].is_a?(Time) && (opts[:since] > c.committer.date)
            add_sha = false
          end
          if opts[:until] && opts[:until].is_a?(Time) && (opts[:until] < c.committer.date)
            add_sha = false
          end

          # follow all parents unless '--first-parent' is specified #
          subarray = []

          if !c.parent.first && opts[:path_limiter]  # check for the last commit
            add_sha = false
          end

          if (!opts[:max_count] || ((array.size + total_size) < opts[:max_count]))

            if !opts[:path_limiter]
              output = c.raw_log(sha)
              array << [sha, output, c.committer.date]
            end

            if (opts[:max_count] && (array.size + total_size) >= opts[:max_count])
              return array
            end

            c.parent.each do |psha|
              if psha && !files_changed?(c.tree, get_object_by_sha1(psha).tree,
                                        opts[:path_limiter])
                add_sha = false
              end
              subarray += walk_log(psha, opts, (array.size + total_size))
              next if opts[:first_parent]
            end

            if opts[:path_limiter] && add_sha
              output = c.raw_log(sha)
              array << [sha, output, c.committer.date]
            end

            if add_sha
              array += subarray
            end
          end

        end

        array
      end

      def diff(commit1, commit2, options = {})
        patch = ''

        commit_obj1 = get_object_by_sha1(commit1)
        tree1 = commit_obj1.tree
        if commit2
          tree2 = get_object_by_sha1(commit2).tree
        else
          tree2 = get_object_by_sha1(commit_obj1.parent.first).tree
        end

        qdiff = quick_diff(tree1, tree2)

        qdiff.sort.each do |diff_arr|
          path, status, treeSHA1, treeSHA2 = *diff_arr
          format, lines, output = :unified, 3, ''
          file_length_difference = 0

          fileA = treeSHA1 ? cat_file(treeSHA1) : ''
          fileB = treeSHA2 ? cat_file(treeSHA2) : ''

          sha1 = treeSHA1 || '0000000000000000000000000000000000000000'
          sha2 = treeSHA2 || '0000000000000000000000000000000000000000'

          data_old = fileA.split(/\n/).map! { |e| e.chomp }
          data_new = fileB.split(/\n/).map! { |e| e.chomp }

          diffs = Difference::LCS.diff(data_old, data_new)
          next if diffs.empty?

          a_path = "a/#{path.gsub('./', '')}"
          b_path = "b/#{path.gsub('./', '')}"

          header = "diff --git #{a_path} #{b_path}"
          if options[:full_index]
            header << "\n" + 'index ' + sha1 + '..' + sha2
            header << ' 100644' if treeSHA2 # hard coding this because i don't think we use it
          else
            header << "\n" + 'index ' + sha1[0,7] + '..' + sha2[0,7]
            header << ' 100644' if treeSHA2 # hard coding this because i don't think we use it
          end
          header << "\n--- " + (treeSHA1 ? a_path : '/dev/null')
          header << "\n+++ " + (treeSHA2 ? b_path : '/dev/null')
          header += "\n"

          oldhunk = hunk = nil

          diffs.each do |piece|
            begin
              hunk = Difference::LCS::Hunk.new(data_old, data_new, piece, lines, file_length_difference)
              file_length_difference = hunk.file_length_difference

              next unless oldhunk

              if lines > 0 && hunk.overlaps?(oldhunk)
                hunk.unshift(oldhunk)
              else
                output << oldhunk.diff(format)
              end
            ensure
              oldhunk = hunk
              output << "\n"
            end
          end

          output << oldhunk.diff(format)
          output << "\n"

          patch << header + output.lstrip
        end
        patch
      rescue
        '' # one of the trees was bad or lcs isn't there - no diff
      end

      def quick_what_changed(t1, t2, path, type)
        changed = []

        t1[type].each do |file, hsh|
          t2_file = t2[type][file] rescue nil
          full = File.join(path, file)
          if !t2_file
            changed << [full, 'added', hsh[:sha], nil]      # not in parent
          elsif (hsh[:sha] != t2_file[:sha])
            changed << [full, 'modified', hsh[:sha], t2_file[:sha]]   # file changed
          end
        end if t1

        t2[type].each do |file, hsh|
          if !t1 || !t1[type][file]
            changed << [File.join(path, file), 'removed', nil, hsh[:sha]]
          end
        end if t2

        changed
      end

      # takes 2 tree shas and recursively walks them to find out what
      # files or directories have been modified in them and returns an
      # array of changes
      # [ [full_path, 'added', tree1_hash, nil],
      #   [full_path, 'removed', nil, tree2_hash],
      #   [full_path, 'modified', tree1_hash, tree2_hash]
      #  ]
      def quick_diff(tree1, tree2, path = '.', recurse = true)
        # handle empty trees
        return changed if tree1 == tree2

        t1 = list_tree(tree1) if tree1
        t2 = list_tree(tree2) if tree2

        # finding files that are different
        changed = quick_what_changed(t1, t2, path, 'blob') +
                  quick_what_changed(t1, t2, path, 'link')

        t1['tree'].each do |dir, hsh|
          t2_tree = t2['tree'][dir] rescue nil
          full = File.join(path, dir)
          if !t2_tree
            if recurse
              changed += quick_diff(hsh[:sha], nil, full, true)
            else
              changed << [full, 'added', hsh[:sha], nil]      # not in parent
            end
          elsif (hsh[:sha] != t2_tree[:sha])
            if recurse
              changed += quick_diff(hsh[:sha], t2_tree[:sha], full, true)
            else
              changed << [full, 'modified', hsh[:sha], t2_tree[:sha]]   # file changed
            end
          end
        end if t1
        t2['tree'].each do |dir, hsh|
          t1_tree = t1['tree'][dir] rescue nil
          full = File.join(path, dir)
          if !t1_tree
            if recurse
              changed += quick_diff(nil, hsh[:sha], full, true)
            else
              changed << [full, 'removed', nil, hsh[:sha]]
            end
          end
        end if t2

        changed
      end

      # returns true if the files in path_limiter were changed, or no path limiter
      # used by the log() function when passed with a path_limiter
      def files_changed?(tree_sha1, tree_sha2, path_limiter = nil)
        if path_limiter
          mod = quick_diff(tree_sha1, tree_sha2)
          files = mod.map { |c| c.first }
          path_limiter.to_a.each do |filepath|
            if files.include?(filepath)
              return true
            end
          end
          return false
        end
        true
      end

      def get_subtree(commit_sha, path)
        tree_sha = get_object_by_sha1(commit_sha).tree

        if path && !(path == '' || path == '.' || path == './')
          paths = path.split('/')
          paths.each do |pathname|
            tree = get_object_by_sha1(tree_sha)
            if entry = tree.entry.select { |e| e.name == pathname }.first
              tree_sha = entry.sha1 rescue nil
            else
              return false
            end
          end
        end

        tree_sha
      end

      def blame_tree(commit_sha, path)
        # find subtree
        tree_sha = get_subtree(commit_sha, path)
        return {} if !tree_sha

        looking_for = []
        get_object_by_sha1(tree_sha).entry.each do |e|
          looking_for << File.join('.', e.name)
        end

        @already_searched = {}
        commits = look_for_commits(commit_sha, path, looking_for)

        # cleaning up array
        arr = {}
        commits.each do |commit_array|
          key = commit_array[0].gsub('./', '')
          arr[key] = commit_array[1]
        end
        arr
      end

      def look_for_commits(commit_sha, path, looking_for, options = {})
        return [] if @already_searched[commit_sha] # to prevent rechecking branches

        @already_searched[commit_sha] = true

        commit = get_object_by_sha1(commit_sha)
        tree_sha = get_subtree(commit_sha, path)

        found_data = []

        # at the beginning of the branch
        if commit.parent.size == 0
          looking_for.each do |search|
            # prevents the rare case of multiple branch starting points with
            # files that have never changed
            if found_data.assoc(search)
              found_data << [search, commit_sha]
            end
          end
          return found_data
        end

        # go through the parents recursively, looking for somewhere this has been changed
        commit.parent.each do |pc|
          diff = quick_diff(tree_sha, get_subtree(pc, path), '.', false)

          # remove anything found
          looking_for.each do |search|
            if match = diff.assoc(search)
              found_data << [search, commit_sha, match]
              looking_for.delete(search)
            end
          end

          if looking_for.size <= 0  # we're done
            return found_data
          end

          found_data += look_for_commits(pc, path, looking_for)  # recurse into parent
          return found_data if options[:first_parent]
        end

        ## TODO : find most recent commit with change in any parent
        found_data
      end

      # initialize a git repository
      def self.init(dir, bare = true)

        FileUtils.mkdir_p(dir) if !File.exists?(dir)

        FileUtils.cd(dir) do
          if(File.exists?('objects'))
            return false # already initialized
          else
            # initialize directory
            create_initial_config(bare)
            FileUtils.mkdir_p('refs/heads')
            FileUtils.mkdir_p('refs/tags')
            FileUtils.mkdir_p('objects/info')
            FileUtils.mkdir_p('objects/pack')
            FileUtils.mkdir_p('branches')
            add_file('description', 'Unnamed repository; edit this file to name it for gitweb.')
            add_file('HEAD', "ref: refs/heads/master\n")
            FileUtils.mkdir_p('hooks')
            FileUtils.cd('hooks') do
              add_file('applypatch-msg', '# add shell script and make executable to enable')
              add_file('post-commit', '# add shell script and make executable to enable')
              add_file('post-receive', '# add shell script and make executable to enable')
              add_file('post-update', '# add shell script and make executable to enable')
              add_file('pre-applypatch', '# add shell script and make executable to enable')
              add_file('pre-commit', '# add shell script and make executable to enable')
              add_file('pre-rebase', '# add shell script and make executable to enable')
              add_file('update', '# add shell script and make executable to enable')
            end
            FileUtils.mkdir_p('info')
            add_file('info/exclude', "# *.[oa]\n# *~")
          end
        end
      end

      def self.create_initial_config(bare = false)
        bare ? bare_status = 'true' : bare_status = 'false'
        config = "[core]\n\trepositoryformatversion = 0\n\tfilemode = true\n\tbare = #{bare_status}\n\tlogallrefupdates = true"
        add_file('config', config)
      end

      def self.add_file(name, contents)
        path = File.join(Dir.pwd, name)
        raise "Invalid path: #{path}" unless File.absolute_path(path) == path
        File.open(path, 'w') do |f|
          f.write contents
        end
      end

      def close
        @packs.each do |pack|
          pack.close
        end if @packs
      end

      protected

        def git_path(path)
          return "#@git_dir/#{path}"
        end

      private

        def initloose
          @loaded = []
          @loose = []
          load_loose(git_path('objects'))
          load_alternate_loose(git_path('objects'))
          @loose
        end

        def each_alternate_path(path)
          alt = File.join(path, 'info/alternates')
          return if !File.exists?(alt)

          File.readlines(alt).each do |line|
            path = line.chomp
            if path[0, 2] == '..'
              yield File.expand_path(File.join(@git_dir, 'objects', path))

              # XXX this is here for backward compatibility with grit < 2.3.0
              # relative alternate objects paths are expanded relative to the
              # objects directory, not the git repository directory.
              yield File.expand_path(File.join(@git_dir, path))
            else
              yield path
            end
          end
        end

        def load_alternate_loose(pathname)
          # load alternate loose, too
          each_alternate_path pathname do |path|
            next if @loaded.include?(path)
            next if !File.exist?(path)
            load_loose(path)
            load_alternate_loose(path)
          end
        end

        def load_loose(path)
          @loaded << path
          return if !File.exists?(path)
          @loose << Grit::GitRuby::Internal::LooseStorage.new(path)
        end

        def initpacks
          close
          @loaded_packs = []
          @packs = []
          load_packs(git_path("objects/pack"))
          load_alternate_packs(git_path('objects'))
          @packs
        end

        def load_alternate_packs(pathname)
          each_alternate_path pathname do |path|
            full_pack = File.join(path, 'pack')
            next if @loaded_packs.include?(full_pack)
            load_packs(full_pack)
            load_alternate_packs(path)
          end
        end

        def load_packs(path)
          @loaded_packs << path
          return if !File.exists?(path)
           Dir.open(path) do |dir|
            dir.each do |entry|
              next if !(entry =~ /\.pack$/i)
              pack = Grit::GitRuby::Internal::PackStorage.new(File.join(path,entry))
              if @options[:map_packfile]
                pack.cache_objects
              end
              @packs << pack
            end
          end
        end

    end

  end
end