git_deps/blame.py
import subprocess
import re
from dataclasses import dataclass
# The following classes are introduced to imitate their counterparts in pygit2,
# so that the output of 'blame_via_subprocess' can be swapped with pygit2's own
# blame output.
@dataclass
class GitRef:
    """
    Minimal stand-in for a commit id object: it carries nothing but the
    commit's hexadecimal object name, exposed as the ``hex`` attribute.
    """

    # Hexadecimal object name of the referenced commit.
    hex: str
@dataclass
class BlameHunk:
    """
    One hunk of blame output: a run of consecutive lines which all share
    the same commit information.
    """

    # Reference to the commit the lines of this hunk are attributed to.
    orig_commit_id: GitRef
    # Line number at which the hunk starts in the originating commit's
    # version of the file.
    orig_start_line_number: int
    # Line number at which the hunk starts in the blamed version of the file.
    final_start_line_number: int
    # Number of consecutive lines covered by the hunk (defaults to one).
    lines_in_hunk: int = 1
def blame_via_subprocess(path, commit, start_line, num_lines):
    """
    Generate blame hunks by calling 'git blame' as a separate process.

    This is a workaround for the slowness of pygit2's own blame algorithm.
    See https://github.com/aspiers/git-deps/issues/1

    :param path: path of the file to blame (passed to git after '--')
    :param commit: revision at which the file is blamed
    :param start_line: first line of the range handed to 'git blame -L'
    :param num_lines: number of lines in the range, starting at start_line
    :return: a generator yielding one BlameHunk per group header found in
        the porcelain output, in output order
    :raises subprocess.CalledProcessError: if 'git blame' exits with a
        non-zero status (raised when the generator is first advanced,
        since that is when the command actually runs)
    """
    cmd = [
        'git', 'blame',
        '--porcelain',
        '-L', "%d,+%d" % (start_line, num_lines),
        commit, '--', path
    ]
    # Only the ASCII group-header lines are parsed below, so bytes in the
    # blamed file's content which cannot be decoded must not abort the whole
    # blame with a UnicodeDecodeError; replace them instead.
    output = subprocess.check_output(
        cmd, universal_newlines=True, errors='replace'
    )

    # A group header is "<object name> <orig line> <final line> <count>".
    # Object names are 40 hex digits (SHA-1) or 64 hex digits (SHA-256
    # repositories).  Content lines are prefixed with a TAB in porcelain
    # output, so they can never be mistaken for a header.
    hunk_header = re.compile(
        r'^([0-9a-f]{40}|[0-9a-f]{64}) (\d+) (\d+) (\d+)$'
    )

    for line in output.split('\n'):
        m = hunk_header.match(line)
        if m is None:
            continue  # metadata or content line, not the start of a hunk
        dependency_sha1, orig_line_num, line_num, length = m.groups()
        # Each header fully describes its hunk, so it can be yielded
        # straight away.
        yield BlameHunk(
            orig_commit_id=GitRef(dependency_sha1),
            orig_start_line_number=int(orig_line_num),
            final_start_line_number=int(line_num),
            lines_in_hunk=int(length),
        )