Last active
March 20, 2017 15:37
-
-
Save xai/c25a2130000a23331a7643d8f01f436d to your computer and use it in GitHub Desktop.
Identify changes using libgit2 combined with Google's diff-match-patch approach
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2017 Olaf Lessenich <xai@linux.com>
#
# Distributed under terms of the MIT license.
import sys | |
from pygit2 import Repository | |
import diff_match_patch as dmp_module | |
# Shared diff_match_patch instance; word_diff() calls diff_main() on it.
dmp = dmp_module.diff_match_patch()

# When True, main() additionally prints hunk ranges, raw hunk lines and
# every diff fragment it processes.
debug = False
def diff(repo, commit1, commit2):
    """Return ``repo.diff`` between *commit1* and *commit2*.

    The diff is requested without any context lines
    (``context_lines=0``), which is important here: the hunks then
    consist of changed lines only.
    """
    options = {'context_lines': 0}
    return repo.diff(commit1, commit2, **options)
def word_diff(a, b):
    """Diff the texts *a* and *b* with the module-level ``dmp`` object.

    This is the "google approach": the result of ``dmp.diff_main(a, b)``
    is handed back unchanged. The caller in this file treats it as a
    sequence of ``(operation, text)`` entries.
    """
    fragments = dmp.diff_main(a, b)
    return fragments
def print_changes(change):
    """Report the changed range of the current line and move to the next.

    *change* is the per-side bookkeeping dict. The keys read here are
    ``label``, ``file``, ``line``, ``col`` and ``chars``.

    If ``chars`` is positive, one line of the form
    ``<label> '<file>' line <line> cols <col>-<col + chars>`` is printed.
    Regardless of whether anything was printed, the dict is then advanced
    to the next line: ``line`` is incremented and ``col``, ``chars`` and
    ``pending`` are reset. The dict is modified in place; nothing is
    returned.
    """
    if change['chars'] > 0:
        print('%s \'%s\' line %d cols %d-%d' %
              (change['label'],
               change['file'],
               change['line'],
               change['col'],
               change['col'] + change['chars']))

    change['line'] += 1
    change['col'] = 1
    change['chars'] = 0
    # Unchanged characters seen after the last change belong to the line
    # that just ended. Without this reset they would be added to the first
    # change on the next line and inflate its column range.
    change['pending'] = 0
def main():
    """Print the changed column ranges of the latest commit.

    Opens the repository at ``sys.argv[1]`` and diffs ``HEAD~1`` against
    ``HEAD``. For every hunk, the removed and the added text are compared
    with ``word_diff``. The result is walked character by character, and
    each changed range is reported through ``print_changes``, labelled
    ``-`` for the old file and ``+`` for the new file.
    """
    repo = Repository(sys.argv[1])

    # Operation codes this function expects as the first element of each
    # entry returned by word_diff().
    DIFF_DELETED = -1
    DIFF_ADDED = 1
    DIFF_EQUAL = 0

    # Diff something; just the latest commit here, for demonstration.
    patches = diff(repo, 'HEAD~1', 'HEAD')

    for p in patches:
        for h in p.hunks:
            # Per-side bookkeeping, keyed by operation code.
            # 'end' is only used for debugging and (so far manual) sanity
            # checking.
            changes = {
                DIFF_DELETED: {
                    'file': p.delta.old_file.path,
                    'label': '-',
                    'start': h.old_start,
                    'end': h.old_start + h.old_lines - 1,
                    'lines': [],
                },
                DIFF_ADDED: {
                    'file': p.delta.new_file.path,
                    'label': '+',
                    'start': h.new_start,
                    'end': h.new_start + h.new_lines - 1,
                    'lines': [],
                },
            }

            if debug and h.old_lines > 0:
                print('old version has changes in lines %d to %d' %
                      (changes[DIFF_DELETED]['start'],
                       changes[DIFF_DELETED]['end']))
            if debug and h.new_lines > 0:
                print('new version has changes in lines %d to %d' %
                      (changes[DIFF_ADDED]['start'],
                       changes[DIFF_ADDED]['end']))

            # Partition the lines of the hunk into deleted and added lines.
            for line in h.lines:
                if debug:
                    sys.stdout.write('%s %s' % (line.origin, line.content))

                if line.origin == '-':
                    changes[DIFF_DELETED]['lines'].append(line.content)
                elif line.origin == '+':
                    changes[DIFF_ADDED]['lines'].append(line.content)
                else:
                    # Most likely line.origin is '<' with line.content
                    # '\ No newline at end of file'; ignored on purpose.
                    pass

            # Run the google approach on the competing sides of the hunk.
            wdiff = word_diff(''.join(changes[DIFF_DELETED]['lines']),
                              ''.join(changes[DIFF_ADDED]['lines']))

            # Initialise the line/column counters of both sides.
            for version in DIFF_DELETED, DIFF_ADDED:
                side = changes[version]
                # first column of a change in a line
                side['col'] = 1
                # Each side counts from its own first line of the hunk.
                # Using the old start for both sides would report '+'
                # ranges with line numbers of the old file.
                side['line'] = side['start']
                # changed chars in a line
                side['chars'] = 0
                # common chars after a change but before a newline,
                # i.e., if another change appears in this line,
                # we need to add these pending ones to the range
                side['pending'] = 0

            for d in wdiff:
                if debug:
                    print(d)

                for char in d[1]:
                    if char == '\n':
                        if d[0] == DIFF_EQUAL:
                            # A common newline ends the line on both sides.
                            for version in DIFF_DELETED, DIFF_ADDED:
                                print_changes(changes[version])
                        else:
                            # A changed newline counts as a changed char
                            # and ends the line on that side only.
                            changes[d[0]]['chars'] += 1
                            print_changes(changes[d[0]])
                    elif d[0] == DIFF_EQUAL:
                        for version in DIFF_DELETED, DIFF_ADDED:
                            if changes[version]['chars'] == 0:
                                # No change seen yet in this line: the
                                # next change starts one column later.
                                changes[version]['col'] += 1
                            else:
                                changes[version]['pending'] += 1
                    else:
                        if changes[d[0]]['pending'] > 0:
                            # Another change in the same line: widen the
                            # range over the common chars in between.
                            changes[d[0]]['chars'] += changes[d[0]]['pending']
                            changes[d[0]]['pending'] = 0
                        changes[d[0]]['chars'] += 1

            # Flush whatever is left when the text did not end in a newline.
            for version in DIFF_DELETED, DIFF_ADDED:
                print_changes(changes[version])
if __name__ == '__main__':
    # The repository path is the one mandatory command line argument.
    if len(sys.argv) >= 2:
        main()
    else:
        sys.stderr.write('Usage: %s /path/to/repo\n\n' % sys.argv[0])
        sys.stderr.write('Exiting.\n')
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment