Skip to content

Instantly share code, notes, and snippets.

@xai
Last active Mar 20, 2017
Embed
What would you like to do?
Identify changes using libgit2 combined with Google's diff approach
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2017 Olaf Lessenich <xai@linux.com>
#
# Distributed under terms of the MIT license.
import sys
from pygit2 import Repository
import diff_match_patch as dmp_module
# Set to True to additionally print hunk line ranges, raw hunk lines and
# the diff tuples while main() processes a diff.
debug = False
# Single diff_match_patch instance shared by every word_diff() call.
dmp = dmp_module.diff_match_patch()
def diff(repo, commit1, commit2):
    """
    Return repo.diff() between commit1 and commit2 with zero context lines.

    Setting context_lines=0 is important here: the caller partitions hunk
    lines into deleted and added text only, so the hunks should not carry
    surrounding unchanged lines.
    """
    options = {'context_lines': 0}
    return repo.diff(commit1, commit2, **options)
def word_diff(a, b):
    """
    Diff the strings a and b using the google approach.

    Delegates to diff_main() of the module-level diff_match_patch
    instance and returns its result unchanged. The caller treats each
    element as an (operation, text) pair.
    """
    segments = dmp.diff_main(a, b)
    return segments
def print_changes(change):
    """
    Print the changed column range of the current line and reset the
    per-line counters.

    change is a dict describing one side of a hunk. The keys read are
    'label', 'file', 'line', 'col' and 'chars'; the dict is updated in
    place.

    A line is only printed when at least one changed character was
    counted ('chars' > 0). The reported end column is col + chars, i.e.
    one past the last changed column.

    Afterwards the state is advanced to the beginning of the next line:
    'line' is incremented, 'col' is set back to 1, and both 'chars' and
    'pending' are cleared.
    """
    if change['chars'] > 0:
        print('%s \'%s\' line %d cols %d-%d' %
              (change['label'],
               change['file'],
               change['line'],
               change['col'],
               change['col'] + change['chars']))

    change['line'] += 1
    change['col'] = 1
    change['chars'] = 0
    # Unchanged characters that trailed a change belong to the line that
    # just ended. If they survived, the next change on the following
    # line would add them to its range and report it too wide.
    change['pending'] = 0
def main():
    """
    Print, for the latest commit of a repository, the column ranges that
    changed on each line.

    The repository path is read from sys.argv[1]. 'HEAD~1' is diffed
    against 'HEAD'. For every hunk the deleted and the added lines are
    joined into two strings, those are compared with word_diff(), and
    the result is walked character by character to track line and
    column positions on both sides. Ranges are emitted through
    print_changes(), one per changed line and side.
    """
    repo = Repository(sys.argv[1])

    # Operation codes found in the first element of each tuple returned
    # by word_diff(). The two non-equal codes double as keys of the
    # per-side state dict below.
    DIFF_DELETED = -1
    DIFF_ADDED = 1
    DIFF_EQUAL = 0

    # diff something. just latest commit here for demonstration
    patches = diff(repo, 'HEAD~1', 'HEAD')

    # iterate over patches in diff
    for p in patches:
        # iterate over hunks in patch
        for h in p.hunks:
            # Per-side state of this hunk. 'end' is just used for
            # debugging and (so far manual) sanity checking.
            changes = {
                DIFF_DELETED: {
                    'file': p.delta.old_file.path,
                    'label': '-',
                    'start': h.old_start,
                    'end': h.old_start + h.old_lines - 1,
                    'lines': [],
                },
                DIFF_ADDED: {
                    'file': p.delta.new_file.path,
                    'label': '+',
                    'start': h.new_start,
                    'end': h.new_start + h.new_lines - 1,
                    'lines': [],
                },
            }
            sides = (changes[DIFF_DELETED], changes[DIFF_ADDED])

            if debug and h.old_lines > 0:
                print('old version has changes in lines %d to %d' %
                      (changes[DIFF_DELETED]['start'],
                       changes[DIFF_DELETED]['end']))
            if debug and h.new_lines > 0:
                print('new version has changes in lines %d to %d' %
                      (changes[DIFF_ADDED]['start'],
                       changes[DIFF_ADDED]['end']))

            # partition lines of the hunk into deleted and added lines
            for line in h.lines:
                if debug:
                    sys.stdout.write('%s %s' % (line.origin, line.content))
                if line.origin == '-':
                    changes[DIFF_DELETED]['lines'].append(line.content)
                elif line.origin == '+':
                    changes[DIFF_ADDED]['lines'].append(line.content)
                # Any other origin is ignored on purpose. Most likely it
                # is '<' with the content '\ No newline at end of file'.

            # running the google approach on the competing sides of the hunk
            wdiff = word_diff(''.join(changes[DIFF_DELETED]['lines']),
                              ''.join(changes[DIFF_ADDED]['lines']))

            # calculate line and column numbers of changed parts
            for side in sides:
                # first column of a change in a line
                side['col'] = 1
                # Each side counts lines in its own file, so it has to
                # begin at its own hunk start. Seeding both sides with
                # the old start reported added ranges with line numbers
                # of the old file.
                side['line'] = side['start']
                # changed chars in a line
                side['chars'] = 0
                # common chars after a change but before a newline
                # i.e., if another change appears in this line,
                # we need to add these pending ones to the range
                side['pending'] = 0

            for d in wdiff:
                if debug:
                    print(d)
                op, text = d[0], d[1]
                for char in text:
                    if op == DIFF_EQUAL:
                        if char == '\n':
                            # a common newline ends the line on both sides
                            for side in sides:
                                print_changes(side)
                        else:
                            for side in sides:
                                if side['chars'] == 0:
                                    # no change seen yet in this line:
                                    # the range starts further right
                                    side['col'] += 1
                                else:
                                    side['pending'] += 1
                    else:
                        side = changes[op]
                        # A changed char extends the range over any
                        # common chars since the previous change. This
                        # also applies when the changed char is the
                        # newline, otherwise the range would stop short
                        # of it.
                        side['chars'] += side['pending'] + 1
                        side['pending'] = 0
                        if char == '\n':
                            # the newline exists on this side only, so
                            # only this side moves on to its next line
                            print_changes(side)

            # flush ranges of a last line that did not end with a newline
            for side in sides:
                print_changes(side)
if __name__ == '__main__':
    # The repository path is the only required argument.
    if len(sys.argv) >= 2:
        main()
    else:
        sys.stderr.write('Usage: %s /path/to/repo\n\n' % sys.argv[0])
        sys.stderr.write('Exiting.\n')
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment