# xai/diff.py

## diff.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2017 Olaf Lessenich <xai@linux.com>
#
# Distributed under terms of the MIT license.

import sys
from pygit2 import Repository
import diff_match_patch as dmp_module

# When True, main() additionally prints the line ranges of each hunk, the raw
# hunk lines and every tuple of the character diff.
debug = False

# Module-wide diff_match_patch instance; word_diff() calls its diff_main().
dmp = dmp_module.diff_match_patch()


def diff(repo, commit1, commit2):
    """
    Diff two revisions of a repository without context lines.

    Forwards both revisions to ``repo.diff`` and returns its result
    unchanged. Setting ``context_lines=0`` is important here: the hunks
    are later split into removed and added lines only.
    """
    patches = repo.diff(commit1, commit2, context_lines=0)
    return patches


def word_diff(a, b):
    """
    Compare two texts using the google approach (diff_match_patch).

    Both texts are handed to ``dmp.diff_main`` as they are; nothing in
    this function splits them into words. The result of ``diff_main`` is
    returned unchanged; main() reads each element as an
    ``(operation, text)`` pair.
    """
    edits = dmp.diff_main(a, b)
    return edits


def print_changes(change):
    """
    Report the pending change of the current line and move to the next line.

    ``change`` is the per-version state dict built in main(). If
    ``change['chars']`` is positive, one report line is printed with the
    label, file, line number and the column range ``col`` to
    ``col + chars`` (the end is one past the last changed column).

    The counters are reset on every call, whether or not something was
    printed: a line without changes still ends, so the line number has to
    advance and the column has to restart at 1. ``pending`` is cleared as
    well, because common characters trailing a change must not be added
    to a change on the following line.
    """
    if change['chars'] > 0:
        print('%s \'%s\' line %d cols %d-%d' %
              (change['label'],
               change['file'],
               change['line'],
               change['col'],
               change['col'] + change['chars']))

    # Advance unconditionally; doing this only after a report would leave
    # line and col stale for every line that has no change on this side.
    change['line'] += 1
    change['col'] = 1
    change['chars'] = 0
    change['pending'] = 0


def main():
    """
    Report changed column ranges for the latest commit of a repository.

    The repository path is read from ``sys.argv[1]``. ``HEAD~1`` is diffed
    against ``HEAD``; for every hunk the removed and the added text are
    compared character by character and the changed column ranges are
    printed line by line through print_changes().
    """
    repo = Repository(sys.argv[1])

    # Keys of the per-version state dicts. The first element of every tuple
    # returned by word_diff() is compared against / used as one of these.
    DIFF_DELETED = -1
    DIFF_ADDED = 1
    DIFF_EQUAL = 0

    # diff something. just the latest commit here for demonstration
    patches = diff(repo, 'HEAD~1', 'HEAD')

    for p in patches:
        for h in p.hunks:
            # One state dict per side of the hunk. 'end' is just used for
            # debugging and (so far manual) sanity checking.
            changes = {
                DIFF_DELETED: {
                    'file': p.delta.old_file.path,
                    'label': '-',
                    'start': h.old_start,
                    'end': h.old_start + h.old_lines - 1,
                    'lines': [],
                },
                DIFF_ADDED: {
                    'file': p.delta.new_file.path,
                    'label': '+',
                    'start': h.new_start,
                    'end': h.new_start + h.new_lines - 1,
                    'lines': [],
                },
            }

            if debug and h.old_lines > 0:
                print('old version has changes in lines %d to %d' %
                      (changes[DIFF_DELETED]['start'],
                       changes[DIFF_DELETED]['end']))
            if debug and h.new_lines > 0:
                print('new version has changes in lines %d to %d' %
                      (changes[DIFF_ADDED]['start'],
                       changes[DIFF_ADDED]['end']))

            # partition lines of the hunk into deleted and added lines
            for line in h.lines:
                if debug:
                    sys.stdout.write('%s %s' % (line.origin, line.content))

                if line.origin == '-':
                    changes[DIFF_DELETED]['lines'].append(line.content)
                elif line.origin == '+':
                    changes[DIFF_ADDED]['lines'].append(line.content)
                # Any other origin is ignored. Most likely it is '<' with
                # the content "\ No newline at end of file".

            # running the google approach on the competing sides of the hunk
            wdiff = word_diff(''.join(changes[DIFF_DELETED]['lines']),
                              ''.join(changes[DIFF_ADDED]['lines']))

            # calculate line and column numbers of changed parts
            for version in DIFF_DELETED, DIFF_ADDED:
                state = changes[version]
                # first column of a change in a line
                state['col'] = 1
                # Each side counts from its own hunk start; using the old
                # start for both would report '+' ranges with line numbers
                # of the old file.
                state['line'] = state['start']
                # changed chars in a line
                state['chars'] = 0
                # common chars after a change but before a newline
                # i.e., if another change appears in this line,
                # we need to add these pending ones to the range
                state['pending'] = 0

            for d in wdiff:
                if debug:
                    print(d)

                op, text = d[0], d[1]
                for char in text:
                    if char == '\n':
                        if op == DIFF_EQUAL:
                            # the line ends on both sides
                            for version in DIFF_DELETED, DIFF_ADDED:
                                print_changes(changes[version])
                        else:
                            # the newline itself is a changed character
                            changes[op]['chars'] += 1
                            print_changes(changes[op])
                    elif op == DIFF_EQUAL:
                        for version in DIFF_DELETED, DIFF_ADDED:
                            if changes[version]['chars'] == 0:
                                # no change seen yet in this line
                                changes[version]['col'] += 1
                            else:
                                changes[version]['pending'] += 1
                    else:
                        if changes[op]['pending'] > 0:
                            # another change in the same line: the common
                            # chars in between join the reported range
                            changes[op]['chars'] += changes[op]['pending']
                            changes[op]['pending'] = 0
                        changes[op]['chars'] += 1

            # flush whatever is left when the hunk does not end in a newline
            for version in DIFF_DELETED, DIFF_ADDED:
                print_changes(changes[version])


if __name__ == '__main__':
    # The repository path is the one required command line argument.
    if not sys.argv[1:]:
        for message in ('Usage: %s /path/to/repo\n\n' % sys.argv[0],
                        'Exiting.\n'):
            sys.stderr.write(message)
        sys.exit(1)
    main()
	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	# vim:fenc=utf-8
	#
	# Copyright © 2017 Olaf Lessenich <xai@linux.com>
	#
	# Distributed under terms of the MIT license.

	import sys
	from pygit2 import Repository
	import diff_match_patch as dmp_module

	# When True, main() additionally prints the line ranges of each hunk, the
	# raw hunk lines and every tuple of the character diff.
	debug = False

	# Module-wide diff_match_patch instance; word_diff() calls its diff_main().
	dmp = dmp_module.diff_match_patch()


	def diff(repo, commit1, commit2):
	    """
	    Diff two revisions of a repository without context lines.

	    Forwards both revisions to ``repo.diff`` and returns its result
	    unchanged. Setting ``context_lines=0`` is important here: the hunks
	    are later split into removed and added lines only.
	    """
	    return repo.diff(commit1, commit2, context_lines=0)


	def word_diff(a, b):
	    """
	    Compare two texts using the google approach (diff_match_patch).

	    Both texts are handed to ``dmp.diff_main`` as they are; nothing in
	    this function splits them into words. The result of ``diff_main`` is
	    returned unchanged; main() reads each element as an
	    ``(operation, text)`` pair.
	    """
	    return dmp.diff_main(a, b)


	def print_changes(change):
	    """
	    Report the pending change of the current line and move to the next line.

	    ``change`` is the per-version state dict built in main(). If
	    ``change['chars']`` is positive, one report line is printed with the
	    label, file, line number and the column range ``col`` to
	    ``col + chars`` (the end is one past the last changed column).

	    The counters are reset on every call, whether or not something was
	    printed: a line without changes still ends, so the line number has to
	    advance and the column has to restart at 1. ``pending`` is cleared as
	    well, because common characters trailing a change must not be added
	    to a change on the following line.
	    """
	    if change['chars'] > 0:
	        print('%s \'%s\' line %d cols %d-%d' %
	              (change['label'],
	               change['file'],
	               change['line'],
	               change['col'],
	               change['col'] + change['chars']))

	    # Advance unconditionally; doing this only after a report would leave
	    # line and col stale for every line that has no change on this side.
	    change['line'] += 1
	    change['col'] = 1
	    change['chars'] = 0
	    change['pending'] = 0


	def main():
	    """
	    Report changed column ranges for the latest commit of a repository.

	    The repository path is read from ``sys.argv[1]``. ``HEAD~1`` is diffed
	    against ``HEAD``; for every hunk the removed and the added text are
	    compared character by character and the changed column ranges are
	    printed line by line through print_changes().
	    """
	    repo = Repository(sys.argv[1])

	    # Keys of the per-version state dicts. The first element of every tuple
	    # returned by word_diff() is compared against / used as one of these.
	    DIFF_DELETED = -1
	    DIFF_ADDED = 1
	    DIFF_EQUAL = 0

	    # diff something. just the latest commit here for demonstration
	    patches = diff(repo, 'HEAD~1', 'HEAD')

	    for p in patches:
	        for h in p.hunks:
	            # One state dict per side of the hunk. 'end' is just used for
	            # debugging and (so far manual) sanity checking.
	            changes = {
	                DIFF_DELETED: {
	                    'file': p.delta.old_file.path,
	                    'label': '-',
	                    'start': h.old_start,
	                    'end': h.old_start + h.old_lines - 1,
	                    'lines': [],
	                },
	                DIFF_ADDED: {
	                    'file': p.delta.new_file.path,
	                    'label': '+',
	                    'start': h.new_start,
	                    'end': h.new_start + h.new_lines - 1,
	                    'lines': [],
	                },
	            }

	            if debug and h.old_lines > 0:
	                print('old version has changes in lines %d to %d' %
	                      (changes[DIFF_DELETED]['start'],
	                       changes[DIFF_DELETED]['end']))
	            if debug and h.new_lines > 0:
	                print('new version has changes in lines %d to %d' %
	                      (changes[DIFF_ADDED]['start'],
	                       changes[DIFF_ADDED]['end']))

	            # partition lines of the hunk into deleted and added lines
	            for line in h.lines:
	                if debug:
	                    sys.stdout.write('%s %s' % (line.origin, line.content))

	                if line.origin == '-':
	                    changes[DIFF_DELETED]['lines'].append(line.content)
	                elif line.origin == '+':
	                    changes[DIFF_ADDED]['lines'].append(line.content)
	                # Any other origin is ignored. Most likely it is '<' with
	                # the content "\ No newline at end of file".

	            # running the google approach on the competing sides of the hunk
	            wdiff = word_diff(''.join(changes[DIFF_DELETED]['lines']),
	                              ''.join(changes[DIFF_ADDED]['lines']))

	            # calculate line and column numbers of changed parts
	            for version in DIFF_DELETED, DIFF_ADDED:
	                state = changes[version]
	                # first column of a change in a line
	                state['col'] = 1
	                # Each side counts from its own hunk start; using the old
	                # start for both would report '+' ranges with line numbers
	                # of the old file.
	                state['line'] = state['start']
	                # changed chars in a line
	                state['chars'] = 0
	                # common chars after a change but before a newline
	                # i.e., if another change appears in this line,
	                # we need to add these pending ones to the range
	                state['pending'] = 0

	            for d in wdiff:
	                if debug:
	                    print(d)

	                op, text = d[0], d[1]
	                for char in text:
	                    if char == '\n':
	                        if op == DIFF_EQUAL:
	                            # the line ends on both sides
	                            for version in DIFF_DELETED, DIFF_ADDED:
	                                print_changes(changes[version])
	                        else:
	                            # the newline itself is a changed character
	                            changes[op]['chars'] += 1
	                            print_changes(changes[op])
	                    elif op == DIFF_EQUAL:
	                        for version in DIFF_DELETED, DIFF_ADDED:
	                            if changes[version]['chars'] == 0:
	                                # no change seen yet in this line
	                                changes[version]['col'] += 1
	                            else:
	                                changes[version]['pending'] += 1
	                    else:
	                        if changes[op]['pending'] > 0:
	                            # another change in the same line: the common
	                            # chars in between join the reported range
	                            changes[op]['chars'] += changes[op]['pending']
	                            changes[op]['pending'] = 0
	                        changes[op]['chars'] += 1

	            # flush whatever is left when the hunk does not end in a newline
	            for version in DIFF_DELETED, DIFF_ADDED:
	                print_changes(changes[version])


	if __name__ == '__main__':
	    # The repository path is the one required command line argument.
	    if len(sys.argv) < 2:
	        sys.stderr.write('Usage: %s /path/to/repo\n\n' % sys.argv[0])
	        sys.stderr.write('Exiting.\n')
	        sys.exit(1)
	    main()