Skip to content

Instantly share code, notes, and snippets.

@knipknap
Created December 24, 2010 10:28
Show Gist options
  • Save knipknap/754092 to your computer and use it in GitHub Desktop.
Save knipknap/754092 to your computer and use it in GitHub Desktop.
Formatting a diff_match_patch diff array into a pretty two-column HTML table
/* Stylesheet for the table with class "diff". */

/* Table container: left-aligned text, whitespace and newlines preserved. */
.diff {
text-align: left;
white-space: pre;
}
/* Padding for every cell of the table. */
.diff td {
padding: 5px 2px;
}
/* Line-number cell: right-aligned, grey on white, kept narrow. */
.diff td.line-numbers {
text-align: right;
color: #999;
background-color: #fff;
width: 1%;
}
/* Shared vertical alignment for all inline fragments. */
.diff span, .diff del, .diff ins {
vertical-align: middle;
}
/* Fragments carrying the "change" class get a yellow background. */
.diff span.change {
background-color: yellow;
}
/* Centered, italic block for elements carrying the "nodiff" class. */
.diff span.nodiff {
display: block;
text-align: center;
font-style: italic;
}
/* Deleted text: red-orange background. */
.diff del {
background-color: #f40;
}
/* Inserted text: green background, text-decoration switched off. */
.diff ins {
background-color: #3e3;
text-decoration: none;
}
/* Separator element: a thin block with a dashed bottom border. */
.diff span.separator {
display: block;
height: 3px;
margin: 3px 0 2px 0;
border-bottom: 1px dashed #888;
}
/* Small left margin for each individual line number. */
.diff span.line-number {
margin-left: 3px;
}
#!/usr/bin/env python
# Copyright (C) 2010-2011 Samuel Abels.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Utilities for formatting a diff_match_patch diff array into
a pretty two-column HTML table.
"""
try:
    from html import escape  # Python 3.2+
except ImportError:
    from cgi import escape  # Python 2 fallback; cgi.escape was removed in Python 3.8
from ips.external.diff_match_patch import diff_match_patch as dmp
def _line_iter(diffs):
"""
A generator that lets you iterate over a diff array such that
lines are always broken into separate chunks of data, and such
that the current line number is returned, i.e.::
for op, line_number_diff1, line_number_diff2, data in _line_iter(diffs):
...
"""
lineno_diff1 = 1
lineno_diff2 = 1
for op, data in diffs:
for line in data.splitlines(True):
yield op, lineno_diff1, lineno_diff2, line
if line.endswith('\n'):
if op == dmp.DIFF_INSERT:
lineno_diff1 += 1
elif op == dmp.DIFF_DELETE:
lineno_diff2 += 1
elif op == dmp.DIFF_EQUAL:
lineno_diff1 += 1
lineno_diff2 += 1
def _remove_equal_lines(diffs):
    """
    Reduce a diff array to the line fragments worth displaying.

    Fragments on changed lines are kept, together with the equal lines
    directly before and after a change; all other equal lines are dropped.

    The result is a list of 5-tuples
    (op, has_change, lineno_diff1, lineno_diff2, data), where::

        - op and data are the operation and the text of one line
          fragment, as produced by _line_iter()
        - has_change is True if the fragment lies on a changed line, and
          False if it is a context line (directly before or after a
          change)
        - lineno_diff1 and lineno_diff2 are the two line counters
          reported by _line_iter() for the fragment

    Note that ``diffs`` is iterated twice, so it must be re-iterable.
    """
    context = set()
    touched1 = set()
    touched2 = set()

    # First pass: record which lines carry a change and which of their
    # neighbours serve as context.
    for op, line1, line2, _chunk in _line_iter(diffs):
        if op != dmp.DIFF_EQUAL:
            context.add(max(1, line1 - 1))
            if op == dmp.DIFF_INSERT:
                touched1.add(line1)
                context.add(line1 + 1)
            elif op == dmp.DIFF_DELETE:
                touched2.add(line2)
                context.add(line1)

    # Second pass: keep only the fragments on recorded lines.
    kept = []
    for op, line1, line2, chunk in _line_iter(diffs):
        if line1 in touched1 or line2 in touched2:
            kept.append((op, True, line1, line2, chunk))
        elif line1 in context:
            kept.append((op, False, line1, line2, chunk))
    return kept
def diff2html(diffs, left_label=None, right_label=None):
    """
    Given a diff array, this function returns a pretty two-column HTML
    table.

    The table has one body row with three cells: the line numbers, the
    left column and the right column. Inserted text is rendered as
    ``<ins>`` in the left column, deleted text as ``<del>`` in the right
    column, and equal text appears in both. If nothing is left to show, a
    single cell saying "No difference" is produced instead.

    :param diffs: A diff array of (op, data) tuples.
    :param left_label: Optional heading for the left column.
    :param right_label: Optional heading for the right column.
    :return: The HTML of the table as a string. A header row is included
        only if at least one label is given.
    """
    separator = '<span class="separator"><br/></span>'
    filler = '<span>\n</span>'

    line_html = []
    left_html = []
    right_html = []
    max_lineno = 0
    left_line_start = True
    right_line_start = True
    skip = 0  # > 0: left side is behind by that many lines; < 0: right side.
    for op, has_change, lineno, _lineno_diff2, data in _remove_equal_lines(diffs):
        # Add some spacing in between sections of the diff.
        if lineno > max_lineno + 1 and max_lineno != 0:
            line_html.append(separator)
            left_html.append(separator)
            right_html.append(separator)

        # Append the left and right text to the second/third columns of
        # the table. quote=False leaves quotes untouched, so the output is
        # the same whether escape() comes from cgi or from html.
        text = escape(data.rstrip('\n'), quote=False)
        nl = int(data.endswith('\n'))
        if nl:
            text += '\n'

        if op == dmp.DIFF_INSERT:
            right_html.append('')
            left_html.append('<ins>%s</ins>' % text)
            skip -= nl
            left_line_start = bool(nl)
        elif op == dmp.DIFF_DELETE:
            right_html.append('<del>%s</del>' % text)
            left_html.append('')
            skip += nl
            right_line_start = bool(nl)
        elif op == dmp.DIFF_EQUAL:
            # Re-align left and right side by padding the shorter one
            # with empty lines.
            if right_line_start and skip < 0:
                right_html.extend([filler] * -skip)
                skip = 0
            if left_line_start and skip > 0:
                line_html.extend([filler] * skip)
                left_html.extend([filler] * skip)
                skip = 0

            # Add content that was not changed. If that content is within
            # a line that has another change, highlight it.
            if has_change:
                change = '<span class="change">%s</span>' % text
            else:
                change = '<span>%s</span>' % text
            right_html.append(change)
            left_html.append(change)
            left_line_start = bool(nl)
            right_line_start = bool(nl)

        # Append the line number to the first column of the table, once
        # per line.
        if op in (dmp.DIFF_INSERT, dmp.DIFF_EQUAL) and lineno > max_lineno:
            line_html.append('<span class="line-number">%d&nbsp;\n</span>' % lineno)
            max_lineno = lineno

    if not left_label and not right_label:
        head = ''
    else:
        head = ''.join((
            '<tr>',
            '<th></th>',
            '<th>Version: %s</th>' % escape(left_label or '', quote=False),
            '<th>Version: %s</th>' % escape(right_label or '', quote=False),
            '</tr>',
        ))

    if line_html:
        columns = ''.join((
            '<td class="line-numbers">',
            ''.join(line_html),
            '</td>',
            '<td class="expand">',
            ''.join(left_html),
            '</td>',
            '<td class="expand">',
            ''.join(right_html),
            '</td>',
        ))
    else:
        columns = ''.join((
            '<td colspan="3">',
            '<span class="nodiff">No difference</span>',
            '</td>',
        ))

    return ''.join((
        '<table class="diff">',
        head,
        '<tr>', columns, '</tr>',
        '</table>',
    ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment