Created
December 24, 2010 10:28
-
-
Save knipknap/754092 to your computer and use it in GitHub Desktop.
Formatting a diff_match_patch diff array into a pretty two-column HTML table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Diff table: whitespace inside the cells is preserved verbatim. */
.diff {
    white-space: pre;
    text-align: left;
}

.diff td {
    padding: 5px 2px;
}

/* Line-number column: narrow, right-aligned, grey on white. */
.diff td.line-numbers {
    width: 1%;
    color: #999;
    background-color: #fff;
    text-align: right;
}

.diff span,
.diff del,
.diff ins {
    vertical-align: middle;
}

/* Highlighted text that carries the "change" class. */
.diff span.change {
    background-color: yellow;
}

/* Centered, italic block used for the "nodiff" message. */
.diff span.nodiff {
    display: block;
    font-style: italic;
    text-align: center;
}

/* Deleted text. */
.diff del {
    background-color: #f40;
}

/* Inserted text: coloured background instead of the default underline. */
.diff ins {
    text-decoration: none;
    background-color: #3e3;
}

/* Thin dashed rule rendered by "separator" spans. */
.diff span.separator {
    display: block;
    height: 3px;
    margin: 3px 0 2px 0;
    border-bottom: 1px dashed #888;
}

.diff span.line-number {
    margin-left: 3px;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Copyright (C) 2010-2011 Samuel Abels. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
""" | |
Utilities for formatting a diff_match_patch diff array into
a pretty two-column HTML table.
""" | |
from cgi import escape | |
from ips.external.diff_match_patch import diff_match_patch as dmp | |
def _line_iter(diffs):
    """
    Walk a diff array one line fragment at a time.

    Every (op, data) pair in diffs is split with ``splitlines(True)`` so
    that no yielded fragment spans more than one line; line terminators
    are kept on the fragment. Two 1-based counters are tracked: the first
    advances after a newline-terminated DIFF_INSERT or DIFF_EQUAL
    fragment, the second after a newline-terminated DIFF_DELETE or
    DIFF_EQUAL fragment.

    Yields 4-tuples ``(op, first_counter, second_counter, fragment)``,
    where the counters are the values in effect *before* the fragment's
    own newline (if any) is counted.
    """
    first_no = 1
    second_no = 1
    for op, data in diffs:
        for fragment in data.splitlines(True):
            yield op, first_no, second_no, fragment

            # Only a fragment that completes a line moves the counters.
            if not fragment.endswith('\n'):
                continue
            if op in (dmp.DIFF_INSERT, dmp.DIFF_EQUAL):
                first_no += 1
            if op in (dmp.DIFF_DELETE, dmp.DIFF_EQUAL):
                second_no += 1
def _remove_equal_lines(diffs):
    """
    Drop unchanged lines from a diff array, keeping only changed lines
    and the lines directly around a change.

    The diff array is walked twice with _line_iter: once to collect the
    line numbers that are touched by an insertion or deletion (plus their
    neighbours), and once to pick the fragments that fall on those lines.
    Because of the two passes, diffs has to be re-iterable (e.g. a list).

    Returns a list of 5-tuples
    ``(op, has_change, lineno_diff1, lineno_diff2, data)`` where:

    - op and data are the fragment's operation and text as yielded by
      _line_iter,
    - has_change is True if the fragment sits on a line that contains an
      insertion or deletion, and False if it was kept only as
      surrounding context,
    - lineno_diff1 and lineno_diff2 are the two line counters yielded by
      _line_iter for the fragment.
    """
    # Pass 1: collect the line numbers of interest.
    context_lines = set()
    inserted_lines = set()
    deleted_lines = set()
    for op, first_no, second_no, _ in _line_iter(diffs):
        if op != dmp.DIFF_EQUAL:
            # The line preceding any change is always kept as context.
            context_lines.add(max(1, first_no - 1))
            if op == dmp.DIFF_INSERT:
                inserted_lines.add(first_no)
                context_lines.add(first_no + 1)
            elif op == dmp.DIFF_DELETE:
                deleted_lines.add(second_no)
                context_lines.add(first_no)

    # Pass 2: keep fragments on changed lines (flag True) or on context
    # lines (flag False); everything else is discarded.
    kept = []
    for op, first_no, second_no, data in _line_iter(diffs):
        on_changed_line = (first_no in inserted_lines
                           or second_no in deleted_lines)
        if on_changed_line or first_no in context_lines:
            kept.append((op, on_changed_line, first_no, second_no, data))
    return kept
def diff2html(diffs, left_label=None, right_label=None):
    """
    Render a diff array as a three-cell HTML table.

    The fragments returned by _remove_equal_lines(diffs) are distributed
    over three cells of a single table row: line numbers, the text with
    insertions wrapped in ``<ins>``, and the text with deletions wrapped
    in ``<del>``. Unchanged text that shares a line with a change is
    wrapped in ``<span class="change">``. Filler spans are added so that
    both text cells stay vertically aligned, and a separator span is
    emitted wherever the shown line numbers are not consecutive.

    :param diffs: The diff array, as accepted by _remove_equal_lines.
    :param left_label: Optional caption for the first text column.
    :param right_label: Optional caption for the second text column.
        If at least one label is truthy, a header row with
        "Version: <label>" cells is added; labels are HTML-escaped.
    :return: The HTML table as a string. If no fragment was rendered,
        the table contains a single "No difference" cell.
    """
    # NOTE(review): ``escape`` is the file-level ``from cgi import escape``;
    # cgi.escape no longer exists in Python 3.8+, so confirm the
    # interpreter this module is meant to run on.
    separator = '<span class="separator"><br/></span>'
    filler = '<span>\n</span>'

    line_html = []
    left_html = []
    right_html = []
    max_lineno = 0
    left_line_start = True
    right_line_start = True
    # Line imbalance between the two text columns: decremented for every
    # complete inserted line, incremented for every complete deleted line.
    skip = 0

    for op, has_change, lineno, _lineno_diff2, data in _remove_equal_lines(diffs):
        # Add some spacing in between sections of the diff.
        if lineno > max_lineno + 1 and max_lineno != 0:
            line_html.append(separator)
            left_html.append(separator)
            right_html.append(separator)

        # Escape the text, but keep a trailing newline outside of the
        # escaping so that it can be re-attached verbatim.
        nl = 1 if data.endswith('\n') else 0
        text = escape(data.rstrip('\n'))
        if nl:
            text += '\n'

        if op == dmp.DIFF_INSERT:
            right_html.append('')
            left_html.append('<ins>%s</ins>' % text)
            skip -= nl
            left_line_start = bool(nl)
        elif op == dmp.DIFF_DELETE:
            right_html.append('<del>%s</del>' % text)
            left_html.append('')
            skip += nl
            right_line_start = bool(nl)
        elif op == dmp.DIFF_EQUAL:
            # Re-align left and right side: pad whichever column is
            # short, but only while that column is at a line start.
            if right_line_start and skip < 0:
                right_html.extend([filler] * -skip)
                skip = 0
            if left_line_start and skip > 0:
                line_html.extend([filler] * skip)
                left_html.extend([filler] * skip)
                skip = 0

            # Add content that was not changed. If that content is within
            # a line that has another change, highlight it.
            if has_change:
                change = '<span class="change">%s</span>' % text
            else:
                change = '<span>%s</span>' % text
            right_html.append(change)
            left_html.append(change)
            left_line_start = bool(nl)
            right_line_start = bool(nl)

        # Append each line number to the first column once, the first
        # time an inserted or unchanged fragment reaches that line.
        if op in (dmp.DIFF_INSERT, dmp.DIFF_EQUAL) and lineno > max_lineno:
            line_html.append('<span class="line-number">%d \n</span>' % lineno)
            max_lineno = lineno

    if left_label or right_label:
        head = ('<tr>'
                + '<th></th>'
                + '<th>Version: %s</th>' % escape(left_label or '')
                + '<th>Version: %s</th>' % escape(right_label or '')
                + '</tr>')
    else:
        head = ''

    # Decide on the text columns rather than on line_html: a diff made up
    # of deletions only never adds a line number, yet still has content
    # that must not be reported as "No difference".
    if left_html or right_html:
        columns = ('<td class="line-numbers">'
                   + ''.join(line_html)
                   + '</td>'
                   + '<td class="expand">'
                   + ''.join(left_html)
                   + '</td>'
                   + '<td class="expand">'
                   + ''.join(right_html)
                   + '</td>')
    else:
        columns = ('<td colspan="3">'
                   + '<span class="nodiff">No difference</span>'
                   + '</td>')

    return ('<table class="diff">'
            + head
            + '<tr>' + columns + '</tr>'
            + '</table>')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment