Skip to content

Instantly share code, notes, and snippets.

@jlumbroso
Created May 8, 2022 02:18
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jlumbroso/3ef433b4402b4f157728920a66cc15ed to your computer and use it in GitHub Desktop.
Save jlumbroso/3ef433b4402b4f157728920a66cc15ed to your computer and use it in GitHub Desktop.
Side-by-Side Diff Comparison in Python
# Code licensed LGPLv3 by Jérémie Lumbroso <lumbroso@cs.princeton.edu>
import difflib
import itertools
import textwrap
import typing
def side_by_side(
    left: typing.List[str],
    right: typing.List[str],
    width: int = 78,
    as_string: bool = False,
    separator: typing.Optional[str] = " | ",
    left_title: typing.Optional[str] = None,
    right_title: typing.Optional[str] = None,
) -> typing.Union[str, typing.List[str]]:
    """Returns either the list of lines, or string of lines, that results from
    merging the two lists side-by-side.

    The i-th entry of ``left`` is always placed on the same output row as the
    i-th entry of ``right``: each pair of entries is wrapped to the column
    width, and the shorter of the two wrapped cells is padded with blank rows,
    so that wrapping on one side never shifts the rows of the other side.

    Words longer than the column width are not broken; such a word overflows
    its column rather than being split.

    :param left: Lines of text to place on the left side
    :type left: typing.List[str]
    :param right: Lines of text to place on the right side
    :type right: typing.List[str]
    :param width: Character width of the overall output, defaults to 78
    :type width: int, optional
    :param as_string: Whether to return a string (as opposed to a list of strings), defaults to False
    :type as_string: bool, optional
    :param separator: String separating the left and right side, defaults to " | "
        (``None`` or an empty string also selects the default)
    :type separator: typing.Optional[str], optional
    :param left_title: Title to place on the left side, defaults to None
    :type left_title: typing.Optional[str], optional
    :param right_title: Title to place on the right side, defaults to None
    :type right_title: typing.Optional[str], optional
    :return: Lines or text of the merged side-by-side output.
    :rtype: typing.Union[str, typing.List[str]]
    """
    DEFAULT_SEPARATOR = " | "
    separator = separator or DEFAULT_SEPARATOR

    # Each column gets half of what remains once the separator is accounted
    # for (one extra character is given up when `width` is even). Clamp to 1
    # because textwrap rejects a non-positive width with a ValueError.
    mid_width = max(1, (width - len(separator) - (1 - width % 2)) // 2)

    wrapper = textwrap.TextWrapper(
        width=mid_width,
        break_long_words=False,
        replace_whitespace=False,
    )

    def wrap_cell(text):
        # `None` marks a row that exists only on the other (longer) side; it
        # contributes no rows of its own and is padded by the caller.
        if text is None:
            return []
        # An empty or whitespace-only line wraps to [], but must still
        # occupy one (blank) row in the output.
        return wrapper.wrap(text) or [""]

    rows = []

    # The header is emitted as soon as either title is provided.
    if left_title is not None or right_title is not None:
        rows.append((left_title or "", right_title or ""))
        rows.append(("-" * mid_width, "-" * mid_width))

    # Zip the source lines first and wrap each pair afterwards, so that the
    # two sides stay on matching rows even when only one of them wraps.
    for left_line, right_line in itertools.zip_longest(left, right):
        rows.extend(
            itertools.zip_longest(
                wrap_cell(left_line),
                wrap_cell(right_line),
                fillvalue="",
            )
        )

    # Pad the left cell to the column width; the right cell is left as-is.
    lines = [
        left_text.ljust(mid_width) + separator + right_text
        for left_text, right_text in rows
    ]

    if as_string:
        return "\n".join(lines)
    return lines
def better_diff(
    left: typing.List[str],
    right: typing.List[str],
    width: int = 78,
    as_string: bool = False,
    separator: typing.Optional[str] = None,
    left_title: typing.Optional[str] = None,
    right_title: typing.Optional[str] = None,
) -> typing.Union[str, typing.List[str]]:
    """Returns a side-by-side comparison of the two provided inputs, showing
    common lines between both inputs, and the lines that are unique to each.

    The inputs are compared with ``difflib.Differ``. A line common to both
    inputs is placed in both columns, a line found only in ``left`` is paired
    with an empty right-hand entry, and a line found only in ``right`` is
    paired with an empty left-hand entry. The two resulting columns are then
    rendered by ``side_by_side``.

    :param left: Lines of text to place on the left side
    :type left: typing.List[str]
    :param right: Lines of text to place on the right side
    :type right: typing.List[str]
    :param width: Character width of the overall output, defaults to 78
    :type width: int, optional
    :param as_string: Whether to return a string (as opposed to a list of strings), defaults to False
    :type as_string: bool, optional
    :param separator: String separating the left and right side, defaults to None,
        in which case ``side_by_side`` falls back to its own default separator
    :type separator: typing.Optional[str], optional
    :param left_title: Title to place on the left side, defaults to None
    :type left_title: typing.Optional[str], optional
    :param right_title: Title to place on the right side, defaults to None
    :type right_title: typing.Optional[str], optional
    :return: Lines or text of the merged side-by-side diff comparison output.
    :rtype: typing.Union[str, typing.List[str]]
    """
    left_column = []
    right_column = []

    # adapted from
    # LINK: https://stackoverflow.com/a/66091742/408734
    for entry in difflib.Differ().compare(left, right):
        # Every Differ line is a one-character marker, a space, then the text.
        marker = entry[0]
        text = entry[2:]

        # Anything that is not a common/removed/added line (such as the "?"
        # intraline hint lines) is dropped.
        if marker not in (" ", "-", "+"):
            continue

        # "+" lines exist only on the right, "-" lines only on the left,
        # and " " lines appear in both columns.
        left_column.append("" if marker == "+" else text)
        right_column.append("" if marker == "-" else text)

    return side_by_side(
        left=left_column,
        right=right_column,
        width=width,
        as_string=as_string,
        separator=separator,
        left_title=left_title,
        right_title=right_title,
    )
@jlumbroso
Copy link
Author

Usage example:

# Compare two short lists of one-letter lines; the extra spacing in the first
# list only lines its items up visually with the matching items of the second.
# Only a left title is given, so the right-hand title is left empty.
print(better_diff(
    ["a", "c",      "a", "a", "a", "a",      "a", "a", "e"],
    ["a", "c", "b", "a", "a", "a", "a", "d", "a", "a"],
    width=20,
    as_string=True,
    left_title="  LEFT",
))

will output:

  LEFT   | 
-------- | --------
a        | a
c        | c
         | b
a        | a
a        | a
a        | a
a        | a
         | d
a        | a
a        | a
e        | 

@jlumbroso
Copy link
Author

This Gist also contains a useful side_by_side() function that places two texts next to each other, with text wrapping, at a pre-specified overall width.

For example:

# some random text
LOREM = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."

# split into paragraphs: one list entry per sentence, with an empty entry
# (a blank line) between consecutive sentences
LOREM_PARA = LOREM.replace(". ", ".\n\n").split("\n")

# text A is a full copy; text B drops the first two entries
# (the first sentence and the blank line that follows it)
TEXT_A = LOREM_PARA[:]
TEXT_B = LOREM_PARA[2:]

# reflow as side-by-side, returning a single string for printing
print(side_by_side(TEXT_A, TEXT_B, width=50, as_string=True))

will output:

Lorem ipsum dolor sit   | Ut enim ad minim
amet, consectetur       | veniam, quis nostrud
adipiscing elit, sed do | exercitation ullamco
eiusmod tempor          | laboris nisi ut aliquip
incididunt ut labore et | ex ea commodo
dolore magna aliqua.    | consequat.
                        | 
Ut enim ad minim        | Duis aute irure dolor
veniam, quis nostrud    | in reprehenderit in
exercitation ullamco    | voluptate velit esse
laboris nisi ut aliquip | cillum dolore eu fugiat
ex ea commodo           | nulla pariatur.
consequat.              | 
                        | Excepteur sint occaecat
Duis aute irure dolor   | cupidatat non proident,
in reprehenderit in     | sunt in culpa qui
voluptate velit esse    | officia deserunt mollit
cillum dolore eu fugiat | anim id est laborum.
nulla pariatur.         | 
                        | 
Excepteur sint occaecat | 
cupidatat non proident, | 
sunt in culpa qui       | 
officia deserunt mollit | 
anim id est laborum.    | 

@haferburg
Copy link

haferburg commented Jun 15, 2022

I think side_by_side would be more useful if it zipped the lines first, then wrapped each pair of lines, and then zipped the wrapped results again. That way, two lines that were next to each other in the original texts would stay next to each other in the wrapped output.

Here's what I've tested with:

# Build two lists of N - 3 lines each: for n = 3 .. N-1, a line is labelled
# "n+1:" and repeats its word n times, so later lines are progressively longer.
N = 9
left = [f"{n+1}: " + " ".join(n * ["hello?"]) + "\n" for n in range(3, N)]
right = [f"{n+1}: " + " ".join(n * ["what?"]) + "\n" for n in range(3, N)]
# Render both lists next to each other within 70 characters.
print(side_by_side(left, right, width=70, as_string=True))

Your code produces

4: hello? hello? hello?           | 4: what? what? what?
5: hello? hello? hello? hello?    | 5: what? what? what? what?
6: hello? hello? hello? hello?    | 6: what? what? what? what? what?
hello?                            | 7: what? what? what? what? what?
7: hello? hello? hello? hello?    | what?
hello? hello?                     | 8: what? what? what? what? what?
8: hello? hello? hello? hello?    | what? what?
hello? hello? hello?              | 9: what? what? what? what? what?
9: hello? hello? hello? hello?    | what? what? what?
hello? hello? hello? hello?       |

I think it should produce the following:

4: hello? hello? hello?           | 4: what? what? what?
5: hello? hello? hello? hello?    | 5: what? what? what? what?
6: hello? hello? hello? hello?    | 6: what? what? what? what? what?
hello?                            |
7: hello? hello? hello? hello?    | 7: what? what? what? what? what?
hello? hello?                     | what?
8: hello? hello? hello? hello?    | 8: what? what? what? what? what?
hello? hello? hello?              | what? what?
9: hello? hello? hello? hello?    | 9: what? what? what? what? what?
hello? hello? hello? hello?       | what? what? what?

@jlumbroso
Copy link
Author

Dear @haferburg !!

Thanks for taking the time to chime in about this — well observed, I completely agree. I am going to look into making this change tomorrow or Sunday, and I'll follow up when I am done. You rock!!

@jlumbroso giving a high five to @haferburg

@soxofaan
Copy link

soxofaan commented Apr 6, 2023

FYI:
I worked on an alternative implementation here: https://gist.github.com/soxofaan/e97112c4789ee74e1bf61532c998c0eb

The better_diff of this gist did not work well for me; e.g., using it on the same data as in https://gist.github.com/soxofaan/e97112c4789ee74e1bf61532c998c0eb?permalink_comment_id=4528416#gistcomment-4528416 gave me this:

{                                     | {
  "exp": 1680773344,                  |   "exp": 1680773344,
  "iat": 1680773044,                  |   "iat": 1680773044,
  "jti":                              | 
"40410c7c-8828-4755-a5b8-fa09b8736542", |   "jti":
                                      | "564abdb3-6e34-4a7c-b00e-77d1ee675847",
  "iss":                              |   "iss":
"http://localhost:8642/realms/i168realm", | "http://localhost:8642/realms/i168realm",
  "aud": "account",                   |   "aud": "account",
  "sub":                              | 
"7e7feab3-afd4-4186-a00c-c546ac51a749", |   "sub":
                                      | "d2aa0547-daaa-478b-9cb9-1ba2b1aaf47c",
  "typ": "Bearer",                    |   "typ": "Bearer",
                                      |   "azp": "service-client-L8FFS94R",
                                      |   "preferred_username": "service-
  "azp": "public-client-VQ5r6dli",    | account-service-client-l8ffs94r",
  "session_state":                    | 
"0c09bcad-45de-47ff-b645-e1212047d39a", | 
  "preferred_username":               | 
"user-c39y4yag",                      |   "email_verified": false,
  "email_verified": false,            |   "acr": "1",
  "acr": "1",                         |   "realm_access": {
  "realm_access": {                   |     "roles": [
    "roles": [                        |       "default-roles-i168realm",
      "default-roles-i168realm",      |       "offline_access",

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment