Skip to content

Instantly share code, notes, and snippets.

@noporpoise
Last active November 14, 2023 18:59
Show Gist options
  • Star 14 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save noporpoise/16e731849eb1231e86d78f9dfeca3abc to your computer and use it in GitHub Desktop.
Save noporpoise/16e731849eb1231e86d78f9dfeca3abc to your computer and use it in GitHub Desktop.
Apply unified diff patches in pure python2/3
#!/usr/bin/env python
# coding=utf-8
# License: Public domain (CC0)
# Isaac Turner 2016/12/05
from __future__ import print_function
import difflib
import re
_no_eol = "\ No newline at end of file"
_hdr_pat = re.compile("^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@$")
def make_patch(a,b):
    """
    Get unified string diff between two strings. Trims top two lines.
    Returns empty string if strings are identical.

    Both strings are split with ``splitlines(True)`` and compared with
    ``difflib.unified_diff`` using zero lines of context. Any diff line that
    does not end in "\\n" is followed by a "\\ No newline at end of file"
    marker line, so every line of the returned patch ends in "\\n".
    """
    diffs = difflib.unified_diff(a.splitlines(True), b.splitlines(True), n=0)
    # Drop the "---" / "+++" file header. An empty diff yields nothing at
    # all, in which case there is simply nothing to skip.
    try:
        next(diffs)
        next(diffs)
    except StopIteration:
        pass
    return ''.join(
        d if d.endswith('\n') else d + '\n' + _no_eol + '\n'
        for d in diffs
    )
def apply_patch(s,patch,revert=False):
    """
    Apply patch to string s to recover newer string.
    If revert is True, treat s as the newer string, recover older string.

    s      -- text to patch; it is split with ``splitlines(True)``.
    patch  -- unified diff text. Leading "---" / "+++" lines are skipped;
              everything after them must be hunks introduced by an
              "@@ -a,b +c,d @@" header.
    revert -- apply the patch backwards.

    Returns the patched text as a single string.

    Raises ValueError (a subclass of Exception) when a hunk header does not
    parse or when its line number goes backwards or past the end of s.
    The content of removed and context lines is not compared against s;
    only the first character of each patch line is inspected.
    """
    src = s.splitlines(True)
    p = patch.splitlines(True)
    out = []  # pieces of the result, joined once at the end
    i = sl = 0  # i: index into p, sl: next unconsumed line of src
    # Which header group locates the hunk in `src`, and which prefix marks
    # lines that belong in the output.
    (midx,sign) = (1,'+') if not revert else (3,'-')
    while i < len(p) and p[i].startswith(("---","+++")):
        i += 1  # skip header lines
    while i < len(p):
        m = _hdr_pat.match(p[i])
        if not m:
            raise ValueError("Bad patch -- regex mismatch [line "+str(i)+"]")
        # Header line numbers are 1-based. A zero-length range names the
        # line *before* the hunk, so the hunk starts one line further on.
        l = int(m.group(midx))-1 + (m.group(midx+1) == '0')
        if sl > l or l > len(src):
            raise ValueError("Bad patch -- bad line num [line "+str(i)+"]")
        # Copy the untouched lines between the previous hunk and this one.
        out.extend(src[sl:l])
        sl = l
        i += 1
        while i < len(p) and p[i][0] != '@':
            if i+1 < len(p) and p[i+1][0] == '\\':
                # Next line is a "\ No newline at end of file" marker: the
                # newline ending this patch line is not part of the text.
                line = p[i][:-1]
                i += 2
            else:
                line = p[i]
                i += 1
            if len(line) > 0:
                if line[0] == sign or line[0] == ' ':
                    out.append(line[1:])
                # Every line that does not carry `sign` consumes one line
                # of src.
                sl += (line[0] != sign)
    out.extend(src[sl:])
    return ''.join(out)
#
# Testing
#
import random
import string
import traceback
import sys
import codecs
def _apply_or_describe(s, patch, revert=False):
    """Return apply_patch's result, or a description of the exception it raised."""
    # Deliberately broad: this only runs while reporting a failure, and the
    # report must not be cut short by the very error being reported.
    try:
        return apply_patch(s, patch, revert)
    except Exception as err:
        return "<apply_patch raised %r>" % (err,)
def test_diff(a,b):
    """
    Check that the patch from a to b round-trips in both directions.

    Builds the patch with make_patch, applies it to a expecting b, and
    reverts it from b expecting a. On any mismatch or exception the inputs,
    the patch and both patch results are printed, followed by the
    traceback, and the process exits with status -1.
    """
    mp = make_patch(a,b)
    try:
        forward = apply_patch(a,mp)
        backward = apply_patch(b,mp,True)
        # Raise explicitly instead of using `assert`, which is removed when
        # Python runs with -O and would let failures pass silently.
        if forward != b:
            raise AssertionError("applying the patch to a did not give b")
        if backward != a:
            raise AssertionError("reverting the patch from b did not give a")
    except Exception:
        # Capture the traceback before doing anything else that might raise.
        failure = traceback.format_exc()
        print("=== a ===")
        print([a])
        print("=== b ===")
        print([b])
        print("=== mp ===")
        print([mp])
        print("=== a->b ===")
        print(_apply_or_describe(a,mp))
        print("=== a<-b ===")
        print(_apply_or_describe(b,mp,True))
        sys.stderr.write(failure)
        sys.exit(-1)
def randomly_interleave(*args):
    """
    Yield the items of several iterables in a random merged order.

    Each step picks one of the still-active iterators at random and yields
    its next item, so items from the same iterable keep their relative
    order. An iterator is dropped once it is found to be exhausted.
    """
    pending = list(map(iter, args))
    while len(pending) > 0:
        pick = random.randrange(len(pending))
        try:
            yield next(pending[pick])
        except StopIteration:
            # Constant-time removal: the last iterator takes the place of
            # the exhausted one and the list shrinks by one.
            last = pending.pop()
            if pick < len(pending):
                pending[pick] = last
def rand_ascii():
    """Return one character chosen at random from string.printable."""
    alphabet = string.printable
    return random.choice(alphabet)
def rand_unicode():
    """
    Return one random character from the Basic Multilingual Plane.

    The code point is drawn uniformly from 0..0xFFFF, skipping the
    surrogate range 0xD800..0xDFFF: surrogates are not characters in their
    own right and cannot be encoded as UTF-8, e.g. when printed.
    """
    try:
        to_char = unichr  # Python 2: chr() only covers 0..255 there
    except NameError:
        to_char = chr  # Python 3
    while True:
        codepoint = random.randrange(0x10000)
        if not 0xD800 <= codepoint <= 0xDFFF:
            return to_char(codepoint)
def generate_test(nlines=10,linelen=10,randchar=rand_ascii):
    """
    Generate two strings with approx `nlines` lines, which share approx half their
    lines. Then run the diff/patch test unit with the two strings.
    Lines are random characters and may include newline / linefeeds.

    nlines   -- exclusive upper bound for each of the three random counts
                (chunks only in a, only in b, and shared). Values <= 0
                give two empty strings.
    linelen  -- number of randchar() results joined into each chunk.
    randchar -- zero-argument callable returning a string.

    Chunks are joined without any separator, so the strings only contain
    line breaks where randchar happens to produce them.
    """
    def rand_count():
        # random.randrange(0) raises ValueError, so map "no lines" to 0.
        return random.randrange(nlines) if nlines > 0 else 0

    def rand_chunks(count):
        return [''.join([randchar() for _ in range(linelen)])
                for _ in range(count)]

    aonly,bonly,nshared = (rand_count() for _ in range(3))
    a = rand_chunks(aonly)
    b = rand_chunks(bonly)
    ab = rand_chunks(nshared)
    # The shared chunks are interleaved independently into each side, so
    # they keep their own order but land at different positions in a and b.
    a = randomly_interleave(a,ab)
    b = randomly_interleave(b,ab)
    test_diff(''.join(a),''.join(b))
def std_tests():
    """Run test_diff over a fixed list of hand-written (old, new) string pairs."""
    cases = (
        ("asdf\nhamster\nmole\nwolf\ndog\ngiraffe",
         "asdf\nhampster\nmole\nwolf\ndooog\ngiraffe\n"),
        ("asdf\nhamster\nmole\nwolf\ndog\ngiraffe",
         "hampster\nmole\nwolf\ndooog\ngiraffe\n"),
        ("hamster\nmole\nwolf\ndog",
         "asdf\nhampster\nmole\nwolf\ndooog\ngiraffe\n"),
        ("", ""),
        ("", "asdf\nasf"),
        ("asdf\nasf", "xxx"),
        # Things can get nasty, we need to be able to handle any input:
        # str.splitlines treats more than "\n" as a line boundary, see
        # https://docs.python.org/3/library/stdtypes.html
        ("\x0c", "\n\r\n"),
        ("\x1c\v", "\f\r\n"),
    )
    for before, after in cases:
        test_diff(before, after)
def main():
    """Run the fixed cases, then 50 random ASCII and 50 random unicode cases."""
    print("Testing...")
    std_tests()
    suites = (
        ("Testing random ASCII...", rand_ascii),
        ("Testing random unicode...", rand_unicode),
    )
    for banner, randchar in suites:
        print(banner)
        for _ in range(50):
            generate_test(50, 50, randchar)
    print("Passed ✓")


if __name__ == '__main__':
    main()
@graingert
Copy link

Can you upload to PyPI?

@amaank404
Copy link

I'm going to upload this to PyPI under this guy's name.

@amaank404
Copy link

Now `pip install diffpatch` works, and so does `import diffpatch`.

@amaank404
Copy link

any idea how to unify patches?

@1f604
Copy link

1f604 commented Feb 6, 2022

What is the purpose of this line:

  if i+1 < len(p) and p[i+1][0] == '\\': line = p[i][:-1]; i += 2

I don't see how a line in a unified diff can start with a '\' character.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment