Skip to content

Instantly share code, notes, and snippets.

@Dyrcona
Last active August 9, 2024 13:47
Show Gist options
  • Save Dyrcona/dd490a3086b1866770c5b69fd04e1eaa to your computer and use it in GitHub Desktop.
Save Dyrcona/dd490a3086b1866770c5b69fd04e1eaa to your computer and use it in GitHub Desktop.
A simple program to split git diff patches into multiple files.
#!/usr/bin/env python3
# ------------------------------------------------------------------------
# Copyright (c) 2021 Jason Stephenson <jason@sigio.com>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# ------------------------------------------------------------------------
import argparse
import contextlib
import pathlib
import re
import shutil
import sys
@contextlib.contextmanager
def smart_open(filename=None, mode='r'):
    """Context manager yielding a file handle chosen from filename and mode.

    When filename is truthy, the named file is opened with mode and is
    closed again when the context exits. Otherwise a standard stream is
    yielded and left open: sys.stdin when mode is None, empty, or
    contains 'r', and sys.stdout for any other mode.
    """
    if filename:
        handle = open(filename, mode)
    elif not mode or 'r' in mode:
        handle = sys.stdin
    else:
        handle = sys.stdout
    try:
        yield handle
    finally:
        # Only close handles opened here; never close stdin/stdout.
        if filename:
            handle.close()
def ignore_patch(filename, ignore_list):
    """Tell whether filename should be ignored.

    Each entry of ignore_list is used as a regular expression and
    searched for anywhere in filename. Return True as soon as one entry
    matches, or False when none does (including an empty list).
    """
    return any(re.search(pattern, filename) for pattern in ignore_list)
def err_exit(msg):
    """Print msg and "Exiting..." to stderr, then terminate the program.

    Raises SystemExit with status 1 so the shell sees the failure.
    sys.exit is used rather than quit(), which is provided by the site
    module for interactive sessions and exits with status 0.
    """
    print(msg, file=sys.stderr)
    print("Exiting...", file=sys.stderr)
    sys.exit(1)
def mkdir_if_not_exists(dirPath):
    """Make the directory represented by the pathlib.Path dirPath, along
    with any missing parents, if it does not already exist.

    Raises FileExistsError if dirPath exists but is not a directory.
    """
    # exist_ok avoids the race between a separate exists() check and
    # the mkdir() call.
    dirPath.mkdir(parents=True, exist_ok=True)
def copy_file(source, destination):
    """Copy a file from the source Path to the destination Path.

    The destination's parent directory is created when missing, the
    contents are copied in binary mode, and the source's mode bits are
    applied to the destination.

    Return True on success. Return False, after printing a message to
    stderr, when source is not an existing regular file. I/O errors
    during the copy propagate as exceptions.
    """
    if not source.is_file():
        print("{} does not exist.".format(source), file=sys.stderr)
        return False
    mkdir_if_not_exists(destination.parent)
    with source.open(mode='rb') as inh, destination.open(mode='wb') as outh:
        # Stream in chunks so large binary files are not held in memory.
        shutil.copyfileobj(inh, outh)
    destination.chmod(source.stat().st_mode)
    return True
def write_patch(outPath, patch):
    """Save patch.text, encoded to bytes, in a file ending in ".patch".

    outPath is a pathlib.Path; when its suffix is not already ".patch"
    that suffix is appended to the file name. The parent directory is
    created first if needed. Return True once the data is written;
    failures surface as exceptions rather than a False return.
    """
    mkdir_if_not_exists(outPath.parent)
    target = outPath
    if target.suffix != ".patch":
        target = target.with_name(target.name + ".patch")
    target.write_bytes(patch.text.encode())
    return True
def print_list_to_path(list, outPath):
    """Write every element of list to the file at outPath, one per line.

    outPath is a pathlib.Path opened in text write mode, so an existing
    file is overwritten. Elements are converted with str().
    """
    with outPath.open(mode="w") as out:
        out.writelines(str(entry) + "\n" for entry in list)
class Patch:
    """Class to represent an individual patch as generated by git diff.

    Attributes:
        text:  the complete patch text, starting with its header line.
        first: the "diff --git a/... b/..." header line, including its
               trailing newline.
        afile: the path captured after "a/" in the header.
        bfile: the path captured after "b/" in the header.
    """

    # Header line that must open every patch; anchored at the start of
    # the text because re.MULTILINE is not used.
    _HEADER_RE = re.compile(r"^diff --git a/(.+?) b/(.+?)\n")
    # The two lines git emits for a file mode change.
    _MODE_CHANGE = r"old mode [0-7]+\nnew mode [0-7]+\n"

    def __init__(self, text):
        """Parse the header line of text.

        Raises ValueError if text does not begin with a "diff --git"
        header line terminated by a newline.
        """
        match = self._HEADER_RE.search(text)
        if not match:
            raise ValueError(
                "patch text does not start with a 'diff --git' header line")
        self.text = text
        self.first = match.group(0)
        self.afile = match.group(1)
        self.bfile = match.group(2)

    def _after_header(self):
        """Return the patch text that follows the header line.

        Matching against this remainder avoids splicing the header,
        which contains file names, into a regular expression where
        characters such as "+", "(" or "[" would be misinterpreted.
        """
        return self.text[len(self.first):]

    def is_new_file(self):
        """Return True if this patch represents a newly created file, or
        False if not."""
        return self._after_header().startswith("new file mode")

    def is_mode_change(self):
        """Return True if this patch includes a file mode change, or False
        if not."""
        return re.match(self._MODE_CHANGE, self._after_header()) is not None

    def is_binary_file(self):
        """Return True if the patch is for a binary file, or False if not.

        A binary patch is the header, an optional "new file mode" line
        or mode-change pair, an "index" line, and a final
        "Binary files ... differ" line.
        """
        if self.is_new_file():
            prefix = r"new file mode .+\n"
        elif self.is_mode_change():
            prefix = self._MODE_CHANGE
        else:
            prefix = ""
        pattern = prefix + r"index .+\nBinary files .+ differ$"
        return re.match(pattern, self._after_header()) is not None
class PatchParser:
    """Class to parse a file of multiple patches and iterate over each
    individual patch.

    input_handle is any iterable of text lines, such as an open file.
    Each iteration step returns one Patch built from a "diff --git"
    header line and the lines that follow it up to the next header.
    """

    def __init__(self, input_handle):
        self._input = input_handle
        self._refirst = re.compile("^diff --git a/.+? b/.+?$")
        # Header line of the next patch, read ahead while finishing the
        # previous one; None when nothing is pending.
        self._first = None

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next Patch, or raise StopIteration at end of input.

        Lines that appear before the first header (for example a commit
        message) are skipped.
        """
        lines = []
        if self._first is not None:
            # Start with the header consumed at the end of the last call
            # so it is never lost, even if another header or the end of
            # input follows immediately.
            lines.append(self._first)
            self._first = None
        for line in self._input:
            if self._refirst.match(line):
                if lines:
                    # This header belongs to the following patch.
                    self._first = line
                    break
                lines.append(line)
            elif lines:
                lines.append(line)
            # Otherwise no header has been seen yet: ignore the line.
        if not lines:
            raise StopIteration
        return Patch("".join(lines))
def main():
    """Split a git diff read from a file or stdin into per-file outputs.

    For every patch whose b/ path does not match an --ignore pattern:
    new and binary files are copied from the source directory to the
    same relative path under the destination directory; every other
    patch is written to "<path>.patch" under the destination directory.
    With --write-lists, the names of the written patches and copied
    files are saved as patches.list and files.list in the destination
    directory.
    """
    argparser = argparse.ArgumentParser(description="""
A simple program to split git diff patches into multiple files.
""")
    argparser.add_argument("-f", "--file", action="store",
                           help="filename of patch file to parse")
    argparser.add_argument("-i", "--ignore", action="append",
                           help="filename patterns to ignore")
    argparser.add_argument("-s", "--source-dir", action="store", dest="source", default=".",
                           help="source file directory")
    argparser.add_argument("-d", "--destination-dir", action="store", dest="destination", default=".",
                           help="destination directory for split patches")
    argparser.add_argument("-l", "--write-lists", action="store_true", dest="lists",
                           help="write patches.list and files.list to root of destination directory")
    args = argparser.parse_args()

    sourceDir = pathlib.Path(args.source).expanduser()
    destDir = pathlib.Path(args.destination).expanduser()
    # Compare resolved paths so that different spellings of the same
    # directory (".", "./", "~/x" versus its absolute form) are rejected
    # too, not only identical argument strings.
    if sourceDir.resolve() == destDir.resolve():
        err_exit("source and destination are the same: {}".format(args.source))
    if not sourceDir.exists():
        err_exit("source directory ({}) does not exist".format(args.source))
    mkdir_if_not_exists(destDir)

    patchList = []
    filesList = []
    with smart_open(filename=args.file) as patch_input:
        count = 0
        for patch in PatchParser(patch_input):
            if args.ignore and ignore_patch(patch.bfile, args.ignore):
                continue
            count = count + 1
            print("{} {}".format(count, patch.bfile))
            if patch.is_new_file() or patch.is_binary_file():
                # A textual patch is not useful for these; take the
                # file itself from the source tree instead.
                if copy_file(sourceDir / patch.bfile, destDir / patch.bfile) and args.lists:
                    filesList.append(patch.bfile)
            else:
                patchName = patch.bfile + ".patch"
                if write_patch(destDir / patchName, patch) and args.lists:
                    patchList.append(patchName)

    # The lists are only populated when --write-lists was given.
    if patchList:
        print_list_to_path(patchList, destDir / "patches.list")
    if filesList:
        print_list_to_path(filesList, destDir / "files.list")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment