Last active
August 9, 2024 13:47
-
-
Save Dyrcona/dd490a3086b1866770c5b69fd04e1eaa to your computer and use it in GitHub Desktop.
A simple program to split git diff patches into multiple files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# ------------------------------------------------------------------------ | |
# Copyright (c) 2021 Jason Stephenson <jason@sigio.com> | |
# | |
# Permission to use, copy, modify, and distribute this software for any | |
# purpose with or without fee is hereby granted, provided that the above | |
# copyright notice and this permission notice appear in all copies. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
# ------------------------------------------------------------------------ | |
import argparse, contextlib, pathlib, re, sys | |
@contextlib.contextmanager
def smart_open(filename=None, mode='r'):
    """Yield a handle for filename, or a standard stream if none is given.

    Parameters:
        filename: path of the file to open.  When falsy (None or ''), no
            file is opened and sys.stdin or sys.stdout is yielded instead.
        mode: mode string passed to open().  None or '' is treated as 'r'.
            Without a filename, a mode containing 'r' selects sys.stdin
            and any other mode selects sys.stdout.

    A file opened here is closed when the with block exits, even if the
    block raises.  The standard streams are never closed.
    """
    # Normalise once so the file branch and the stdio branch agree on what
    # an empty mode means; open() itself rejects both None and ''.
    mode = mode or 'r'
    if filename:
        fh = open(filename, mode)
    elif 'r' in mode:
        fh = sys.stdin
    else:
        fh = sys.stdout
    try:
        yield fh
    finally:
        # Only close what this function opened.
        if filename:
            fh.close()
def ignore_patch(filename, ignore_list):
    """Tell whether filename should be skipped.

    Each entry of ignore_list is used as a regular expression and searched
    for anywhere in filename.  Return True as soon as one entry matches,
    or False when none do (including when ignore_list is empty).
    """
    return any(re.search(pattern, filename) for pattern in ignore_list)
def err_exit(msg):
    """Print msg and an exit notice to stderr, then stop the program.

    Parameters:
        msg: the error message to show to the user.

    Raises:
        SystemExit: always, with exit status 1 so that calling shells and
            scripts can tell the run failed.
    """
    print(msg, file=sys.stderr)
    print("Exiting...", file=sys.stderr)
    # sys.exit() rather than quit(): quit() is an interactive helper added
    # by the site module and exits with status 0, which hides the failure.
    sys.exit(1)
def mkdir_if_not_exists(dirPath):
    """Create the directory dirPath, and any missing parents, if needed.

    Parameters:
        dirPath: a pathlib.Path naming the directory to create.

    Nothing is done when dirPath already exists.
    """
    if dirPath.exists():
        return
    # exist_ok=True covers the window between the check above and this
    # call: if something else creates the directory in between, that is
    # not an error.
    dirPath.mkdir(parents=True, exist_ok=True)
def copy_file(source, destination):
    """Copy the file at Path source to Path destination.

    The destination's parent directory is created when missing, the
    contents are copied byte for byte, and the destination is given the
    source's mode bits.

    Return True after a successful copy.  When source is not a regular
    file, print a message to stderr and return False.
    """
    if not source.is_file():
        print("{} does not exist.".format(source), file=sys.stderr)
        return False

    mkdir_if_not_exists(destination.parent)
    with source.open(mode='rb') as reader, destination.open(mode='wb') as writer:
        writer.write(reader.read())

    # Carry the permission bits (e.g. the executable flag) over to the copy.
    destination.chmod(source.stat().st_mode)
    return True
def write_patch(outPath, patch):
    """Write the text of patch to a .patch file at Path outPath.

    The parent directory of outPath is created when missing.  If outPath
    does not already end in ".patch", that suffix is appended to its name.
    The patch text is encoded with str.encode() defaults and written in
    binary mode.

    Return True once the file has been written; I/O errors propagate as
    exceptions.
    """
    mkdir_if_not_exists(outPath.parent)
    target = outPath
    if target.suffix != ".patch":
        target = target.with_name(target.name + ".patch")
    target.write_bytes(patch.text.encode())
    return True
def print_list_to_path(list, outPath):
    """Write every element of list to the file at Path outPath.

    Each element is converted with str() and written on its own line.  The
    file is opened in text mode and truncated first, so an empty list
    leaves an empty file.
    """
    with outPath.open(mode="w") as fh:
        fh.writelines(str(entry) + "\n" for entry in list)
class Patch:
    """Represent the patch for one file as generated by git diff.

    Attributes set by __init__:
        text: the complete patch text, header line included.
        first: the leading "diff --git a/... b/..." line with its newline.
        afile: the path captured after "a/" in the header line.
        bfile: the path captured after "b/" in the header line.
    """

    # The header must be the very first line of the patch text.
    _HEADER = re.compile(r"diff --git a/(.+?) b/(.+?)\n")
    # Extended header fragments that may follow the header line.
    _NEW_FILE = r"new file mode .+\n"
    _MODE_CHANGE = r"old mode [0-7]+\nnew mode [0-7]+\n"
    _BINARY = r"index .+\nBinary files .+ differ$"

    def __init__(self, text):
        """Parse the header line of text and remember the whole patch.

        Raises:
            ValueError: if text does not start with a complete
                "diff --git a/... b/..." line.
        """
        match = self._HEADER.match(text)
        if not match:
            raise ValueError(
                "patch text does not start with a 'diff --git' header line")
        self.text = text
        self.first = match.group(0)
        self.afile = match.group(1)
        self.bfile = match.group(2)

    def _after_header(self):
        """Return the patch text that follows the header line.

        The checks below look only at this remainder, so file names never
        end up inside a regular expression where characters such as "+",
        "(" or "[" would be read as pattern syntax.
        """
        return self.text[len(self.first):]

    def is_new_file(self):
        """Return True if this patch represents a newly created file, or
        False if not."""
        return self._after_header().startswith("new file mode")

    def is_mode_change(self):
        """Return True if this patch includes a file mode change, or False
        if not."""
        return re.match(self._MODE_CHANGE, self._after_header()) is not None

    def is_binary_file(self):
        """Return True if the patch is for a binary file, or False if not.

        A binary patch consists of the header, an optional "new file mode"
        line or "old mode"/"new mode" pair, an "index" line, and a final
        "Binary files ... differ" line.
        """
        if self.is_new_file():
            prefix = self._NEW_FILE
        elif self.is_mode_change():
            prefix = self._MODE_CHANGE
        else:
            prefix = ""
        return re.match(prefix + self._BINARY, self._after_header()) is not None
class PatchParser:
    """Class to parse a file of multiple patches and iterate over each
    individual patch.

    The input is read line by line.  Every line that looks like a
    "diff --git a/... b/..." header starts a new patch; the lines that
    follow, up to the next header or the end of input, belong to it.
    Lines that come before the first header are skipped.
    """

    def __init__(self, input_handle):
        """Prepare to parse input_handle, an iterable of text lines such as
        an open text file."""
        self._input = input_handle
        self._refirst = re.compile("^diff --git a/.+? b/.+?$")
        # Header line already read from the input that starts the patch
        # the next call to __next__ will return; None when nothing pends.
        self._first = None

    def __iter__(self):
        """Return self; the parser is its own iterator."""
        return self

    def __next__(self):
        """Return the next Patch read from the input.

        Raises:
            StopIteration: when the input holds no further patches.
            ValueError: from Patch, if the collected text does not start
                with a complete header line.
        """
        lines = []
        # Start from the header the previous call read ahead, and clear it
        # so it is used exactly once.
        if self._first is not None:
            lines.append(self._first)
            self._first = None
        for line in self._input:
            if self._refirst.match(line):
                if lines:
                    # This header belongs to the following patch; keep it
                    # for the next call and finish the current one.
                    self._first = line
                    break
                lines.append(line)
            elif lines:
                lines.append(line)
            # Otherwise no header has been seen yet: skip the line.
        if not lines:
            raise StopIteration
        return Patch("".join(lines))
def main():
    """Split a git diff into one output per changed file.

    The diff is read from the file named by --file, or from standard input
    when no file is given.  For each patch whose "b/" path matches none of
    the --ignore patterns:

    * a new or binary file is copied from the source directory to the same
      relative path under the destination directory;
    * any other change is written under the destination directory as
      "<path>.patch".

    With --write-lists, the relative names of the written patches and the
    copied files are saved to patches.list and files.list in the
    destination directory (each only when it has at least one entry).
    """
    argparser = argparse.ArgumentParser(description="""
    A simple program to split git diff patches into multiple files.
    """)
    argparser.add_argument("-f", "--file", action="store", help="filename of patch file to parse")
    argparser.add_argument("-i", "--ignore", action="append", help="filename patterns to ignore")
    argparser.add_argument("-s", "--source-dir", action="store", dest="source", default=".",
                           help="source file directory")
    argparser.add_argument("-d", "--destination-dir", action="store", dest="destination", default=".",
                           help="destination directory for split patches")
    argparser.add_argument("-l", "--write-lists", action="store_true", dest="lists",
                           help="write patches.list and files.list to root of destination directory")
    args = argparser.parse_args()

    sourceDir = pathlib.Path(args.source).expanduser()
    destDir = pathlib.Path(args.destination).expanduser()
    # Compare resolved paths rather than the raw arguments so that
    # different spellings of one directory ("." and "./", a relative and
    # an absolute path, ...) are still recognised as the same place.
    if sourceDir.resolve() == destDir.resolve():
        err_exit("source and destination are the same: {}".format(args.source))
    if not sourceDir.exists():
        err_exit("source directory ({}) does not exist".format(args.source))
    mkdir_if_not_exists(destDir)

    patchList = []
    filesList = []
    with smart_open(filename=args.file) as infile:
        count = 0
        for patch in PatchParser(infile):
            if args.ignore and ignore_patch(patch.bfile, args.ignore):
                continue
            count = count + 1
            print("{} {}".format(count, patch.bfile))
            if patch.is_new_file() or patch.is_binary_file():
                # A textual patch is of no use for these; take the file
                # itself from the source directory.
                if copy_file(sourceDir / patch.bfile, destDir / patch.bfile):
                    if args.lists:
                        filesList.append(patch.bfile)
            else:
                patchName = patch.bfile + ".patch"
                if write_patch(destDir / patchName, patch):
                    if args.lists:
                        patchList.append(patchName)

    # Both lists stay empty unless --write-lists was given.
    if patchList:
        print_list_to_path(patchList, destDir / "patches.list")
    if filesList:
        print_list_to_path(filesList, destDir / "files.list")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment