Skip to content

Instantly share code, notes, and snippets.

@OscarL
Created December 20, 2022 23:07
Show Gist options
  • Save OscarL/6bbe63baf1b683e2c7e5b4d93d37c2e3 to your computer and use it in GitHub Desktop.
Save OscarL/6bbe63baf1b683e2c7e5b4d93d37c2e3 to your computer and use it in GitHub Desktop.
Split Haikuporter's .patchset files ("git am" mailboxes).
#! python3
"""
patchset_split.py
Based on Paolo Bonzini's mbox_split.py:
https://gist.github.com/bonzini/d5bc1946475487167c529f9699e39512
"""
import argparse
import email.parser
import email.header
import os
import re
import sys
# The original version from Paolo Bonzini expected a "Subject:" like this:
# "Subject: [ 2/2 ] webbrowser: Support for default browsers on Haiku"
# But we don't need that [0-9]+/[0-9]+. We want numbers to be incremental.
def subj_to_name(subj):
    """Convert a patch subject into a file name.

    A leading "[...]" tag or a leading "word:" prefix becomes the area part
    of the name ("misc" when the subject has neither); the remainder becomes
    the description. Both parts are reduced to ASCII letters, digits, "_"
    and "-", with runs of other characters collapsed into a single "-".

    Args:
        subj: The subject text, possibly folded over several lines. None is
            treated as an empty subject, and any other non-str value is
            converted with str() before processing.

    Returns:
        A string of the form "<area>-<description>.patch".
    """

    # You can write Perl in any language. - Edgar Dijkstra, probably.
    def dashify(text):
        # Replace every character outside [a-zA-Z0-9_-], squeeze repeated
        # dashes, then trim dashes/dots from both ends.
        text = re.sub(r"[^a-zA-Z0-9_-]", "-", text)
        text = re.sub(r"--+", "-", text)
        text = re.sub(r"^[.-]*", "", text)
        return re.sub(r"[.-]*$", "", text)

    subj = "" if subj is None else str(subj)

    # Unfold continuation lines. No flags are needed here: the pattern has
    # no "." and no anchors, and passing a flag positionally would be taken
    # as the replacement count instead.
    subj = re.sub(r"\n\s+", " ", subj)

    m = re.match(r"""\s* ( \[ [^]]* \] | \S+: )?""", subj, re.X)
    area = "misc"
    if m and m.group(1):
        area = dashify(m.group(1))
        # Drop the prefix so it is not repeated in the description.
        subj = subj[m.end():]
    text = dashify(subj.strip())
    return "%s-%s.patch" % (area, text)
def has_patch(body):
    """Return whether the body includes a patch.

    Three shapes are recognised, each made of markers that start a line and
    appear in this order anywhere in the body:
      * "---", "+++", "@@"                  (textual diff)
      * "diff", "index", "GIT binary patch" (binary diff)
      * "diff", "old mode ", "new mode"     (mode change)

    Args:
        body: The message body as bytes.

    Returns:
        A match object (truthy) when one of the shapes is found, else None.
    """
    # The pattern is compiled in verbose mode, where unescaped whitespace is
    # discarded; spaces that must match literally are therefore written as
    # "\ ". re.S lets ".*" run across line breaks between the markers and
    # re.M anchors "^" at every line start.
    pattern = rb"""
          ^---.* ^\+\+\+.* ^@@
        | ^diff.* ^index.* ^GIT\ binary\ patch
        | ^diff.* ^old\ mode\ .* ^new\ mode
    """
    return re.search(pattern, body, re.M | re.S | re.X)
def header_to_string(v):
    """Decode a MIME encoded-word header value.

    The result is an email.header.Header instance; formatting it with "%s"
    or str() yields the decoded Unicode text.
    """
    decoded_chunks = email.header.decode_header(v)
    return email.header.make_header(decoded_chunks)
def do_single(msg, num, output_dir, outfile=None):
    """Write one message as a trimmed-down patch file.

    Only the From, Subject, Date and Content-Type headers are kept, and the
    body is written with its content-transfer-encoding already decoded.
    Behaviour is also controlled by the module-level ``args`` namespace:
    ``args.keep_cr`` keeps "\\r\\n" line endings, and ``args.nopatch`` writes
    the message even when no patch is detected in its body.

    Args:
        msg: The parsed email message.
        num: Sequence number used as the "%02d-" prefix of the generated
            file name (unused when ``outfile`` is given).
        output_dir: Directory for the generated file (unused when
            ``outfile`` is given).
        outfile: Optional binary stream to write to. It is left open for the
            caller. When None, a file named after the subject is created in
            ``output_dir`` and its path is printed to stdout.
    """

    def open_output_file(msg):
        subject = msg["Subject"]
        # A message without a Subject header yields None; fall back to an
        # empty subject rather than failing while building the name.
        subject = "" if subject is None else str(subject)
        name = "%02d-%s" % (num, subj_to_name(subject))
        name = os.path.join(output_dir, name)
        print(name)
        return open(name, "wb")

    def write_message(f):
        for k in ("From", "Subject", "Date", "Content-Type"):
            if k in msg:
                f.write(("%s: %s\n" % (k, header_to_string(msg[k]))).encode())
        f.write(b"\n")
        f.write(body)

    # For multipart mail only the first part is considered.
    container = msg.get_payload(0) if msg.is_multipart() else msg
    body = container.get_payload(decode=True)
    if body is None:
        # get_payload(decode=True) returns None when the part is itself a
        # multipart container; treat that as an empty body.
        body = b""
    if not args.keep_cr:
        body = body.replace(b"\r\n", b"\n")
    if not args.nopatch and not has_patch(body):
        return

    if outfile is not None:
        # The stream belongs to the caller (e.g. sys.stdout.buffer), so it
        # must not be closed here.
        write_message(outfile)
    else:
        with open_output_file(msg) as f:
            write_message(f)
def split_mbox(filename, output_dir):
    """Split an mbox file and pass each message to do_single().

    Messages are delimited by lines starting with "From ". Each message is
    parsed with email.parser.BytesFeedParser and handed to do_single()
    together with a 1-based sequence number.

    Args:
        filename: Path of the mbox file to read.
        output_dir: Directory forwarded to do_single() for the output files.
    """
    patch_num = 0
    parser = None

    def finish_message():
        # Parse whatever has been fed so far and emit it.
        do_single(parser.close(), num=patch_num, output_dir=output_dir)

    with open(filename, "rb") as mailbox:
        for line in mailbox:
            if line.startswith(b"From "):
                # Separator line: finish the previous message, if any.
                if parser is not None:
                    finish_message()
                    parser = None
                continue

            # Skip blank lines between the separator and the headers.
            if parser is None and line.strip() == b"":
                continue

            # Undo mbox quoting of body lines that began with "From ".
            # The trailing space matters: lines such as ">From: someone"
            # or ">Fromage" are ordinary content and must keep their ">".
            if line.startswith(b">From "):
                line = line[1:]

            if parser is None:
                parser = email.parser.BytesFeedParser()
                patch_num += 1
            parser.feed(line)

        if parser is not None:
            # Last patch
            finish_message()
if __name__ == '__main__':
    # Command-line entry point. The parsed options are stored in the
    # module-level name `args`, which do_single() reads (keep_cr, nopatch).
    parser = argparse.ArgumentParser(
        description="Splits a given mailbox into separate patch files"
    )
    parser.add_argument(
        "--nopatch",
        action="store_true",
        default=False,
        help="exports even if it's not a patch",
    )
    parser.add_argument(
        "--single",
        action="store_true",
        default=False,
        help="do not split mbox file",
    )
    parser.add_argument(
        "--keep-cr",
        action="store_true",
        default=False,
        help=r"do not remove \r from lines ending with \r\n",
    )
    parser.add_argument(
        "mbox",
        metavar="<mailbox file>",
        help='specifies the mailbox file',
    )
    parser.add_argument(
        "output_dir",
        metavar="<output_dir>",
        default='.',
        nargs='?',
        help='place output files under this directory',
    )
    args = parser.parse_args()
    args.output_dir = os.path.abspath(args.output_dir)
    # A path that does not exist cannot be a regular file either, so the
    # existence check alone decides whether the directory must be created.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir, exist_ok=True)
    if args.single:
        # Treat the whole input as one message and write it to stdout.
        with open(args.mbox, "rb") as infile:
            msg = email.parser.BytesParser().parse(infile)
        # Keyword arguments keep stdout bound to `outfile`; passed
        # positionally it would land in `output_dir` and the directory
        # would be taken as the sequence number.
        do_single(
            msg,
            num=1,
            output_dir=args.output_dir,
            outfile=sys.stdout.buffer,
        )
    else:
        split_mbox(args.mbox, args.output_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment