OscarL/patchset_split.py

## patchset_split.py
#! python3

"""
patchset_split.py

Based on Paolo Bonzini's mbox_split.py:
https://gist.github.com/bonzini/d5bc1946475487167c529f9699e39512
"""

import argparse
import email.parser
import email.header
import os
import re
import sys


# The original version from Paolo Bonzini expected a "Subject:" like this:
# "Subject: [ 2/2 ] webbrowser: Support for default browsers on Haiku"
# But we don't need that [0-9]+/[0-9]+. We want numbers to be incremental.
def subj_to_name(subj):
    """Convert a mail subject to a patch filename.

    A leading "[...]" tag or "word:" prefix becomes the "area" part of the
    name ("misc" when the subject has neither); the remainder becomes the
    descriptive part.  Both parts are reduced to ASCII letters, digits,
    "_" and "-".

    Args:
        subj: The subject text, possibly containing folded (multi-line)
            header whitespace.  None is treated as an empty subject, and
            any other non-str value is converted with str().

    Returns:
        A string of the form "<area>-<text>.patch".
    """

    # You can write Perl in any language.  - Edsger Dijkstra, probably.
    def dashify(text):
        # Every character outside [a-zA-Z0-9_-] becomes a dash, runs of
        # dashes collapse to one, and leading/trailing "." or "-" go away.
        text = re.sub(r"[^a-zA-Z0-9_-]", "-", text)
        text = re.sub(r"--+", "-", text)
        return text.strip(".-")

    if subj is None:
        subj = ""

    # Unfold continuation lines.  The fourth positional argument of
    # re.sub() is `count`, not `flags`, so no flag is passed here; the
    # pattern contains no "." and therefore never needed re.S anyway.
    subj = re.sub(r"\n\s+", " ", str(subj))

    # Optional leading "[tag]" or "area:" prefix (verbose-mode pattern).
    m = re.match(r"""\s* ( \[ [^]]* \] | \S+: )?""", subj, re.X)
    area = "misc"
    if m and m.group(1):
        area = dashify(m.group(1))
        subj = subj[m.end():]

    text = dashify(subj.strip())
    return "%s-%s.patch" % (area, text)


def has_patch(body):
    """Return whether the body includes a patch.

    Three shapes are recognised, each anchored at line starts:
      * a unified diff:      "---" ... "+++" ... "@@"
      * a git binary patch:  "diff" ... "index" ... "GIT binary patch"
      * a mode-only change:  "diff" ... "old mode " ... "new mode"

    Args:
        body: The decoded message body as bytes.

    Returns:
        The re.Match object of the first hit (truthy), or None when the
        body contains no patch.
    """
    # re.X ignores unescaped whitespace inside the pattern, so the literal
    # spaces in "GIT binary patch", "old mode " and "new mode" have to be
    # escaped; otherwise those alternatives could never match real input.
    return re.search(
        rb"""^---.*     ^\+\+\+.*        ^@@
            |^diff.*   ^index.*         ^GIT\ binary\ patch
            |^diff.*   ^old\ mode\ .*   ^new\ mode""",
        body,
        re.M | re.S | re.X,
    )


def header_to_string(v):
    """Decode a MIME encoded-word header value.

    Returns an email.header.Header built from the decoded chunks of *v*;
    formatting it with str() or "%s" gives the Unicode text.
    """
    decoded_chunks = email.header.decode_header(v)
    return email.header.make_header(decoded_chunks)


def do_single(msg, num, output_dir, outfile=None):
    """Write one message as a patch, keeping only a few headers.

    The body of the first non-multipart part is decoded (which undoes any
    content-transfer-encoding) and written after the From, Subject, Date
    and Content-Type headers of the message.  The options ``keep_cr`` and
    ``nopatch`` are read from the module-level ``args`` namespace.

    Args:
        msg: The parsed message (email.message.Message).
        num: Sequence number used as the "%02d-" prefix of the generated
            filename; only used when outfile is None.
        output_dir: Directory that receives the generated file; only used
            when outfile is None.
        outfile: Optional binary file object to write to.  When omitted, a
            file named after the subject is created in output_dir and its
            name is printed to stdout.  A supplied file is flushed but
            left open, because the caller owns it.

    Returns:
        None.  Nothing is written when the message has no decodable body
        or, unless ``args.nopatch`` is set, when the body holds no patch.
    """

    def open_output_file(msg):
        # msg["Subject"] is None when the header is missing; fall back to
        # an empty subject instead of failing inside subj_to_name().
        subject = str(msg["Subject"] or "")
        name = "%02d-%s" % (num, subj_to_name(subject))
        name = os.path.join(output_dir, name)
        print(name)
        return open(name, "wb")

    # walk() yields the message itself and then its sub-parts depth-first,
    # so this picks the message for plain mails and the first leaf part
    # for (possibly nested) multipart mails.
    container = next(
        (part for part in msg.walk() if not part.is_multipart()), None
    )
    if container is None:
        return

    body = container.get_payload(decode=True)
    if body is None:
        return

    if not args.keep_cr:
        body = body.replace(b"\r\n", b"\n")

    if not args.nopatch and not has_patch(body):
        return

    f = outfile if outfile is not None else open_output_file(msg)
    try:
        for k in ("From", "Subject", "Date", "Content-Type"):
            if k in msg:
                f.write(("%s: %s\n" % (k, header_to_string(msg[k]))).encode())
        f.write(b"\n")
        f.write(body)
    finally:
        if outfile is None:
            # We opened it, so we close it.
            f.close()
        else:
            f.flush()


def split_mbox(filename, output_dir):
    """Split an mbox file and hand each message to do_single().

    Lines starting with "From " are treated as message separators and are
    not part of any message.  Blank lines between a separator and the
    first header line are skipped.  Messages are numbered from 1 in the
    order they appear, and that number is passed to do_single() as ``num``.

    Args:
        filename: Path of the mbox file to read.
        output_dir: Directory passed through to do_single().
    """

    def flush(feed_parser, number):
        # Finish parsing the collected lines and write the message out.
        do_single(feed_parser.close(), num=number, output_dir=output_dir)

    with open(filename, "rb") as mailbox:
        patch_num = 0
        feed_parser = None
        for line in mailbox:
            if line.startswith(b"From "):
                # Separator line: finish the previous message, if any.
                if feed_parser is not None:
                    flush(feed_parser, patch_num)
                    feed_parser = None
                continue

            if feed_parser is None:
                if not line.strip():
                    continue
                feed_parser = email.parser.BytesFeedParser()
                patch_num += 1

            # Undo mbox quoting of body lines that start with "From ".
            # The trailing space matters: ">Fromage" is ordinary text.
            if line.startswith(b">From "):
                line = line[1:]
            feed_parser.feed(line)

        if feed_parser is not None:
            # Last patch
            flush(feed_parser, patch_num)


# Command-line entry point: parse the options, make sure the output
# directory exists, then either convert one message to stdout (--single)
# or split the whole mailbox into numbered patch files.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Splits a given mailbox into separate patch files"
    )
    parser.add_argument(
        "--nopatch",
        action="store_true",
        default=False,
        help="exports even if it's not a patch",
    )
    parser.add_argument(
        "--single",
        action="store_true",
        default=False,
        help="do not split mbox file",
    )
    parser.add_argument(
        "--keep-cr",
        action="store_true",
        default=False,
        help=r"do not remove \r from lines ending with \r\n",
    )
    parser.add_argument(
        "mbox",
        metavar="<mailbox file>",
        help='specifies the mailbox file',
    )

    parser.add_argument(
        "output_dir",
        metavar="<output_dir>",
        default='.',
        nargs='?',
        help='place output files under this directory',
    )

    # `args` must stay a module-level name: do_single() reads
    # args.keep_cr and args.nopatch from it.
    args = parser.parse_args()

    args.output_dir = os.path.abspath(args.output_dir)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir, exist_ok=True)

    if args.single:
        with open(args.mbox, "rb") as infile:
            msg = email.parser.BytesParser().parse(infile)
        # Keyword arguments keep output_dir and the stream from landing in
        # the wrong parameters; `num` is unused when outfile is given.
        do_single(
            msg,
            num=1,
            output_dir=args.output_dir,
            outfile=sys.stdout.buffer,
        )
    else:
        split_mbox(args.mbox, args.output_dir)
	#! python3

	"""
	patchset_split.py

	Based on Paolo Bonzini's mbox_split.py:
	https://gist.github.com/bonzini/d5bc1946475487167c529f9699e39512
	"""

	import argparse
	import email.parser
	import email.header
	import os
	import re
	import sys


	# The original version from Paolo Bonzini expected a "Subject:" like this:
	# "Subject: [ 2/2 ] webbrowser: Support for default browsers on Haiku"
	# But we don't need that [0-9]+/[0-9]+. We want numbers to be incremental.
	def subj_to_name(subj):
	"""Convert a subject to a filename."""

	# You can write Perl in any language. - Edgar Dijkstra, probably.
	def dashify(text):
	text = re.sub("[^a-zA-Z0-9_-]", "-", text)
	text = re.sub("--+", "-", text)
	text = re.sub("^[.-]*", "", text)
	return re.sub("[.-]*$", "", text)

	subj = re.sub("\n\s+", " ", subj, re.S)
	m = re.match(r"""\s* ( \[ [^]]* \] \| \S+: )?""", subj, re.X)
	area = "misc"
	if m and m.group(1):
	area = dashify(m.group(1))
	subj = subj[m.end() :]

	text = dashify(subj.strip())
	return "%s-%s.patch" % (area, text)


	def has_patch(body):
	"""Return whether the body includes a patch."""
	return re.search(
	b"""^---.* ^\\+\\+\\+.* ^@@
	\|^diff.* ^index.* ^GIT binary patch
	\|^diff.* ^old mode .* ^new mode""",
	body,
	re.M \| re.S \| re.X,
	)


	def header_to_string(v):
	"""Convert a MIME encoded header to Unicode."""
	return email.header.make_header(email.header.decode_header(v))


	def do_single(msg, num, output_dir, outfile=None):
	"""Remove unnecessary headers from the message as well as
	content-transfer-encoding, and print it to outfile or to
	a file whose name is derived from the subject. If the
	latter, the name of the file is printed to stdout."""

	def open_output_file(msg):
	name = "%02d-%s" % (num, subj_to_name(msg["Subject"]))
	name = os.path.join(output_dir, name)
	print(name)
	return open(name, "wb")

	container = msg.get_payload(0) if msg.is_multipart() else msg

	body = container.get_payload(decode=True)

	if not args.keep_cr:
	body = body.replace(b"\r\n", b"\n")

	if not args.nopatch and not has_patch(body):
	return

	with outfile or open_output_file(msg) as f:
	for k in ("From", "Subject", "Date", "Content-Type"):
	if k in msg:
	f.write(("%s: %s\n" % (k, header_to_string(msg[k]))).encode())
	f.write(b"\n")
	f.write(body)


	def split_mbox(filename, output_dir):
	"""Split an mbox file and pass each part to a function func."""

	with open(filename, "rb") as mailbox:
	patch_num = 0
	parser = None
	for line in mailbox:
	if line.startswith(b"From "):
	# finish the previous message
	if parser:
	do_single(parser.close(), num=patch_num, output_dir=output_dir)
	parser = None
	else:
	if not parser and line.strip() == b"":
	continue
	if line.startswith(b">From"):
	line = line[1:]
	if not parser:
	parser = email.parser.BytesFeedParser()
	patch_num += 1
	parser.feed(line)

	if parser:
	# Last patch
	do_single(parser.close(), num=patch_num, output_dir=output_dir)


	if __name__ == '__main__':
	parser = argparse.ArgumentParser(
	description="Splits a given mailbox into separate patch files"
	)
	parser.add_argument(
	"--nopatch",
	action="store_true",
	default=False,
	help="exports even if it's not a patch",
	)
	parser.add_argument(
	"--single",
	action="store_true",
	default=False,
	help="do not split mbox file",
	)
	parser.add_argument(
	"--keep-cr",
	action="store_true",
	default=False,
	help=r"do not remove \r from lines ending with \r\n",
	)
	parser.add_argument(
	"mbox",
	metavar="<mailbox file>",
	help='specifies the mailbox file',
	)

	parser.add_argument(
	"output_dir",
	metavar="<output_dir>",
	default='.',
	nargs='?',
	help='place output files under this directory',
	)

	args = parser.parse_args()

	args.output_dir = os.path.abspath(args.output_dir)
	if not os.path.exists(args.output_dir) and not os.path.isfile(args.output_dir):
	os.makedirs(args.output_dir, exist_ok=True)

	if args.single:
	infile = open(args.mbox, "rb")
	msg = email.parser.BytesParser().parse(infile)
	do_single(msg, args.output_dir, sys.stdout.buffer)
	else:
	split_mbox(args.mbox, args.output_dir)