Skip to content

Instantly share code, notes, and snippets.

@kugland
Last active June 9, 2023 03:50
Show Gist options
  • Save kugland/32c46bae792c3720b6b14b4b124a3355 to your computer and use it in GitHub Desktop.
Save kugland/32c46bae792c3720b6b14b4b124a3355 to your computer and use it in GitHub Desktop.
Sort input lines by extension
#!/usr/bin/env python3
# Copyright (C) 2023 Andre Kugland
# This script is released under the MIT License.
"""
sort-by-ext: Sort input lines by extension.
Usage: sort-by-ext [-0]
This script is a command-line utility that reads lines of text from standard
input, sorts them by file extension, and writes the sorted lines to standard
output.
Options:
-0, --null use the null character as separator instead of newline
The purpose of this script is to reorder the files in the creation of a tar
archive, so that a better compression ratio can be achieved.
Example:
find . -type f -print0 | sort-by-ext -0 | tar --null -czf archive.tgz -T -
"""
# Refuse to be imported: the rest of this file parses command-line arguments
# and reads stdin at module top level, so an import would trigger those side
# effects.  ImportError is a subclass of Exception, so existing handlers that
# catch Exception keep working.
if __name__ != "__main__":
    raise ImportError("This script is not meant to be imported.")
import sys
from os.path import basename, splitext
import argparse
def make_sort_key(line: bytes) -> bytes:
    """Return the key used to sort one input line (a path given as bytes).

    The key has the layout ``<extension>/<basename>/<line>`` and is compared
    as raw bytes, so lines that share an extension end up next to each other,
    arranged by file name and finally by the full line.  ``/`` is used as the
    field delimiter because it cannot occur inside a basename or an extension.

    Args:
        line: A single input line, without the trailing separator.

    Returns:
        The byte string to sort the line by.
    """
    base = basename(line)
    # splitext() yields (root, ext); ext keeps its leading dot and is empty
    # for names without an extension (including dotfiles such as ".bashrc").
    ext = splitext(base)[1]
    return b"/".join((ext, base, line))
# argparse's built-in help is disabled so that -h/--help can print the module
# docstring instead, which already contains the usage text.
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("-0", "--null", action="store_true")
parser.add_argument("-h", "--help", action="store_true")
args = parser.parse_args()

if args.help:
    print(__doc__.strip())
    # sys.exit rather than the bare exit() helper, which is only injected by
    # the site module and is missing under e.g. `python -S`.
    sys.exit(0)

separator = b"\0" if args.null else b"\n"

# Make sure stdin and stdout are opened in binary mode.  closefd=False leaves
# the underlying file descriptors open when these wrappers are closed.
with (
    open(sys.stdin.fileno(), "rb", closefd=False) as stdin,
    open(sys.stdout.fileno(), "wb", closefd=False) as stdout,
):
    lines = stdin.read().split(separator)
    # Input that ends with the separator yields one trailing empty item;
    # drop it so no spurious empty line is emitted.
    if lines[-1] == b"":
        lines.pop()
    lines.sort(key=make_sort_key)
    # Every output line, including the last, is terminated by the separator.
    stdout.writelines(line + separator for line in lines)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment