Last active
January 21, 2024 15:59
-
-
Save neutrinoceros/aa2a20ffd00b4cd0dd4665597736280d to your computer and use it in GitHub Desktop.
A script I use to clean up Python "scripts" provided in the form of copy-pasted REPL sessions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires Python 3.8 or newer; only tested manually with 3.12
from __future__ import annotations | |
import argparse | |
import re | |
import sys | |
from dataclasses import dataclass | |
from functools import cached_property | |
from pathlib import Path | |
@dataclass(frozen=True)
class Repl:
    """Describe the prompts of an interactive interpreter session.

    Both fields are regular-expression fragments (not literal text); they are
    interpolated verbatim into the pattern built by ``regexp``.
    """

    # regex fragment matching the primary prompt
    prompt: str
    # regex fragment matching the continuation prompt
    continuation: str

    @cached_property
    def regexp(self):
        """Compiled pattern matching either prompt at the start of a string."""
        alternatives = "|".join((self.prompt, self.continuation))
        return re.compile("^(" + alternatives + ")")
# Known REPL flavours. Each pattern is a regex fragment that Repl.regexp
# anchors at the start of a line.
PythonRepl = Repl(prompt=r">>> ", continuation=r"\.\.\. ")
# IPython left-pads its continuation prompt so that it lines up with the
# "In [N]: " prompt; the amount of padding therefore grows with the number of
# digits in N, hence " +" rather than a fixed number of spaces.
IPythonRepl = Repl(prompt=r"In \[\d+\]: ", continuation=r" +\.\.\.: ")
# Empty patterns match (as an empty string) at the start of every line, so
# this REPL neither filters nor alters anything.
NullRepl = Repl(prompt=r"", continuation=r"")  # does nothing
# Candidates tried, in order, when auto-detecting the REPL flavour.
ALL_REPLS = [PythonRepl, IPythonRepl]
def get_repl(lines: list[str]) -> Repl:
    """Guess which REPL produced *lines*.

    Candidates from ``ALL_REPLS`` are tried in order; the first one whose
    prompt pattern matches the start of at least one line is returned.
    ``NullRepl`` is returned when no candidate matches.
    """
    for candidate in ALL_REPLS:
        if any(candidate.regexp.match(text) for text in lines):
            return candidate
    return NullRepl
def cleanup_outputs(lines: list[str], repl: Repl) -> list[str]:
    """Return only the lines that begin with one of *repl*'s prompts.

    Lines without a leading prompt (i.e. REPL outputs) are discarded; the
    kept lines are returned unmodified and in their original order.
    """
    starts_with_prompt = repl.regexp.match
    kept = []
    for text in lines:
        if starts_with_prompt(text) is not None:
            kept.append(text)
    return kept
def cleanup_prompts(lines: list[str], repl: Repl) -> list[str]:
    """Strip the leading prompt, if any, from every line.

    Lines that do not start with one of *repl*'s prompts are passed through
    unchanged.
    """
    pattern = repl.regexp
    stripped = []
    for text in lines:
        stripped.append(pattern.sub("", text))
    return stripped
def cleanup_whitespace(lines: list[str]) -> list[str]:
    """Return a copy of *lines* with trailing whitespace removed from each."""
    return list(map(str.rstrip, lines))
def main(argv: list[str] | None = None) -> int:
    """Clean up a copy-pasted REPL session stored in a file.

    REPL outputs are dropped (unless ``--no-output-cleaning`` is given),
    prompts are stripped and trailing whitespace is removed. The result is
    printed to stdout, or written back to the input file with ``--overwrite``.

    Args:
        argv: command-line arguments; ``None`` lets argparse read them from
            ``sys.argv``.

    Returns:
        The process exit status: 0 on success, 1 when the input file does
        not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", type=Path, help="the file to be read")
    parser.add_argument(
        "--no-output-cleaning",
        dest="output_cleaning",
        action="store_false",
        help="disable output cleaning (intended for debugging)",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="redirect output to the input file instead of stdout",
    )
    repl_selector = parser.add_mutually_exclusive_group(required=False)
    repl_selector.add_argument(
        "--python",
        action="store_true",
        help="assume input comes from the basic Python REPL",
    )
    repl_selector.add_argument(
        "--ipython",
        action="store_true",
        help="assume input comes from the IPython REPL",
    )
    args = parser.parse_args(argv)

    if not args.input_file.is_file():
        print(f"Error: no such file '{args.input_file!s}'", file=sys.stderr)
        return 1

    body = args.input_file.read_text()
    lines = body.splitlines()

    # An explicit flag wins; otherwise guess the REPL from the content.
    if args.python:
        repl = PythonRepl
    elif args.ipython:
        repl = IPythonRepl
    else:
        repl = get_repl(lines)

    if args.output_cleaning:
        lines = cleanup_outputs(lines, repl)
    lines = cleanup_prompts(lines, repl)
    lines = cleanup_whitespace(lines)

    if args.overwrite:
        # Terminate every line (including the last one) with a newline, like
        # print() does on the stdout path. Joining without a final newline
        # would strip it from the file and make the comparison below differ
        # for any newline-terminated input, even when nothing was cleaned.
        new_body = "".join(f"{line}\n" for line in lines)
        if new_body != body:
            args.input_file.write_text(new_body)
    else:
        print("\n".join(lines))
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment