Skip to content

Instantly share code, notes, and snippets.

@neutrinoceros
Last active January 21, 2024 15:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save neutrinoceros/aa2a20ffd00b4cd0dd4665597736280d to your computer and use it in GitHub Desktop.
Save neutrinoceros/aa2a20ffd00b4cd0dd4665597736280d to your computer and use it in GitHub Desktop.
A script I use to clean up Python "scripts" provided in the form of copy-pasted REPL sessions
# Requires Python 3.8 or newer; only tested manually with Python 3.12
from __future__ import annotations
import argparse
import re
import sys
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
@dataclass(frozen=True)
class Repl:
    """Describe the prompts of an interactive interpreter.

    Both fields are regular-expression fragments (not plain strings); they
    are interpolated verbatim into the pattern built by ``regexp``.
    """

    # Pattern fragment matching the primary prompt (start of a statement).
    prompt: str
    # Pattern fragment matching the continuation prompt (following lines of
    # a multi-line statement).
    continuation: str

    @cached_property
    def regexp(self) -> re.Pattern[str]:
        """Compiled pattern matching either prompt at the start of a line.

        The pattern is compiled on first access and then cached on the
        instance.
        """
        alternatives = f"{self.prompt}|{self.continuation}"
        return re.compile(rf"^({alternatives})")


PythonRepl = Repl(prompt=r">>> ", continuation=r"\.\.\. ")
# IPython right-aligns the continuation prompt under ``In [N]: ``, so the
# number of leading spaces depends on how many digits N has. Accept any
# amount of leading spaces instead of a fixed count.
IPythonRepl = Repl(prompt=r"In \[\d+\]: ", continuation=r" *\.\.\.: ")
# Empty fragments produce a pattern with an empty match on every line, so
# filtering with it keeps every line and substituting with it removes nothing.
NullRepl = Repl(prompt=r"", continuation=r"")  # does nothing

# REPLs tried by auto-detection, in priority order.
ALL_REPLS = [PythonRepl, IPythonRepl]
def get_repl(lines: list[str]) -> Repl:
    """Guess which REPL produced *lines*.

    Candidates from ``ALL_REPLS`` are tried in order; the first one whose
    prompt pattern matches at least one line is returned. When no candidate
    matches any line (including when *lines* is empty), ``NullRepl`` is
    returned.
    """
    for repl in ALL_REPLS:
        if any(repl.regexp.match(line) for line in lines):
            return repl
    return NullRepl
def cleanup_outputs(lines: list[str], repl: Repl) -> list[str]:
    """Return only the lines that start with one of *repl*'s prompts.

    Lines that do not match ``repl.regexp`` are dropped. The kept lines are
    returned unchanged, prompts included, in their original order.
    """
    starts_with_prompt = repl.regexp.match
    return [line for line in lines if starts_with_prompt(line)]
def cleanup_prompts(lines: list[str], repl: Repl) -> list[str]:
    """Return *lines* with any leading prompt of *repl* removed.

    Lines that do not start with a prompt are returned unchanged.
    """
    pattern = repl.regexp
    return [pattern.sub("", line) for line in lines]
def cleanup_whitespace(lines: list[str]) -> list[str]:
    """Return *lines* with trailing whitespace stripped from each line.

    Leading whitespace is preserved.
    """
    return [line.rstrip() for line in lines]
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: clean up a copy-pasted REPL session.

    Reads ``input_file``, removes REPL outputs (unless
    ``--no-output-cleaning`` is given), prompts and trailing whitespace,
    then either prints the result to stdout or, with ``--overwrite``, writes
    it back to the input file.

    Parameters
    ----------
    argv:
        Command-line arguments, passed to ``ArgumentParser.parse_args``.
        ``None`` makes argparse fall back to ``sys.argv``.

    Returns
    -------
    int
        Process exit status: 0 on success, 1 when ``input_file`` is not an
        existing file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", type=Path, help="the file to be read")
    parser.add_argument(
        "--no-output-cleaning",
        dest="output_cleaning",
        action="store_false",
        help="disable output cleaning (intended for debugging)",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="redirect output to the input file instead of stdout",
    )
    repl_selector = parser.add_mutually_exclusive_group(required=False)
    repl_selector.add_argument(
        "--python",
        action="store_true",
        help="assume input comes from the basic Python REPL",
    )
    repl_selector.add_argument(
        "--ipython",
        action="store_true",
        help="assume input comes from the IPython REPL",
    )
    args = parser.parse_args(argv)

    if not args.input_file.is_file():
        print(f"Error: no such file '{args.input_file!s}'", file=sys.stderr)
        return 1

    body = args.input_file.read_text()
    lines = body.splitlines()

    # An explicit flag wins; otherwise guess the REPL from the content.
    if args.python:
        repl = PythonRepl
    elif args.ipython:
        repl = IPythonRepl
    else:
        repl = get_repl(lines)

    if args.output_cleaning:
        lines = cleanup_outputs(lines, repl)
    lines = cleanup_prompts(lines, repl)
    lines = cleanup_whitespace(lines)

    result = "\n".join(lines)
    if args.overwrite:
        # print() terminates its output with a newline but write_text() does
        # not, so add one here: the rewritten file then ends with a newline,
        # and an already-clean, newline-terminated file compares equal to
        # its original content and is left untouched.
        file_content = result + "\n" if lines else ""
        if file_content != body:
            args.input_file.write_text(file_content)
    else:
        print(result)
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment