Created
June 5, 2025 17:28
-
-
Save KelWill/5ec915f7bd18fb9b3d63420077685fdf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# uv add openai parse rich
# DANGER: back up your documents before applying these edits | |
# code generated by cursor | |
# Usage: uv run grammar_corrector.py <directory> generate|apply | |
# generate will get the proposed diffs | |
# apply will interactively apply them | |
import os | |
from pathlib import Path | |
import pdb | |
import sys | |
from typing import List | |
from openai import OpenAI | |
import re | |
from parse import parse | |
import difflib | |
from rich.console import Console | |
from rich.prompt import Prompt, Confirm | |
from rich.text import Text | |
# Shared OpenAI client; the API key is read from the OPEN_AI_KEY environment
# variable (None is passed through when it is unset).
client = OpenAI(
    api_key=os.environ.get("OPEN_AI_KEY"),
)

# Rich console used for all terminal output in this script.
console = Console()
# Template for a single suggested edit. It is embedded in the prompt as the
# required output format and also used as the parse pattern for the reply.
# Placeholders: {original}, {rationale}, {replacement}.
replacement_pattern = "\n".join(
    (
        "<<<<<<< SEARCH",
        "{original}",
        "======= RATIONALE = {rationale}",
        "{replacement}",
        ">>>>>>> REPLACE",
    )
)
def get_corrections_from_openai(text):
    """Ask the OpenAI chat API for grammar and spelling corrections to ``text``.

    The instructions (which embed ``replacement_pattern`` as the required
    output format) and the document are sent together as a single user
    message to the ``o4-mini`` model.

    Args:
        text: The document to proofread.

    Returns:
        The content of the first choice in the reply, or an empty string
        when the API returns no message content.
    """
    prompt = (
        f"""Suggest GRAMMAR AND SPELLING CORRECTIONS in the format:
{replacement_pattern}
ONLY LOOK FOR GRAMMAR CORRECTIONS.
- Add your custom grammar instructions here
- ONLY include the original text as-is. Don't include "..." or any other indicators of past text. The {{original}} text will be used in a find-and-replace
ONLY INCLUDE GRAMMAR CORRECTIONS. DO NOT INCLUDE STYLE CORRECTIONS.
"""
    )
    full_prompt = f"{prompt}\n\n{text}"
    response = client.chat.completions.create(
        model="o4-mini",
        messages=[{"role": "user", "content": full_prompt}],
    )
    # The message content is optional in the API response; normalise a
    # missing value to "" so the result can always be written to a file.
    return response.choices[0].message.content or ""
def parse_suggestions(diff_text):
    """Extract suggested edits from SEARCH/REPLACE formatted text.

    The text is split on the opening marker and each piece is matched against
    ``replacement_pattern``. Pieces that do not match are skipped.

    Args:
        diff_text: Raw suggestion text, possibly containing several blocks
            and surrounding commentary.

    Returns:
        A list of dicts with the keys ``"original"``, ``"rationale"`` and
        ``"replacement"`` (all whitespace-stripped), in order of appearance.
    """
    start_marker = "<<<<<<< SEARCH"
    end_marker = ">>>>>>> REPLACE"

    entries = []
    # Anything before the first opening marker is preamble, hence [1:].
    for block in diff_text.split(start_marker)[1:]:
        # Add back the opening marker that split() consumed.
        full_block = (start_marker + block).strip()

        # parse() has to match the entire string, so text following the
        # closing marker (a code fence, model commentary, ...) would make an
        # otherwise valid block fail. Cut at the LAST closing marker so that
        # blocks which already ended on the marker are left untouched.
        end = full_block.rfind(end_marker)
        if end != -1:
            full_block = full_block[: end + len(end_marker)]

        try:
            result = parse(replacement_pattern, full_block)
        except Exception:
            # Deliberately best-effort: one malformed block must not discard
            # the remaining suggestions.
            continue

        if result:
            entries.append({
                "original": result["original"].strip(),
                "rationale": result["rationale"].strip(),
                "replacement": result["replacement"].strip(),
            })
    return entries
def format_diff(original, replacement):
    """Render a two-line, character-level before/after comparison.

    Args:
        original: The text as it currently reads.
        replacement: The proposed text.

    Returns:
        A rich ``Text``. When both strings are equal it is a dim
        "No differences found" notice; otherwise it holds an "Original: "
        line (removed characters in red strikethrough) followed by a
        "Suggested: " line (added characters in bold green).
    """
    if original == replacement:
        return Text("No differences found", style="dim")

    before = Text()
    before.append("Original: ", style="bold yellow")
    after = Text()
    after.append("Suggested: ", style="bold green")

    opcodes = difflib.SequenceMatcher(None, original, replacement).get_opcodes()
    for tag, i1, i2, j1, j2 in opcodes:
        if tag == "equal":
            # Unchanged run: shown unstyled on both lines.
            shared = original[i1:i2]
            before.append(shared)
            after.append(shared)
            continue
        # "replace" contributes to both lines; "delete" only touches the
        # original line and "insert" only the suggested line.
        if tag in ("delete", "replace"):
            before.append(original[i1:i2], style="red strike")
        if tag in ("insert", "replace"):
            after.append(replacement[j1:j2], style="green bold")

    combined = Text()
    combined.append_text(before)
    combined.append("\n")
    combined.append_text(after)
    return combined
def apply_changes_interactively(text, changes):
    """Walk through suggested changes, applying those the user approves.

    For each change the diff and rationale are shown and the user chooses to
    accept it, reject it, or type a custom replacement. Approved changes are
    applied with ``str.replace``, i.e. to every occurrence of the original
    snippet in ``text``.

    Args:
        text: The document text to edit.
        changes: Iterable of dicts with the keys ``"original"``,
            ``"rationale"`` and ``"replacement"``.

    Returns:
        The text with all approved changes applied.
    """
    for change in changes:
        console.rule("[bold blue]Suggested Change")
        # Show the character-level diff.
        console.print(format_diff(change["original"], change["replacement"]))
        # The rationale is model-generated text: disable markup so square
        # brackets in it are printed literally instead of being interpreted
        # as console style tags.
        console.print(f"Rationale: {change['rationale']}", style="dim", markup=False)
        choice = Prompt.ask(
            "[bold cyan]Accept (a, y), Reject (r, n), or Custom (c)?",
            choices=["a", "y", "r", "n", "c"], default="a"
        )
        if choice not in ("a", "y", "c"):
            console.print("Skipped.\n")
            continue

        # Check up front for both accept and custom, so the user is told when
        # nothing can be replaced (and is not asked to type a replacement
        # that would have no effect).
        if change["original"] not in text:
            console.print("[bold yellow]Unable to find original in text to replace[/bold yellow]")
            continue

        if choice == "c":
            new_value = Prompt.ask("Enter your custom replacement")
        else:
            new_value = change["replacement"]
        text = text.replace(change["original"], new_value)
    return text
def sorted_alphanumeric(list: List):
    """Return the strings sorted in natural order ("file2" before "file10").

    Each string is split into alternating text and ASCII-digit runs; digit
    runs are compared as integers and text runs as strings.

    Args:
        list: The strings to sort. The input is not modified.

    Returns:
        A new sorted list.
    """
    def alphanum_key(key):
        # With a capturing group, re.split() returns text runs at even
        # indices and the captured digit runs at odd indices. Converting by
        # position (rather than with str.isdigit()) keeps every key
        # comparable: isdigit() also accepts characters such as "²" (which
        # int() rejects) and non-ASCII digits (which would put an int where
        # other keys have text).
        parts = re.split(r"([0-9]+)", key)
        return [int(part) if index % 2 else part for index, part in enumerate(parts)]

    return sorted(list, key=alphanum_key)
def main(dir, mode):
    """Generate or interactively apply grammar suggestions for files in ``dir``.

    In ``"generate"`` mode every regular file in the directory is sent to
    OpenAI and the reply is written next to it as
    ``<file>.suggested_changes``.

    In ``"apply"`` mode the first file (in natural sort order) that has a
    pending ``.suggested_changes`` file is processed interactively. The
    suggestions file is then renamed with a ``_complete`` suffix and the
    function returns, so one file is handled per invocation.

    Args:
        dir: Directory containing the documents.
        mode: Either ``"generate"`` or ``"apply"``.

    Raises:
        ValueError: If ``mode`` is neither ``"generate"`` nor ``"apply"``.
    """
    if mode not in ("generate", "apply"):
        raise ValueError(f"{mode} isn't generate or apply")

    names = sorted_alphanumeric(os.listdir(dir))
    # This also filters out "*.suggested_changes_complete" files.
    files = [f"{dir}/{name}" for name in names if ".suggested_changes" not in name]

    for filename in files:
        # os.listdir() also returns subdirectories, which cannot be opened
        # as documents.
        if not os.path.isfile(filename):
            continue
        suggested_changes_filename = filename + ".suggested_changes"

        if mode == "generate":
            with open(filename, "r", encoding="utf-8") as f:
                original_text = f.read()
            console.print(f"Getting suggestions from OpenAI for {filename}", style="bold green")
            suggestions = get_corrections_from_openai(original_text)
            with open(suggested_changes_filename, "w", encoding="utf-8") as f:
                console.print(f"Writing corrections for {filename} to {suggested_changes_filename}", style="bold green")
                f.write(suggestions)
            continue

        # mode == "apply"
        file_path = Path(suggested_changes_filename)
        if not file_path.exists():
            continue
        # Only read the document once it is known to have pending
        # suggestions, so unrelated files in the directory are never opened.
        with open(filename, "r", encoding="utf-8") as f:
            original_text = f.read()
        # The suggestions were written as UTF-8; read them back the same way
        # instead of relying on the platform default encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            changes = f.read()
        new_text = apply_changes_interactively(original_text, parse_suggestions(changes))
        if new_text != original_text:
            if Confirm.ask("Overwrite the original file with changes?", default=True):
                with open(filename, "w", encoding="utf-8") as f:
                    f.write(new_text)
                console.print(f"File '{filename}' updated!", style="bold green")
            else:
                console.print("No changes were saved.", style="bold red")
        # Mark the suggestions as handled so the next run moves on.
        file_path.rename(file_path.with_suffix(file_path.suffix + "_complete"))
        # only one at a time for now
        return
# Script entry point: expects exactly two arguments, <directory> and <mode>.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        console.print("Usage: uv run grammar_corrector.py <directory> generate|apply", style="bold red")
        sys.exit(1)
    target_dir, run_mode = cli_args
    main(target_dir, run_mode)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment