@joshuabambrick
Created November 5, 2023 18:20
# disclaimer: this is perhaps the worst code I've ever written as it was written as fast as possible
from collections import Counter
import dataclasses
import sys
import tokenize
from pathlib import Path


@dataclasses.dataclass
class Token:
    text: str
    exact_type: int
    start_line_num: int
    start_col: int
    end_line_num: int
    end_col: int
    visited: bool

    @classmethod
    def from_tup(cls, tup) -> 'Token':
        # tup is a tokenize.TokenInfo: (type, string, start, end, line)
        toktype, ttext, (slineno, scol), (elineno, ecol), ltext = tup
        return cls(
            text=ttext,
            exact_type=tup.exact_type,
            start_line_num=slineno,
            start_col=scol,
            end_line_num=elineno,
            end_col=ecol,
            visited=False,
        )


root = Path(sys.argv[1])  # a .py file, or a directory to scan recursively
output = bool(len(sys.argv) > 2 and sys.argv[2])  # any truthy second argument writes changes back to disk


def main():
    matches = 0
    total_lines_saved = 0
    reductions = 0
    syntax_errors = []
    biggest_change = 0
    biggest_change_file = None
    if root.suffix == ".py":
        paths = [root]
    else:
        paths = root.rglob("**/*.py")
    for path in paths:
        print(' =====', path, '=====')
        try:
            with tokenize.open(path) as source:
                # need empty string to represent 'encoding' so 1-based token line numbers index this list directly
                file = [''] + source.readlines()
                flat_file = ''.join(file)
        except SyntaxError:
            syntax_errors.append(path)
            continue
        new_file = file.copy()
        file_changed = False
        lines_removed = 0
        chars_removed_from_line = Counter()
        with tokenize.open(path) as source:
            tokgen = tokenize.generate_tokens(source.readline)
            open_brackets = []
            bracket_contents = []
            bracket_last_three = []
            last_three = []  # rolling window of the three most recent tokens
            for tup in map(Token.from_tup, tokgen):
                if len(last_three) == 3:
                    last_three.pop(0)
                last_three.append(tup)
                assert len(last_three) <= 3
                for contents in bracket_contents:
                    contents.append(tup)
                if tup.exact_type == tokenize.LPAR:
                    open_brackets.append(tup)
                    bracket_contents.append([tup])
                    bracket_last_three.append(last_three.copy())
                elif tup.exact_type == tokenize.RPAR:
                    open_bracket = open_brackets.pop()
                    close_bracket = tup
                    current_bracket_tokens = bracket_contents.pop()
                    current_bracket_last_three = bracket_last_three.pop()
                    if current_bracket_last_three[0].exact_type == tokenize.NAME and current_bracket_last_three[0].text == 'def':
                        # this is a function definition, not a call
                        continue
                    assert current_bracket_tokens[0].exact_type == tokenize.LPAR
                    assert current_bracket_tokens[-1].exact_type == tokenize.RPAR
                    current_bracket_tokens = current_bracket_tokens[1:-1]
                    current_bracket_tokens = [t for t in current_bracket_tokens if t.exact_type not in (tokenize.NL, tokenize.NEWLINE)]
                    current_term_tokens = []
                    simplified_bracket_tokens = []
                    original_matches = matches
                    for i, term in enumerate(current_bracket_tokens):
                        if term.visited:
                            continue
                        term.visited = True
                        if term.exact_type != tokenize.COMMA:
                            current_term_tokens.append(term)
                            if i != len(current_bracket_tokens) - 1:
                                continue
                        new_form = None
                        try:
                            # a term of exactly `name=name` becomes the shorthand `=name`
                            lhs, mid, rhs = current_term_tokens
                            if lhs.text and lhs.text == rhs.text and mid.exact_type == tokenize.EQUAL:
                                new_form = f'={lhs.text}'
                                assert lhs.start_line_num == rhs.start_line_num
                                new_file_line_num = lhs.start_line_num - lines_removed
                                old_line = new_file[new_file_line_num]
                                #print('OLD:', old_line)
                                #print('NEW:', old_line[:lhs.start_col - chars_removed_from_line[lhs.start_line_num]] + new_form + old_line[rhs.end_col - chars_removed_from_line[lhs.start_line_num]:])
                                new_file[new_file_line_num] = old_line[:lhs.start_col - chars_removed_from_line[lhs.start_line_num]] + new_form + old_line[rhs.end_col - chars_removed_from_line[lhs.start_line_num]:]
                                chars_removed_from_line[lhs.start_line_num] += (rhs.end_col - lhs.start_col) - len(new_form)
                                matches += 1
                                file_changed = True
                        except Exception:
                            # term isn't exactly three tokens, or the keyword spans multiple lines
                            pass
                        #if any(term.exact_type == tokenize.EQUAL for term in current_term_tokens):
                        #    print('failed', [t.text for t in current_term_tokens])
                        if new_form:
                            simplified_bracket_tokens.append(new_form)
                        elif current_term_tokens:
                            start_idx = sum(map(len, file[:current_term_tokens[0].start_line_num])) + current_term_tokens[0].start_col
                            end_idx = sum(map(len, file[:current_term_tokens[-1].end_line_num])) + current_term_tokens[-1].end_col
                            simplified_bracket_tokens.append(flat_file[start_idx:end_idx])
                        current_term_tokens = []
                    if original_matches != matches:
                        # collapse the whole call onto one line using the simplified terms
                        start_idx = sum(map(len, file[:open_bracket.start_line_num]))
                        end_idx = sum(map(len, file[:close_bracket.end_line_num + 1])) - 1  # remove trailing newline
                        new_line = f'{flat_file[start_idx:start_idx + open_bracket.end_col]}{", ".join(simplified_bracket_tokens)}){file[close_bracket.end_line_num][close_bracket.end_col:]}'
                        if len(new_line) > 80:
                            continue
                        new_file_line_num = open_bracket.start_line_num - lines_removed
                        new_file[new_file_line_num] = new_line
                        new_file = new_file[:new_file_line_num + 1] + new_file[close_bracket.end_line_num - lines_removed + 1:]
                        change = close_bracket.end_line_num - open_bracket.start_line_num
                        lines_removed += change
                        if (end_idx - start_idx) < 80:
                            # only report calls long enough to be worth showing
                            continue
                        if change > biggest_change:
                            biggest_change = change
                            biggest_change_file = path
                        reductions += 1
                        total_lines_saved += change
                        print(' ', flat_file[start_idx:end_idx])
                        lines = sorted({open_bracket.start_line_num, close_bracket.end_line_num})
                        print(' ', f'on L{"-".join(map(str, lines))} can become')
                        print(' ', new_line)
                        print()
        if output and file_changed:
            with open(path, 'w') as fout:
                fout.write(''.join(new_file[1:]))  # drop the '' encoding placeholder
    print(f'syntax errors ({len(syntax_errors)}):', syntax_errors)
    print('matches:', matches)
    print('lines_saved:', total_lines_saved)
    print('reductions (lines of length >80 become < 80):', reductions)
    print('biggest change:', biggest_change, f'({biggest_change_file})')


if __name__ == '__main__':
    main()
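
For illustration (not part of the gist itself; the file name is hypothetical): the script hunts for call arguments of the form `name=name` and rewrites them to the proposed shorthand `=name`, collapsing multi-line calls where the result fits in 80 characters.

# before, in some hypothetical example.py:
process(data=data, verbose=verbose)
# after the rewrite (the proposed shorthand syntax, not valid Python today):
process(=data, =verbose)

Invoked as `python <script>.py example.py` it prints suggested rewrites (for calls whose original span exceeds 80 characters); passing any truthy second argument also writes the rewritten files back in place.
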
@ajoino commented Jan 17, 2024

Out of curiosity, is there a reason you didn't use the ast module? It seems like the more correct layer of abstraction to me but maybe there's something I'm overlooking?

@joshuabambrick (Author) replied

I hacked this together very quickly and in no way endorse all the design decisions made in the process! That said, if I recall correctly, the output of ast ignores the exact positions of characters (e.g. it removes comments and whitespace), which made it impossible to experiment with introducing the new syntax in place. This may have been a circumventable issue, but I didn't spot a solution at the time.
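
A quick sketch of the distinction, for anyone curious (assumes Python 3.9+ for ast.unparse; not from the gist):

import ast
import io
import tokenize

src = "f(x=x)  # keep me\n"

# an ast round-trip normalises the source: comments and layout are lost
print(ast.unparse(ast.parse(src)))  # -> f(x=x)

# tokenize reports exact (line, column) spans for every token,
# which is what lets the gist splice the new syntax in place
for tok in tokenize.generate_tokens(io.StringIO(src).readline):
    print(tok.string, tok.start, tok.end)
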
