-
-
Save joshuabambrick/a850d0e0050129b9252c748fa06c48b2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# disclaimer: this is perhaps the worst code I've ever written as it was written as fast as possible | |
from collections import Counter | |
import dataclasses | |
import sys | |
from pathlib import Path | |
import sys, tokenize | |
@dataclasses.dataclass
class Token:
    """A lexical token with its source position and a mutable `visited` flag.

    Mirrors the fields of a `tokenize.TokenInfo` tuple, flattened so the
    start/end coordinates are plain attributes.
    """
    text: str
    exact_type: int
    start_line_num: int
    start_col: int
    end_line_num: int
    end_col: int
    visited: bool

    @classmethod
    def from_tup(cls, tup) -> 'Token':
        """Build a Token from a `tokenize.TokenInfo`-style 5-tuple."""
        _type, token_text, start, end, _line = tup
        start_line, start_column = start
        end_line, end_column = end
        # Use the named `exact_type` attribute: for operators it is more
        # specific than the generic OP type in the tuple's first slot.
        return cls(
            text=token_text,
            exact_type=tup.exact_type,
            start_line_num=start_line,
            start_col=start_column,
            end_line_num=end_line,
            end_col=end_column,
            visited=False,
        )
# CLI arguments, read at import time:
#   argv[1] — file or directory to scan (a .py file is processed directly,
#             anything else is treated as a directory to rglob).
#   argv[2] — optional; any non-empty value enables writing rewritten files
#             back to disk.
root = Path(sys.argv[1])
output = bool(len(sys.argv) > 2 and sys.argv[2])
def main():
    """Scan the target .py file(s) for call sites passing `name=name` keyword
    arguments, rewrite each such argument with the proposed `=name` shorthand,
    and report call sites whose rewritten form fits on one <=80-char line.

    Reads module globals: `root` (file/directory to scan), `output` (when
    truthy, rewritten files are written back to disk), and the `Token` class.
    """
    matches = 0                  # total `name=name` keyword args collapsed
    total_lines_saved = 0        # physical lines removed across all files
    reductions = 0               # >=80-char call spans that now fit in 80 chars
    syntax_errors = []           # paths whose encoding detection failed
    biggest_change = 0           # most lines removed by a single call rewrite
    biggest_change_file = None
    if root.suffix == ".py":
        paths = [root]
    else:
        paths = root.rglob("**/*.py")
    for path in paths:
        print(' =====', path, '=====')
        try:
            with tokenize.open(path) as source:
                # need empty string to represent 'encoding'
                file = [''] + source.readlines()
            flat_file = ''.join(file)
        except SyntaxError:
            # tokenize.open raises SyntaxError when the encoding declaration
            # cannot be parsed
            syntax_errors.append(path)
            continue
        new_file = file.copy()   # edited copy; `file` keeps original offsets
        file_changed = False
        lines_removed = 0        # lines already deleted from new_file
        # per-original-line count of characters removed so far, to shift
        # token columns when a line is edited more than once
        chars_removed_from_line = Counter()
        with tokenize.open(path) as source:
            tokgen = tokenize.generate_tokens(source.readline)
            open_brackets = []       # stack of currently-open LPAR tokens
            bracket_contents = []    # stack of token lists, one per open LPAR
            bracket_last_three = []  # token window captured at each LPAR
            last_three = []          # sliding window of the latest 3 tokens
            for tup in map(Token.from_tup, tokgen):
                if len (last_three) == 3:
                    last_three.pop(0)
                last_three.append(tup)
                assert len(last_three) <= 3
                # every open bracket's contents also include nested tokens
                for contents in bracket_contents:
                    contents.append(tup)
                if tup.exact_type == tokenize.LPAR:
                    open_brackets.append(tup)
                    bracket_contents.append([tup])
                    bracket_last_three.append(last_three.copy())
                elif tup.exact_type == tokenize.RPAR:
                    open_bracket = open_brackets.pop()
                    close_bracket = tup
                    current_bracket_tokens = bracket_contents.pop()
                    current_bracket_last_three = bracket_last_three.pop()
                    # window at LPAR looked like [`def`, name, `(`] for a def
                    if current_bracket_last_three[0].exact_type == tokenize.NAME and current_bracket_last_three[0].text == 'def':
                        # this is a function definition, not a call
                        continue
                    assert current_bracket_tokens[0].exact_type == tokenize.LPAR
                    assert current_bracket_tokens[-1].exact_type == tokenize.RPAR
                    current_bracket_tokens = current_bracket_tokens[1:-1]
                    current_bracket_tokens = [t for t in current_bracket_tokens if t.exact_type not in (tokenize.NL, tokenize.NEWLINE)]
                    current_term_tokens = []        # tokens of the argument being accumulated
                    simplified_bracket_tokens = []  # textual form of each argument
                    original_matches = matches      # to detect a match in this call
                    for i, term in enumerate(current_bracket_tokens):
                        if term.visited:
                            # already consumed while processing a nested call
                            continue
                        term.visited = True
                        if term.exact_type != tokenize.COMMA:
                            current_term_tokens.append(term)
                            if i != len(current_bracket_tokens) - 1:
                                continue
                        # end of an argument (comma, or last token): try to
                        # collapse the exact 3-token pattern `name = name`
                        new_form = None
                        try:
                            lhs, mid, rhs = current_term_tokens
                            if lhs.text and lhs.text == rhs.text and mid.exact_type == tokenize.EQUAL:
                                new_form = f'={lhs.text}'
                                assert lhs.start_line_num == rhs.start_line_num
                                new_file_line_num = lhs.start_line_num - lines_removed
                                old_line = new_file[new_file_line_num]
                                # splice `=name` over the original `name=name`
                                # span, shifting columns by earlier edits
                                new_file[new_file_line_num] = old_line[:lhs.start_col - chars_removed_from_line[lhs.start_line_num]] + new_form + old_line[rhs.end_col - chars_removed_from_line[lhs.start_line_num]:]
                                chars_removed_from_line[lhs.start_line_num] += (rhs.end_col - lhs.start_col) - len(new_form)
                                matches += 1
                                file_changed = True
                        except:
                            # not a clean 3-token `name=name` term (unpacking
                            # above raises ValueError); leave argument as-is
                            pass
                        if new_form:
                            simplified_bracket_tokens.append(new_form)
                        elif current_term_tokens:
                            # keep the argument's original source text verbatim
                            start_idx = sum(map(len, file[:current_term_tokens[0].start_line_num])) + current_term_tokens[0].start_col
                            end_idx = sum(map(len, file[:current_term_tokens[-1].end_line_num])) + current_term_tokens[-1].end_col
                            simplified_bracket_tokens.append(flat_file[start_idx:end_idx])
                        current_term_tokens = []
                    if original_matches != matches:
                        # rebuild the entire call as a single line: prefix up to
                        # and including the open paren, rewritten args, close
                        start_idx = sum(map(len, file[:open_bracket.start_line_num]))
                        end_idx = sum(map(len, file[:close_bracket.end_line_num + 1])) - 1 # remove trailing newline
                        new_line = f'{flat_file[start_idx:start_idx + open_bracket.end_col]}{", ".join(simplified_bracket_tokens)}){file[close_bracket.end_line_num][close_bracket.end_col:]}'
                        if len(new_line) > 80:
                            # collapsed form still too long; leave untouched
                            continue
                        new_file_line_num = open_bracket.start_line_num - lines_removed
                        new_file[new_file_line_num] = new_line
                        new_file = new_file[:new_file_line_num + 1] + new_file[close_bracket.end_line_num - lines_removed + 1:]
                        change = close_bracket.end_line_num - open_bracket.start_line_num
                        lines_removed += change
                        if (end_idx - start_idx) < 80:
                            # only report spans that were originally long
                            continue
                        if change > biggest_change:
                            biggest_change = change
                            biggest_change_file = path
                        reductions += 1
                        total_lines_saved += change
                        print(' ', flat_file[start_idx:end_idx])
                        lines = sorted({open_bracket.start_line_num, close_bracket.end_line_num})
                        print(' ', f'on L{"-".join(map(str, lines))} can become')
                        print(' ', new_line)
                        print()
        if output and file_changed:
            with open(path, 'w') as fout:
                # drop the '' encoding placeholder at index 0
                fout.write(''.join(new_file[1:]))
    print(f'syntax errors ({len(syntax_errors)}):', syntax_errors)
    print('matches:', matches)
    print('lines_saved:', total_lines_saved)
    print('reductions (lines of length >80 become < 80):', reductions)
    print('biggest change:', biggest_change, f'({biggest_change_file})')
    # NOTE(review): nothing visible in this file ever calls main() — presumably
    # it is invoked manually or an `if __name__ == '__main__': main()` guard
    # was lost in the paste; confirm before relying on this script as-is.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I hacked this together very quickly and in no way support all the design decisions made in the process! That said, if I recall correctly, the output of
ast
ignores the positions of characters (e.g. it removes comments and whitespace) and this made it impossible to experiment with introducing the new syntax in place. This may have been a circumventable issue, but I didn't spot a solution at the time.