@pquentin
Last active January 11, 2018 06:26
import difflib
import glob
import os
import sys
from tokenize import tokenize as std_tokenize
from tokenize import ASYNC, AWAIT, NAME

import click

ASYNC_TO_SYNC = {
    '__aenter__': '__enter__',
    '__aexit__': '__exit__',
    '__aiter__': '__iter__',
    '__anext__': '__next__',
    # TODO StopIteration is still accepted in Python 2, but the right change
    # is 'raise StopAsyncIteration' -> 'return' since we want to use bleached
    # code in Python 3.7+
    'StopAsyncIteration': 'StopIteration',
}
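
# For example (illustrative, names made up): together with the async/await
# removal below, this mapping turns
#     async def __aenter__(self):
#         return await self._lock.__aenter__()
# into
#     def __enter__(self):
#         return self._lock.__enter__()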

# This script removes 'await' and 'async' keywords and rewrites some tokens.
#
# This is done using `tokenize` from the Python standard library to parse an
# input file into tokens. Since untokenize(tokenize(f)) != f with the standard
# library, we have to provide our own functions built around the standard
# library's tokenize:
# * tokenize outputs tuples: each token together with the whitespace
#   (possibly empty) that needs to go before that token
# * bleach_tokens rewrites this token stream to remove the async color
# * untokenize assembles the tokens by simply concatenating all values
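#
# A rough illustration (token numbers elided, indentation and newline tokens
# ignored): inside an `async def`, the statement `print(await fetch())` flows
# through the pipeline as
#     tokenize      -> ('', 'print') ('', '(') ('', 'await') (' ', 'fetch') ('', '(') ('', ')') ('', ')')
#     bleach_tokens -> ('', 'print') ('', '(') ('', 'fetch') ('', '(') ('', ')') ('', ')')
#     untokenize    -> print(fetch())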


def tokenize(f):
    last_end = (0, 0)
    last_line = 0
    for toknum, tokval, start, end, _ in std_tokenize(f.readline):
        if start == end:
            continue  # nothing to print
        if start[0] > last_line:
            last_end = (start[0], 0)
            last_line = start[0]
        space = ''
        if start > last_end:
            assert start[0] == end[0]
            space = ' ' * (start[1] - last_end[1])
        yield (space, toknum, tokval)
        last_end = end
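
# Note on tokenize() above: whitespace is recomputed per physical line. When a
# token starts a new line, last_end is reset to column 0, so the gap up to the
# token's start column (the line's indentation, when the tokenizer does not
# emit a separate INDENT token) becomes its leading space; within a line, the
# space is simply the gap between consecutive tokens.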


def bleach_tokens(tokens):
    # TODO __await__, ...?
    used_space = None
    for space, toknum, tokval in tokens:
        if toknum in [ASYNC, AWAIT]:  # TODO Python 3.7+
            # When removing async or await, we want to reuse the whitespace
            # that came before it in front of the next token, so that
            # `print(await stuff)` becomes `print(stuff)` and not
            # `print( stuff)`
            used_space = space
        else:
            if toknum == NAME and tokval in ASYNC_TO_SYNC:
                tokval = ASYNC_TO_SYNC[tokval]
            if used_space is None:
                used_space = space
            yield (used_space, tokval)
            used_space = None
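
# Note: on Python 3.5/3.6, the tokenizer emits the ASYNC token for `async with`
# and `async for` as well, so for example `async with lock:` is bleached to
# `with lock:` and `async for item in source:` to `for item in source:`.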


def untokenize(tokens):
    output = ''.join(space + tokval for space, tokval in tokens)
    for line in output.split('\n'):
        yield line.rstrip(' ')


def get_diff(initial, result, filename):
    return difflib.unified_diff(
        [l + '\n' for l in initial.split('\n')],
        [l + '\n' for l in result.split('\n')],
        fromfile='{} (original)'.format(filename),
        tofile='{} (bleached)'.format(filename))


def get_paths(path):
    if os.path.isfile(path):
        yield path
    else:
        for expand in ('**/*.py', '**.py'):
            yield from glob.iglob(os.path.join('{}/{}'.format(path, expand)))


@click.command()
@click.option(
    '-w', '--write', is_flag=True, help='write changes to the filesystem')
@click.argument('inpath', type=click.Path(exists=True, dir_okay=True))
@click.argument('outpath', type=click.Path(dir_okay=True))
def bleach(write, inpath, outpath):
    for filepath in get_paths(inpath):
        diff = None
        with open(filepath, 'rb') as f:
            initial = f.read()
            f.seek(0)
            tokens = tokenize(f)
            tokens = bleach_tokens(tokens)
            result = '\n'.join(untokenize(tokens))
            diff = list(get_diff(initial.decode('utf8'), result, filepath))

        if write:
            outfilepath = filepath.replace(inpath, outpath)
            sys.stdout.writelines(diff)
            os.makedirs(os.path.dirname(outfilepath), exist_ok=True)
            # TODO get encoding from tokenize
            with open(outfilepath, 'w') as f:
                print(result, file=f, end='')


if __name__ == '__main__':
    assert sys.version_info.major >= 3
    bleach()
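
# Example invocation (hypothetical paths): bleach every .py file found under
# async_pkg/ and write the synchronous copies under sync_pkg/, printing a
# unified diff of the changes:
#
#     python bleach.py -w async_pkg sync_pkg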