@pquentin
Last active January 11, 2018 06:26
import difflib
import glob
import os
import sys
from tokenize import tokenize as std_tokenize
from tokenize import ASYNC, AWAIT, NAME

import click

ASYNC_TO_SYNC = {
    '__aenter__': '__enter__',
    '__aexit__': '__exit__',
    '__aiter__': '__iter__',
    '__anext__': '__next__',
    # TODO StopIteration is still accepted in Python 2, but the right change
    # is 'raise StopAsyncIteration' -> 'return' since we want to use bleached
    # code in Python 3.7+
    'StopAsyncIteration': 'StopIteration',
}
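
# For example (illustrative, names made up): together with the async/await
# removal below, this mapping turns
#     async def __aenter__(self):
#         return await self._lock.__aenter__()
# into
#     def __enter__(self):
#         return self._lock.__enter__()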

# This script removes 'await' and 'async' keywords and rewrites some tokens.
#
# This is done using `tokenize` from the Python standard library to parse an
# input file into tokens. Since untokenize(tokenize(f)) != f with the standard
# library, we have to provide our own functions built around the standard
# library's tokenize:
# * tokenize outputs tuples: each token together with the whitespace
#   (possibly empty) that needs to go before that token
# * bleach_tokens rewrites this token stream to remove the async color
# * untokenize assembles the tokens by simply concatenating all values
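#
# A rough illustration (token numbers elided, indentation and newline tokens
# ignored): inside an `async def`, the statement `print(await fetch())` flows
# through the pipeline as
#     tokenize      -> ('', 'print') ('', '(') ('', 'await') (' ', 'fetch') ('', '(') ('', ')') ('', ')')
#     bleach_tokens -> ('', 'print') ('', '(') ('', 'fetch') ('', '(') ('', ')') ('', ')')
#     untokenize    -> print(fetch())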


def tokenize(f):
    last_end = (0, 0)
    last_line = 0
    for toknum, tokval, start, end, _ in std_tokenize(f.readline):
        if start == end:
            continue  # nothing to print
        if start[0] > last_line:
            last_end = (start[0], 0)
            last_line = start[0]
        space = ''
        if start > last_end:
            assert start[0] == end[0]
            space = ' ' * (start[1] - last_end[1])
        yield (space, toknum, tokval)
        last_end = end
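
# Note on tokenize() above: whitespace is recomputed per physical line. When a
# token starts a new line, last_end is reset to column 0, so the gap up to the
# token's start column (the line's indentation, when the tokenizer does not
# emit a separate INDENT token) becomes its leading space; within a line, the
# space is simply the gap between consecutive tokens.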


def bleach_tokens(tokens):
    # TODO __await__, ...?
    used_space = None
    for space, toknum, tokval in tokens:
        if toknum in [ASYNC, AWAIT]:  # TODO Python 3.7+
            # When removing async or await, we want to reuse the whitespace
            # that came before it in front of the next token, so that
            # `print(await stuff)` becomes `print(stuff)` and not
            # `print( stuff)`
            used_space = space
        else:
            if toknum == NAME and tokval in ASYNC_TO_SYNC:
                tokval = ASYNC_TO_SYNC[tokval]
            if used_space is None:
                used_space = space
            yield (used_space, tokval)
            used_space = None
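
# Note: on Python 3.5/3.6, the tokenizer emits the ASYNC token for `async with`
# and `async for` as well, so for example `async with lock:` is bleached to
# `with lock:` and `async for item in source:` to `for item in source:`.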


def untokenize(tokens):
    output = ''.join(space + tokval for space, tokval in tokens)
    for line in output.split('\n'):
        yield line.rstrip(' ')


def get_diff(initial, result, filename):
    return difflib.unified_diff(
        [l + '\n' for l in initial.split('\n')],
        [l + '\n' for l in result.split('\n')],
        fromfile='{} (original)'.format(filename),
        tofile='{} (bleached)'.format(filename))


def get_paths(path):
    if os.path.isfile(path):
        yield path
    else:
        for expand in ('**/*.py', '**.py'):
            yield from glob.iglob(os.path.join('{}/{}'.format(path, expand)))


@click.command()
@click.option(
    '-w', '--write', is_flag=True, help='write changes to the filesystem')
@click.argument('inpath', type=click.Path(exists=True, dir_okay=True))
@click.argument('outpath', type=click.Path(dir_okay=True))
def bleach(write, inpath, outpath):
    for filepath in get_paths(inpath):
        diff = None
        with open(filepath, 'rb') as f:
            initial = f.read()
            f.seek(0)
            tokens = tokenize(f)
            tokens = bleach_tokens(tokens)
            result = '\n'.join(untokenize(tokens))
            diff = list(get_diff(initial.decode('utf8'), result, filepath))

        if write:
            outfilepath = filepath.replace(inpath, outpath)
            sys.stdout.writelines(diff)
            os.makedirs(os.path.dirname(outfilepath), exist_ok=True)
            # TODO get encoding from tokenize
            with open(outfilepath, 'w') as f:
                print(result, file=f, end='')


if __name__ == '__main__':
    assert sys.version_info.major >= 3
    bleach()
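
# Example invocation (hypothetical paths): bleach every .py file found under
# async_pkg/ and write the synchronous copies under sync_pkg/, printing a
# unified diff of the changes:
#
#     python bleach.py -w async_pkg sync_pkg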