Skip to content

Instantly share code, notes, and snippets.

@boxed
Last active September 12, 2018 13:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save boxed/610b2ba73066c96e9781aed7c0c0b25c to your computer and use it in GitHub Desktop.
Save boxed/610b2ba73066c96e9781aed7c0c0b25c to your computer and use it in GitHub Desktop.
This tool analyses your code base for cases where a short form for keyword arguments would be nice to have
import os
from parso import parse
from collections import defaultdict
# Markers that handle_argument returns in place of a variable name.
kwarg_that_cannot_match = '<kwarg_that_cannot_match>'
hardcoded = '<hardcoded>'
misc = '<misc>'
matched_kwarg = '<matched_kwarg>'

# Running tallies. handle_argument declares args, kwargs, kwargs_foo_equal_foo
# and args_kwargs as globals and increments them; passed_value is not
# referenced anywhere else in the visible code.
args = kwargs = kwargs_foo_equal_foo = passed_value = args_kwargs = 0

# (function_name, argument_values) tuples appended by handle_node.
calls = []

# Function names seen in more than one funcdef.
non_unique_function_names = set()
# function name -> list of parameter names, for names seen in a single funcdef.
unique_function_name_parameters = {}

# len(variable name) -> number of times a variable of that length was passed.
argument_variable_name_lengths = defaultdict(int)
def get_arguments(i):
    """Return the argument nodes of a call as a list.

    parso represents a call with one argument and a call with several
    arguments very differently: several arguments arrive wrapped in an
    ``arglist`` node, a single argument arrives as the bare node. This
    normalises both shapes.

    Args:
        i: the node found between the parentheses of a call.

    Returns:
        ``i.children`` (the very same list, separators included) when ``i`` is
        an ``arglist``; otherwise a new one-element list containing ``i``.
    """
    if i.type == 'arglist':
        return i.children
    return [i]
def param_name(i):
    """Return the name of a ``param`` node.

    Args:
        i: a node whose first child is either the name leaf itself or a
            ``tfpdef`` node whose own first child is the name leaf
            (presumably the annotated form ``name: annotation`` -- confirm
            against the parso grammar).

    Returns:
        The ``value`` of the name leaf.
    """
    # NOTE(review): for a star-prefixed parameter the first child is presumably
    # the ``*`` / ``**`` operator leaf, in which case its value is returned
    # instead of the parameter name -- confirm whether that is intended.
    first = i.children[0]
    if first.type == 'tfpdef':
        return first.children[0].value
    return first.value
def handle_list(result):
    """Run ``handle_node`` on every direct child of *result*.

    Args:
        result: any node exposing a ``children`` sequence (the parsed module
            or an inner node).
    """
    for child in result.children:
        handle_node(child)
def handle_node(i):
    """Record what node *i* contributes to the analysis, then recurse.

    * ``funcdef``: the parameter names (minus a leading ``self``) are stored
      in ``unique_function_name_parameters`` under the function's name. When
      the same name is defined again it is removed from that dict and added to
      ``non_unique_function_names``; it stays non-unique from then on.
    * ``atom_expr``: when the expression is recognised as a call, the result
      of ``handle_argument`` for each argument is appended to ``calls`` as
      ``(function_name, argument_values)``.

    Whatever the node type, every child is then visited through
    ``handle_list`` if the node has children.

    Args:
        i: a parso tree node or leaf.
    """
    t = i.type
    if t == 'funcdef':
        name = i.children[1].value
        parameters = [param_name(x) for x in i.children[2].children if x.type == 'param']
        if parameters and parameters[0] == 'self':
            parameters = parameters[1:]
        if name in non_unique_function_names:
            # Third or later definition: must not be re-registered as unique.
            pass
        elif name in unique_function_name_parameters:
            del unique_function_name_parameters[name]
            non_unique_function_names.add(name)
        else:
            unique_function_name_parameters[name] = parameters
    elif t == 'atom_expr':
        arguments = None
        if len(i.children) == 2:
            # normal function call
            if i.children[0].type == 'name':
                function_name = i.children[0].value
                arguments = [
                    node
                    for node in get_arguments(i.children[1].children[1])
                    if node.type != 'operator'  # filter out ,
                ]
        else:
            # The element before the last trailer can be a leaf (e.g. the name
            # in ``await foo(x)``), which has no ``children`` attribute.
            callee_children = getattr(i.children[-2], 'children', None)
            if (
                callee_children
                and callee_children[0].type == 'operator'
                and callee_children[0].value == '.'
            ):
                # member function call
                function_name = callee_children[1].value
                arguments = [
                    node
                    for node in get_arguments(i.children[-1].children[1])
                    if node.type != 'operator'  # filter out ,
                ]
            # anything else: list comprehensions and stuff, nothing to record

        # NOTE(review): because of ``len(i.children) > 2`` the two-child
        # "normal function call" branch above never reaches calls.append.
        # Kept as in the original; confirm whether plain calls are meant to be
        # skipped.
        if arguments and arguments[0].type != 'subscript' and len(i.children) > 2:
            argument_values = [handle_argument(argument) for argument in arguments]
            calls.append((function_name, argument_values))

    if hasattr(i, 'children'):
        handle_list(i)
def handle_argument(argument):
    """Classify one call argument and update the module-level tallies.

    Args:
        argument: the parso node of a single argument of a call.

    Returns:
        * the variable name, for a positional argument that is a bare name
          (``args`` is incremented);
        * ``matched_kwarg`` for ``foo=foo`` (``kwargs_foo_equal_foo`` is
          incremented);
        * ``kwarg_that_cannot_match`` for any other ``name=value``
          (``kwargs`` is incremented);
        * ``None`` for ``*x`` / ``**x`` (``args_kwargs`` is incremented);
        * ``misc`` for any other ``argument`` node;
        * ``hardcoded`` for every other node type.

    The length of each variable name that is passed is also tallied in
    ``argument_variable_name_lengths``.
    """
    global args_kwargs, args, kwargs, kwargs_foo_equal_foo

    if argument.type == 'name':
        # positional argument with a named variable
        passed_name = argument.value
        argument_variable_name_lengths[len(passed_name)] += 1
        args += 1
        return passed_name

    if argument.type != 'argument':
        # hardcoded value
        return hardcoded

    first = argument.children[0]
    if first.type == 'operator' and first.value in ('*', '**'):
        # *args and **kwargs
        args_kwargs += 1
        return None

    if len(argument.children) != 3:
        # String formatting and stuff
        return misc

    keyword, operator, passed = argument.children
    if getattr(operator, 'value', None) != '=':
        # Three children joined by something other than ``=`` (presumably the
        # ``:=`` form) are not a keyword argument.
        return misc

    if passed.type == 'name':
        # passed variable
        argument_variable_name_lengths[len(passed.value)] += 1
        if keyword.value == passed.value:
            kwargs_foo_equal_foo += 1
            return matched_kwarg

    # keyword whose value is either a differently named variable or a
    # hardcoded value
    kwargs += 1
    return kwarg_that_cannot_match
def analyse_directory(directory):
    """Parse every ``.py`` file below *directory* and feed it to ``handle_list``.

    Directories whose name starts with ``.``, ``env`` or ``venv``, ends with
    ``_env``, or equals ``node_modules`` are not descended into. Files that
    cannot be opened or decoded are skipped.

    Args:
        directory: path of the root directory to walk.
    """
    for root, dirs, files in os.walk(directory):
        # Assign in place: os.walk only honours pruning done on the list it
        # handed out.
        dirs[:] = [
            d
            for d in dirs
            if not d.startswith(('.', 'env', 'venv'))
            and not d.endswith('_env')
            and d != 'node_modules'
        ]
        for filename in files:
            if not filename.endswith('.py'):
                continue
            try:
                with open(os.path.join(root, filename)) as file:
                    contents = file.read()
            except (OSError, UnicodeDecodeError):
                # Best effort: skip the file, but do not swallow
                # KeyboardInterrupt and friends as a bare ``except`` would.
                continue
            handle_list(parse(contents, error_recovery=True))
import sys

# Command-line entry: exactly one argument, the directory to analyse.
if len(sys.argv) != 2:
    print('This tool analyses your code base for cases where a short form for keyword arguments would be nice to have.')
    print('Usage: supply one directory path to the code you wish to analyse.')
    # sys.exit rather than the bare exit(): the latter is a helper installed
    # by the site module and is absent when Python runs without it.
    sys.exit(1)

analyse_directory(sys.argv[1])
# Report, part 1: disclaimer, function-name statistics and the distribution of
# the lengths of variable names that were passed as arguments.
print('This analysis takes a LOT of short cuts. It will most likely under report the actual numbers.')
print('-----')
# print(f'kwargs: {kwargs}')
# print(f'args: {args}')
print(f'non-unique function names, unknown relevance: {len(non_unique_function_names)}')
print(f'unique {len(unique_function_name_parameters)}')

# Calls whose function name was never seen in a funcdef of the analysed code.
external_call_count = sum(
    1
    for name, _ in calls
    if name not in non_unique_function_names and name not in unique_function_name_parameters
)
print(f'calls to functions outside this code base, will not analyze: {external_call_count}')

print('passed variable name length statistics:')
for length, count in sorted(argument_variable_name_lengths.items()):
    print(f' length {length}: {count} times')
# Calls whose target name maps to exactly one funcdef, excluding dunder names
# (``__x__``). The original tested ``startswith('__')`` twice; the second test
# is now ``endswith('__')``.
calls_we_can_analyse = [
    (name, argument_values)
    for name, argument_values in calls
    if name in unique_function_name_parameters
    and not (name.startswith('__') and name.endswith('__'))
]

# Of those, the calls with at least one argument that is neither a hardcoded
# value nor a keyword argument that cannot match.
calls_with_potential = [
    (name, argument_values)
    for name, argument_values in calls_we_can_analyse
    if any(value not in (hardcoded, kwarg_that_cannot_match) for value in argument_values)
]
# Report, part 2: compare each recorded argument with the parameter at the
# same position in the single known definition of the called function.
could_have_been_a_matched_kwarg = 0
did_not_match = 0
arity_counts = defaultdict(int)  # number of arguments -> calls with >= 1 match

# The loop variable is deliberately not called ``args`` so that the
# module-level ``args`` counter is not overwritten.
for name, argument_values in calls_with_potential:
    had_potential = False
    # zip stops at the shorter of the two sequences.
    # NOTE(review): a '<matched_kwarg>' marker never equals a parameter name,
    # so such arguments are counted in did_not_match here as well as in
    # kwargs_foo_equal_foo -- confirm this is intended.
    for argument, parameter in zip(argument_values, unique_function_name_parameters[name]):
        if argument == parameter:
            could_have_been_a_matched_kwarg += 1
            had_potential = True
        else:
            did_not_match += 1
    if had_potential:
        arity_counts[len(argument_values)] += 1

print('number of arguments for calls that matched:')
for arity, count in arity_counts.items():
    print(f' {arity} arguments: {count} calls')
print('-----')
print(f'already matches (foo=foo): {kwargs_foo_equal_foo}')
print(f'could have been a matched kwarg: {could_have_been_a_matched_kwarg}')
print(f'did not match: {did_not_match}')
print('----')
benefits_from_new_syntax = kwargs_foo_equal_foo + could_have_been_a_matched_kwarg
total = did_not_match + benefits_from_new_syntax
# Nothing analysable (empty directory, no matching calls) leaves total at 0;
# report 0% instead of dividing by zero.
benefit_percentage = benefits_from_new_syntax / total * 100 if total else 0.0
print(f'Arguments that would benefit from new syntax suggestion: {benefits_from_new_syntax} ({benefit_percentage:.2f}%)')
@boxed
Copy link
Author

boxed commented Sep 6, 2018

Results on django:

This analysis takes a LOT of short cuts. It will most likely under report the actual numbers.
-----
non-unique function names, unknown relevance: 1844
unique 13603
calls to functions outside this code base, will not analyze: 35706
-----
already matches (foo=foo): 1312
could have been a matched kwarg: 4974
did not match: 15508
----
Arguments that would benefit from new syntax suggestion: 6286 (28.84%)

@boxed
Copy link
Author

boxed commented Sep 6, 2018

Results on twisted:

This analysis takes a LOT of short cuts. It will most likely under report the actual numbers.
-----
non-unique function names, unknown relevance: 2427
unique 12945
calls to functions outside this code base, will not analyze: 11554
-----
already matches (foo=foo): 428
could have been a matched kwarg: 2875
did not match: 16468
----
Arguments that would benefit from new syntax suggestion: 3303 (16.71%)

@dmertz-datacamp
Copy link

Dask:

510-dask % ~/bin/keyword_argument_analysis.py .
This analysis takes a LOT of short cuts. It will most likely under report the actual numbers.
-----
non-unique function names, unknown relevance: 368
unique 2869
calls to functions outside this code base, will not analyze: 5092
-----
already matches (foo=foo): 1355
could have been a matched kwarg: 345
did not match: 4843
----
Arguments that would benefit from new syntax suggestion: 1700 (25.98%)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment