# boxed/keyword_argument_analysis.py

## keyword_argument_analysis.py
import os
from parso import parse
from collections import defaultdict


# Running tallies. handle_argument() increments every one of these except
# passed_value, which nothing in this file updates.
args = kwargs = kwargs_foo_equal_foo = passed_value = args_kwargs = 0

# (function name, [classified argument, ...]) tuples appended by handle_node().
calls = list()

# name -> parameter names for the function names handle_node() currently
# treats as unique, and the names it has seen defined more than once.
unique_function_name_parameters = dict()
non_unique_function_names = set()

# Marker strings handle_argument() returns in place of a variable name.
matched_kwarg = '<matched_kwarg>'
kwarg_that_cannot_match = '<kwarg_that_cannot_match>'
hardcoded = '<hardcoded>'
misc = '<misc>'

# Histogram: length of a passed variable name -> number of occurrences.
argument_variable_name_lengths = defaultdict(int)


def get_arguments(i):
    """Return the nodes between a call's parentheses as a list.

    parso wraps several arguments in an 'arglist' node but hands back a lone
    argument directly, so the single case is boxed into a one-element list.
    The 'arglist' children are returned as-is (callers filter the commas).
    """
    return i.children if i.type == 'arglist' else [i]


def param_name(i):
    """Return the `.value` of a 'param' node's first child.

    When that first child is a 'tfpdef' wrapper, the value is taken from the
    wrapper's own first child instead.
    """
    head = i.children[0]
    if head.type == 'tfpdef':
        head = head.children[0]
    return head.value


def handle_list(result):
    """Pass each direct child of `result` to handle_node(), in order."""
    children = result.children
    for child in children:
        handle_node(child)


def _iter_calls(node):
    """Yield (function_name, argument_nodes) for every call in an 'atom_expr'.

    An 'atom_expr' is a head followed by trailers such as `.attr`, `[index]`
    and `(args)`. Only `(`-trailers are calls, and a call is reported only
    when the element directly before it names the callee: a bare name
    (`foo(x)`) or a `.name` trailer (`obj.foo(x)`). Calls without arguments
    are skipped because there is nothing to classify.
    """
    children = node.children
    for callee, trailer in zip(children, children[1:]):
        # Attribute access and subscripts are trailers too; they are not calls.
        if trailer.type != 'trailer' or trailer.children[0].value != '(':
            continue

        if callee.type == 'name':
            function_name = callee.value
        elif callee.type == 'trailer' and callee.children[0].value == '.':
            function_name = callee.children[1].value
        else:
            # e.g. `(lambda: 0)(x)` or `f(x)(y)`: no simple name to look up.
            continue

        # For `f()` the node after `(` is the `)` operator; commas inside an
        # arglist are operators as well. Neither is an argument.
        arguments = [n for n in get_arguments(trailer.children[1]) if n.type != 'operator']
        if arguments:
            yield function_name, arguments


def handle_node(i):
    """Record what one parse-tree node contributes, then recurse into it.

    * 'funcdef': remember the parameter names under the function's name
      (dropping a leading `self`). A name defined more than once is moved to
      non_unique_function_names and stays there.
    * 'atom_expr': append (function_name, classified_arguments) to `calls`
      for each call found by _iter_calls().

    Every node that has children is then walked via handle_list().
    """
    t = i.type

    if t == 'funcdef':
        name = i.children[1].value
        parameters = [param_name(x) for x in i.children[2].children if x.type == 'param']
        if parameters and parameters[0] == 'self':
            parameters = parameters[1:]

        if name in non_unique_function_names:
            # Third or later definition: must not become "unique" again.
            pass
        elif name in unique_function_name_parameters:
            del unique_function_name_parameters[name]
            non_unique_function_names.add(name)
        else:
            unique_function_name_parameters[name] = parameters

    elif t == 'atom_expr':
        for function_name, arguments in _iter_calls(i):
            argument_values = [handle_argument(argument) for argument in arguments]
            calls.append((function_name, argument_values))

    if hasattr(i, 'children'):
        handle_list(i)


def handle_argument(argument):
    """Classify one call argument and update the module-level tallies.

    Returns:
        * the variable name, for a positional argument that is a bare name;
        * None, for `*x` / `**x` unpacking;
        * matched_kwarg, for a keyword argument of the form `foo=foo`;
        * kwarg_that_cannot_match, for any other keyword argument;
        * misc, for other 'argument' nodes (e.g. a generator expression or
          `name := value`);
        * hardcoded, for everything else (literals, expressions, ...).
    """
    global args_kwargs, args, kwargs, kwargs_foo_equal_foo

    if argument.type == 'name':
        # positional argument with a named variable
        passed_name = argument.value
        argument_variable_name_lengths[len(passed_name)] += 1
        args += 1
        return passed_name

    if argument.type != 'argument':
        # hardcoded value
        return hardcoded

    parts = argument.children
    first = parts[0]
    if first.type == 'operator' and first.value in ('*', '**'):
        # *args and **kwargs
        args_kwargs += 1
        return None

    # A keyword argument is exactly `name = value`. The operator check keeps
    # `name := value`, which also has three children, out of the kwarg counts.
    if len(parts) != 3 or getattr(parts[1], 'value', None) != '=':
        return misc

    keyword, _, passed = parts
    if passed.type != 'name':
        # passed hardcoded value
        kwargs += 1
        return kwarg_that_cannot_match

    # passed variable
    argument_variable_name_lengths[len(passed.value)] += 1
    if keyword.value == passed.value:
        kwargs_foo_equal_foo += 1
        return matched_kwarg

    kwargs += 1
    return kwarg_that_cannot_match


def analyse_directory(directory):
    """Parse every `.py` file below `directory` and feed it to handle_list().

    Directories whose name starts with '.', 'env' or 'venv', ends with
    '_env', or equals 'node_modules' are pruned from the walk. Files that
    cannot be opened or decoded as UTF-8 are skipped; syntax errors are left
    to parso's error recovery.
    """
    for root, dirs, files in os.walk(directory):
        # In-place assignment so os.walk does not descend into pruned dirs.
        dirs[:] = [
            d for d in dirs
            if not d.startswith(('.', 'env', 'venv'))
            and not d.endswith('_env')
            and d != 'node_modules'
        ]
        for filename in files:
            if not filename.endswith('.py'):
                continue

            try:
                with open(os.path.join(root, filename), encoding='utf-8') as file:
                    contents = file.read()
            except (OSError, UnicodeDecodeError):
                # Unreadable or non-UTF-8 file: skip it, keep analysing.
                continue

            handle_list(parse(contents, error_recovery=True))


# Script section: analyse the directory given on the command line and print
# the statistics gathered in the module-level tallies.
import sys

if len(sys.argv) != 2:
    print('This tool analyses your code base for cases where a short form for keyword arguments would be nice to have.')
    print('Usage: supply one directory path to the code you wish to analyse.')
    sys.exit(1)

analyse_directory(sys.argv[1])

print('This analysis takes a LOT of short cuts. It will most likely under report the actual numbers.')
print('-----')

print(f'non-unique function names, unknown relevance: {len(non_unique_function_names)}')
print(f'unique {len(unique_function_name_parameters)}')
external_call_count = sum(
    1
    for name, _ in calls
    if name not in non_unique_function_names and name not in unique_function_name_parameters
)
print(f'calls to functions outside this code base, will not analyze: {external_call_count}')
print('passed variable name length statistics:')
for length, count in sorted(argument_variable_name_lengths.items()):
    print(f'    length {length}: {count} times')

# Only calls whose target has exactly one known definition can be compared
# against a parameter list. Dunder methods (__init__, __eq__, ...) are left out.
calls_we_can_analyse = [
    (name, call_args)
    for name, call_args in calls
    if name in unique_function_name_parameters and not (name.startswith('__') and name.endswith('__'))
]

# Keep calls with at least one argument that is neither a literal nor a
# keyword argument that can never match.
calls_with_potential = [
    (name, call_args)
    for name, call_args in calls_we_can_analyse
    if any(x not in (hardcoded, kwarg_that_cannot_match) for x in call_args)
]

could_have_been_a_matched_kwarg = 0
did_not_match = 0
arity_counts = defaultdict(int)

for name, call_args in calls_with_potential:
    had_potential = False
    for argument, parameter in zip(call_args, unique_function_name_parameters[name]):
        if argument == matched_kwarg:
            # Already counted in kwargs_foo_equal_foo; counting it here too
            # would add it to both sides of the final percentage.
            continue
        if argument == parameter:
            could_have_been_a_matched_kwarg += 1
            had_potential = True
        else:
            did_not_match += 1

    if had_potential:
        arity_counts[len(call_args)] += 1

print('number of arguments for calls that matched:')
for arity, count in sorted(arity_counts.items()):
    print(f'    {arity} arguments: {count} calls')

print('-----')
print(f'already matches (foo=foo): {kwargs_foo_equal_foo}')
print(f'could have been a matched kwarg: {could_have_been_a_matched_kwarg}')
print(f'did not match: {did_not_match}')

print('----')
benefits_from_new_syntax = kwargs_foo_equal_foo + could_have_been_a_matched_kwarg
total = did_not_match + benefits_from_new_syntax
# Nothing analysable (e.g. an empty directory) must not divide by zero.
percentage = benefits_from_new_syntax / total * 100 if total else 0.0
print(f'Arguments that would benefit from new syntax suggestion: {benefits_from_new_syntax} ({percentage:.2f}%)')
	import os
	from parso import parse
	from collections import defaultdict


	# Running tallies. handle_argument() increments every one of these except
	# passed_value, which nothing in this file updates.
	args = kwargs = kwargs_foo_equal_foo = passed_value = args_kwargs = 0

	# (function name, [classified argument, ...]) tuples appended by handle_node().
	calls = list()

	# name -> parameter names for the function names handle_node() currently
	# treats as unique, and the names it has seen defined more than once.
	unique_function_name_parameters = dict()
	non_unique_function_names = set()

	# Marker strings handle_argument() returns in place of a variable name.
	matched_kwarg = '<matched_kwarg>'
	kwarg_that_cannot_match = '<kwarg_that_cannot_match>'
	hardcoded = '<hardcoded>'
	misc = '<misc>'

	# Histogram: length of a passed variable name -> number of occurrences.
	argument_variable_name_lengths = defaultdict(int)


	def get_arguments(i):
	    """Return the nodes between a call's parentheses as a list.

	    parso represents functions with one argument and multiple arguments
	    very differently: several arguments arrive wrapped in an 'arglist'
	    node, a lone argument arrives bare and is boxed into a list here.
	    """
	    if i.type == 'arglist':
	        return i.children
	    return [i]


	def param_name(i):
	    """Return the `.value` of a 'param' node's first child.

	    When that first child is a 'tfpdef' wrapper, the value is taken from
	    the wrapper's own first child instead.
	    """
	    if i.children[0].type == 'tfpdef':
	        return i.children[0].children[0].value
	    return i.children[0].value


	def handle_list(result):
	    """Pass each direct child of `result` to handle_node(), in order."""
	    for i in result.children:
	        handle_node(i)


	def _iter_calls(node):
	    """Yield (function_name, argument_nodes) for every call in an 'atom_expr'.

	    An 'atom_expr' is a head followed by trailers such as `.attr`,
	    `[index]` and `(args)`. Only `(`-trailers are calls, and a call is
	    reported only when the element directly before it names the callee: a
	    bare name (`foo(x)`) or a `.name` trailer (`obj.foo(x)`). Calls without
	    arguments are skipped because there is nothing to classify.
	    """
	    children = node.children
	    for callee, trailer in zip(children, children[1:]):
	        # Attribute access and subscripts are trailers too; they are not calls.
	        if trailer.type != 'trailer' or trailer.children[0].value != '(':
	            continue

	        if callee.type == 'name':
	            function_name = callee.value
	        elif callee.type == 'trailer' and callee.children[0].value == '.':
	            function_name = callee.children[1].value
	        else:
	            # e.g. `(lambda: 0)(x)` or `f(x)(y)`: no simple name to look up.
	            continue

	        # For `f()` the node after `(` is the `)` operator; commas inside an
	        # arglist are operators as well. Neither is an argument.
	        arguments = [n for n in get_arguments(trailer.children[1]) if n.type != 'operator']
	        if arguments:
	            yield function_name, arguments


	def handle_node(i):
	    """Record what one parse-tree node contributes, then recurse into it.

	    * 'funcdef': remember the parameter names under the function's name
	      (dropping a leading `self`). A name defined more than once is moved
	      to non_unique_function_names and stays there.
	    * 'atom_expr': append (function_name, classified_arguments) to `calls`
	      for each call found by _iter_calls().

	    Every node that has children is then walked via handle_list().
	    """
	    t = i.type

	    if t == 'funcdef':
	        name = i.children[1].value
	        parameters = [param_name(x) for x in i.children[2].children if x.type == 'param']
	        if parameters and parameters[0] == 'self':
	            parameters = parameters[1:]

	        if name in non_unique_function_names:
	            # Third or later definition: must not become "unique" again.
	            pass
	        elif name in unique_function_name_parameters:
	            del unique_function_name_parameters[name]
	            non_unique_function_names.add(name)
	        else:
	            unique_function_name_parameters[name] = parameters

	    elif t == 'atom_expr':
	        for function_name, arguments in _iter_calls(i):
	            argument_values = [handle_argument(argument) for argument in arguments]
	            calls.append((function_name, argument_values))

	    if hasattr(i, 'children'):
	        handle_list(i)


	def handle_argument(argument):
	    """Classify one call argument and update the module-level tallies.

	    Returns:
	        * the variable name, for a positional argument that is a bare name;
	        * None, for `*x` / `**x` unpacking;
	        * matched_kwarg, for a keyword argument of the form `foo=foo`;
	        * kwarg_that_cannot_match, for any other keyword argument;
	        * misc, for other 'argument' nodes (e.g. a generator expression or
	          `name := value`);
	        * hardcoded, for everything else (literals, expressions, ...).
	    """
	    global args_kwargs, args, kwargs, kwargs_foo_equal_foo

	    if argument.type == 'name':
	        # positional argument with a named variable
	        passed_name = argument.value
	        argument_variable_name_lengths[len(passed_name)] += 1
	        args += 1
	        return passed_name

	    if argument.type != 'argument':
	        # hardcoded value
	        return hardcoded

	    parts = argument.children
	    first = parts[0]
	    if first.type == 'operator' and first.value in ('*', '**'):
	        # *args and **kwargs
	        args_kwargs += 1
	        return None

	    # A keyword argument is exactly `name = value`. The operator check keeps
	    # `name := value`, which also has three children, out of the kwarg counts.
	    if len(parts) != 3 or getattr(parts[1], 'value', None) != '=':
	        return misc

	    keyword, _, passed = parts
	    if passed.type != 'name':
	        # passed hardcoded value
	        kwargs += 1
	        return kwarg_that_cannot_match

	    # passed variable
	    argument_variable_name_lengths[len(passed.value)] += 1
	    if keyword.value == passed.value:
	        kwargs_foo_equal_foo += 1
	        return matched_kwarg

	    kwargs += 1
	    return kwarg_that_cannot_match


	def analyse_directory(directory):
	    """Parse every `.py` file below `directory` and feed it to handle_list().

	    Directories whose name starts with '.', 'env' or 'venv', ends with
	    '_env', or equals 'node_modules' are pruned from the walk. Files that
	    cannot be opened or decoded as UTF-8 are skipped; syntax errors are
	    left to parso's error recovery.
	    """
	    for root, dirs, files in os.walk(directory):
	        # In-place assignment so os.walk does not descend into pruned dirs.
	        dirs[:] = [
	            d for d in dirs
	            if not d.startswith(('.', 'env', 'venv'))
	            and not d.endswith('_env')
	            and d != 'node_modules'
	        ]
	        for filename in files:
	            if not filename.endswith('.py'):
	                continue

	            try:
	                with open(os.path.join(root, filename), encoding='utf-8') as file:
	                    contents = file.read()
	            except (OSError, UnicodeDecodeError):
	                # Unreadable or non-UTF-8 file: skip it, keep analysing.
	                continue

	            handle_list(parse(contents, error_recovery=True))


	# Script section: analyse the directory given on the command line and print
	# the statistics gathered in the module-level tallies.
	import sys

	if len(sys.argv) != 2:
	    print('This tool analyses your code base for cases where a short form for keyword arguments would be nice to have.')
	    print('Usage: supply one directory path to the code you wish to analyse.')
	    sys.exit(1)

	analyse_directory(sys.argv[1])

	print('This analysis takes a LOT of short cuts. It will most likely under report the actual numbers.')
	print('-----')

	print(f'non-unique function names, unknown relevance: {len(non_unique_function_names)}')
	print(f'unique {len(unique_function_name_parameters)}')
	external_call_count = sum(
	    1
	    for name, _ in calls
	    if name not in non_unique_function_names and name not in unique_function_name_parameters
	)
	print(f'calls to functions outside this code base, will not analyze: {external_call_count}')
	print('passed variable name length statistics:')
	for length, count in sorted(argument_variable_name_lengths.items()):
	    print(f'    length {length}: {count} times')

	# Only calls whose target has exactly one known definition can be compared
	# against a parameter list. Dunder methods (__init__, __eq__, ...) are left out.
	calls_we_can_analyse = [
	    (name, call_args)
	    for name, call_args in calls
	    if name in unique_function_name_parameters and not (name.startswith('__') and name.endswith('__'))
	]

	# Keep calls with at least one argument that is neither a literal nor a
	# keyword argument that can never match.
	calls_with_potential = [
	    (name, call_args)
	    for name, call_args in calls_we_can_analyse
	    if any(x not in (hardcoded, kwarg_that_cannot_match) for x in call_args)
	]

	could_have_been_a_matched_kwarg = 0
	did_not_match = 0
	arity_counts = defaultdict(int)

	for name, call_args in calls_with_potential:
	    had_potential = False
	    for argument, parameter in zip(call_args, unique_function_name_parameters[name]):
	        if argument == matched_kwarg:
	            # Already counted in kwargs_foo_equal_foo; counting it here too
	            # would add it to both sides of the final percentage.
	            continue
	        if argument == parameter:
	            could_have_been_a_matched_kwarg += 1
	            had_potential = True
	        else:
	            did_not_match += 1

	    if had_potential:
	        arity_counts[len(call_args)] += 1

	print('number of arguments for calls that matched:')
	for arity, count in sorted(arity_counts.items()):
	    print(f'    {arity} arguments: {count} calls')

	print('-----')
	print(f'already matches (foo=foo): {kwargs_foo_equal_foo}')
	print(f'could have been a matched kwarg: {could_have_been_a_matched_kwarg}')
	print(f'did not match: {did_not_match}')

	print('----')
	benefits_from_new_syntax = kwargs_foo_equal_foo + could_have_been_a_matched_kwarg
	total = did_not_match + benefits_from_new_syntax
	# Nothing analysable (e.g. an empty directory) must not divide by zero.
	percentage = benefits_from_new_syntax / total * 100 if total else 0.0
	print(f'Arguments that would benefit from new syntax suggestion: {benefits_from_new_syntax} ({percentage:.2f}%)')