Skip to content

Instantly share code, notes, and snippets.

@nevercast
Created August 10, 2020 00:27
Show Gist options
  • Save nevercast/53820d1694865f0de707f3ce2db93ab5 to your computer and use it in GitHub Desktop.
Save nevercast/53820d1694865f0de707f3ce2db93ab5 to your computer and use it in GitHub Desktop.
Check Python file for side effects on import
#!/usr/bin/env python
#### MIT Licence
# Copyright 2020 Josh "nevercast" Lloyd
# This notice must remain intact for all copies or substantial portions of the Software
####
# First release: 2020-08-10
from __future__ import print_function
import ast
import sys
def _ast_node_map(*nodes):
    """ For each name in nodes, looks up ast.<name>; if it exists and is an
        AST node class it's part of the returned tuple, otherwise it's
        ignored. This allows us to check for nodes that exist in this
        version of Python and ignore ones that do not.

        Names that resolve to something other than a node class (for
        example 'parse', which is a function) are dropped as well, so the
        result is always safe to use as the second argument of isinstance().
    """
    candidates = (getattr(ast, node_name, None) for node_name in nodes)
    return tuple(
        candidate for candidate in candidates
        if isinstance(candidate, type) and issubclass(candidate, ast.AST)
    )
# Clean nodes are nodes that do not inherently have side effects
# ('Pass' is included because it does nothing at all when executed)
CLEAN_NODES = _ast_node_map('AsyncFunctionDef', 'FunctionDef', 'ClassDef', 'Pass')
# Import nodes need to be added to a stack when doing a deep search
IMPORT_NODES = _ast_node_map('Import', 'ImportFrom')
# Dirty nodes aren't immediately rejected, but instead must contain only Dirty or Clean nodes themselves.
# 'TryExcept' only exists on Python 2 and 'TryStar' (try/except*) only on Python 3.11+; names missing
# from the running interpreter are skipped by _ast_node_map. 'ExceptHandler' is listed because the
# checker descends into a Try's `handlers`; without it every `except` clause would fall through to
# the "everything else" category and get its whole try block rejected regardless of content.
DIRTY_NODES = _ast_node_map('If', 'Try', 'TryExcept', 'TryStar', 'ExceptHandler')
# Every other type of node is immediately rejected as a side effect
# Note: 'Assign' isn't always bad, that's how you set globals. By always rejecting them however, I leave it
# up to the developer to decide if the particular assignment is bad or not. Being aware of all global
# assignments inside a file on import could provide helpful insight. If this tool was ever used as
# some sort of lint checker, it would need options on how to handle Assign better, that's out of scope
# currently though.
# Can we handle __name__ checks? This is currently implemented dirty by checking the `If` line with a string
# compare. It was faster than covering all the edge cases with AST. Currently we only support it if we are
# using an unparser that gives back valid Python source, e.g. astunparse
CAN_HANDLE_DUNDER_NAME = False
# Takes a node and returns a string representation of it
def _ast_unparser(node, tree=None, code=None):
    """ Baseline printer that works on every Python version: returns the
        ast.dump() representation of node. `tree` and `code` are accepted
        only so every printer variant shares one signature; they are
        ignored here.
    """
    return ast.dump(node)
# Python 3.8 offers a better printer for ast => code
if hasattr(ast, 'get_source_segment'):
    def _ast_unparser(node, tree=None, code=None):
        """ Returns the exact source text of node, cut out of `code`.

            Falls back to ast.dump(node) when there is no usable source:
            `code` is None or empty, or ast.get_source_segment() returns
            None because the node carries no position information.
            `tree` is unused and kept for signature compatibility.
        """
        # An empty string has no lines to slice, so treat it like None
        # instead of letting get_source_segment index into nothing.
        if not code:
            return ast.dump(node)
        segment = ast.get_source_segment(code, node)
        if segment is None:
            return ast.dump(node)
        return segment
# If astunparse is installed, that's the best ast printer
try:
    import astunparse

    def _ast_unparser(node, tree=None, code=None):
        """ Renders node back to Python source using astunparse.
            `tree` and `code` are unused (signature compatibility).
        """
        return astunparse.unparse(node)

    CAN_HANDLE_DUNDER_NAME = True
except ImportError:
    # Python 3.9+ ships ast.unparse, which like astunparse produces valid
    # Python source from a node alone. Use it when the third-party package
    # is missing so `if __name__` detection still works out of the box.
    if hasattr(ast, 'unparse'):
        def _ast_unparser(node, tree=None, code=None):
            """ Renders node back to Python source using ast.unparse.
                `tree` and `code` are unused (signature compatibility).
            """
            return ast.unparse(node)

        CAN_HANDLE_DUNDER_NAME = True
def verify_file_is_sideeffect_free(filename_or_ast, inspect_imports=False):
    """ Prints every top-level statement of a module that would execute as
        a side effect when the module is imported.

        filename_or_ast: path of a Python source file (str), or an already
            parsed tree exposing a `.body` list of statements.
        inspect_imports: accepted for interface compatibility; imports are
            only counted, never followed, so this flag currently has no
            effect.

        Statements are sorted into the module-level categories:
          * CLEAN_NODES are skipped,
          * IMPORT_NODES are counted,
          * DIRTY_NODES are not judged themselves; the statements nested in
            them (body, except handlers, else and finally branches) are
            queued and judged individually,
          * anything else is a rejection, reported once per outermost
            enclosing statement.

        When CAN_HANDLE_DUNDER_NAME is set, an `if` whose rendered first
        line mentions __name__ has its body ignored; its else branch is
        still inspected because that branch is not protected by the guard.

        Nothing is returned; the report goes to stdout. Reading or parsing
        the file may raise the usual IOError/OSError or SyntaxError.

        NOTE: the test expression of an `if` is not inspected.
    """
    from collections import deque

    if isinstance(filename_or_ast, str):
        with open(filename_or_ast) as file_handle:
            source_code = file_handle.read()
        tree = ast.parse(source_code, filename=filename_or_ast)
    else:
        tree = filename_or_ast
        # A bare AST carries no source text. None (rather than "") tells the
        # printers to fall back to a representation that needs no source.
        source_code = None

    # Statement-list attributes holding code that runs as part of a node.
    block_attrs = ('body', 'handlers', 'orelse', 'finalbody')
    # Expression attributes; only present on expression nodes, so they are
    # inert unless such node types are added to DIRTY_NODES.
    expr_list_attrs = ('values', 'ops', 'comparators')
    scalar_attrs = ('op', 'left', 'right', 'value')

    rejections = {}  # outermost statement -> [rejected descendants]
    imports = []
    pending = deque(tree.body)  # breadth-first work queue

    def collect_children(parent_node, children):
        # Remember where each child came from so a rejection deep inside a
        # block can be attributed to its outermost enclosing statement.
        for child in children:
            child.parent = parent_node
        pending.extend(children)

    while pending:
        node = pending.popleft()
        if isinstance(node, CLEAN_NODES):
            continue
        if isinstance(node, IMPORT_NODES):
            imports.append(node)
            continue
        if isinstance(node, DIRTY_NODES):
            list_attrs = block_attrs + expr_list_attrs
            # Special handling for `if __name__ ...`: rather than analysing
            # the test's AST we render the statement and look for __name__
            # on its first line.
            if CAN_HANDLE_DUNDER_NAME and isinstance(node, ast.If):
                rendered = _ast_unparser(node=node, tree=tree, code=source_code)
                rendered_lines = rendered.strip().splitlines()
                if_source = rendered_lines[0] if rendered_lines else ''
                if '__name__' in if_source:
                    # Skip the guarded body but keep checking the else branch.
                    list_attrs = ('orelse',) + expr_list_attrs
                    print('#', if_source, ' # Ignored as a side effect')
            for list_attr in list_attrs:
                collect_children(node, getattr(node, list_attr, []))
            for scalar_attr in scalar_attrs:
                if hasattr(node, scalar_attr):
                    collect_children(node, [getattr(node, scalar_attr)])
            continue

        # Rejected: climb to the outermost enclosing statement so the whole
        # block is reported once, however many offenders it contains.
        root = node
        while hasattr(root, 'parent'):
            root = root.parent
        offenders = rejections.setdefault(root, [])
        if node is not root and node not in offenders:
            offenders.append(node)

    # Print the rejections. Only the outermost statement is shown; the
    # individual offenders collected per root are kept for a possible future
    # per-line (e.g. coloured) report.
    for root in rejections:
        print(_ast_unparser(node=root, tree=tree, code=source_code).strip())
    print('#', len(imports), 'imports were not checked.')
if __name__ == '__main__':
    def main():
        """ Command line entry point: checks the single file named on the
            command line. Returns the process exit status: 0 after a check
            has run, 2 when the arguments are wrong.
        """
        if len(sys.argv) != 2:
            # Usage errors go to stderr with a non-zero status so that shell
            # scripts and CI jobs can tell misuse apart from a real run.
            print('Syntax:', __file__, 'file_to_check.py', file=sys.stderr)
            return 2
        target_file = sys.argv[1]
        print('# Checking', target_file)
        verify_file_is_sideeffect_free(target_file)
        return 0

    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment