Last active
July 17, 2018 10:16
-
-
Save lebigot/6c6d339a228d309f7d4672d168944622 to your computer and use it in GitHub Desktop.
Python code analyzer that reports some specific (and ad hoc) parts of the code (raise, warn…).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Python 2 | |
""" | |
Analyze Python code and find some ad hoc elements (like raise statements).
Each element found is converted into a simple code form (comments are removed,
etc.) and its location is available (both as line and column numbers, but also
through the top-level function or class where it is defined), as well as its
(custom) type.
For the reported elements, see the CodeReporter class.
""" | |
import re | |
import ast | |
import astor | |
import astmonkey.transformers | |
def find_top_level_func(node):
    """
    Find the outermost function or class that encloses an AST node.

    The chain of "parent" attributes is followed upwards from the given
    node, and the name of each function or class definition met on the way
    is remembered: the last one met is the outermost one. Only the ancestors
    of the node are considered, never the node itself.

    This name helps users locate the node in the source code, even after
    line numbers have changed.

    node -- ast node with a "parent" attribute that points to its parent
    node (None for the root), and similarly for all its ancestors. See
    astmonkey.transformers.ParentChildNodeTransformer().

    Returns the name found, or None if no function or class definition
    encloses the node.
    """
    outermost_name = None  # Name of the highest definition seen so far
    ancestor = node.parent
    while ancestor is not None:
        if isinstance(ancestor, (ast.FunctionDef, ast.ClassDef)):
            outermost_name = ancestor.name
        # One level up:
        ancestor = ancestor.parent
    # The root was reached:
    return outermost_name
def markdown_escape(text):
    """
    Escape special Markdown characters in the given text.

    Each character that Markdown allows to be backslash-escaped is prefixed
    with a backslash, and "&" and "<" are replaced by their HTML entities.

    text -- string to be escaped.

    Returns the escaped string.
    """
    # Reference: https://daringfireball.net/projects/markdown/syntax#backslash
    #
    # The hyphen is escaped inside the character class: a bare hyphen between
    # "+" and "." would define a range, which also matches ",".
    escaped = re.sub(r"([\\`*_{}\[\]()#+\-.!])", r"\\\1", text)
    # Reference: https://daringfireball.net/projects/markdown/syntax#autoescape
    #
    # "&" is handled first, so that the ampersand of "&lt;" is not escaped
    # a second time:
    escaped = escaped.replace("&", "&amp;")
    escaped = escaped.replace("<", "&lt;")
    return escaped
def indent(text, prefix):
    """
    Return text with the given prefix inserted at the start of every line.

    Lines are delimited by "\\n" only. Empty lines get the prefix too
    (including the empty last line of a text that ends with a newline).
    """
    # The first line gets its prefix directly; each following line starts
    # right after a newline:
    return prefix + text.replace("\n", "\n" + prefix)
class Element(object):
    """
    Piece of code found in a source file.

    An Element bundles the code itself, where it is located in the source
    (line and column), the top-level function or class that encloses it
    (None when it is found at the top level), and a custom type.
    """

    def __init__(self, code, lineno, col_offset, top_func, type_):
        """
        Each argument is stored in the attribute of the same name, except
        type_, which is stored in the "type" attribute.

        code -- string with the code element.

        lineno, col_offset -- location of the code in its source (the first
        line is line 1, the first column offset is 0).

        top_func -- name of the top-level function or class where the code
        can be found (None for code found at the top level).

        type_ -- custom element type (can be anything).
        """
        self.type = type_
        self.top_func = top_func
        self.lineno = lineno
        self.col_offset = col_offset
        self.code = code

    def _top_func_repr(self):
        """
        Return a display name for the enclosing top-level definition.

        This is '<module>' when there is none, and otherwise its name
        followed by "()".
        """
        if self.top_func is None:
            return "<module>"
        return self.top_func + "()"

    def __str__(self):
        template = "Line {}, col. {} in {}:\n{}"
        return template.format(
            self.lineno, self.col_offset, self._top_func_repr(), self.code)

    def markdown_repr(self):
        """
        Return the Markdown representation of the code element: a location
        line followed by the code, as an indented block.
        """
        # The code is expected to have no leading or trailing white space
        # (CodeReporter strips the trailing part before saving it), so the
        # layout is fully determined here:
        code_block = indent(self.code, 8 * " ")
        template = "Line {}, col. {} in `{}`:\n\n{}\n"
        return template.format(
            self.lineno, self.col_offset, self._top_func_repr(), code_block)
class CodeReporter(ast.NodeVisitor):
    """
    Analyze occurrences of specific Python constructs.

    The following code Elements are identified, by calling the visit()
    method:

    - raise statement (with type "raise"),

    - any call whose name contains "warn" or "Warn", like in warnings.warn()
      (with type "warn").

    The sub-expressions of a reported construct are not searched any
    further: for instance, the call inside "raise DeprecationWarning(...)"
    is not reported in addition to the raise statement.

    The list of Elements found is in the "elements" attribute. Elements
    are added one by one as they are encountered in code.
    """

    def __init__(self, *largs, **kwargs):
        """
        All arguments are passed to the superclass initialization.
        """
        self.elements = []  # List of Elements
        super(CodeReporter, self).__init__(*largs, **kwargs)

    def _add_element(self, node, type_):
        """
        Add an Element to self.elements that represents the given node,
        of the given type.

        node -- ast.AST node with a location (lineno and col_offset) and
        with the "parent" attributes required by find_top_level_func().

        type_ -- custom type (completely free).
        """
        self.elements.append(Element(
            # The trailing white space of the generated source is removed,
            # so that users of the Element control its layout:
            astor.to_source(node).rstrip(),
            node.lineno, node.col_offset,
            find_top_level_func(node),
            type_))

    def visit_Raise(self, raise_node):
        """
        Add an Element representing the given ast.Raise node to the elements
        found.
        """
        self._add_element(raise_node, "raise")

    def visit_Call(self, call_node):
        """
        Add an Element representing the given ast.Call node to the elements
        found, if the name called contains "warn" or "Warn".

        Otherwise, the call is searched for nested constructs.
        """
        # We can have a function which is either an identifier (Name node like
        # "warn") or an attribute access (Attribute node like "warnings.warn").
        # We use a broader check by looking for "warn" in the name:
        if re.search("[wW]arn", astor.to_source(call_node.func)):
            self._add_element(call_node, "warn")
        else:
            # ast.NodeVisitor does not visit the children of a node handled
            # by a custom visit_*() method: without this explicit descent,
            # a warning call found in the arguments of another call (like in
            # "log(warnings.warn(...))") would be missed.
            self.generic_visit(call_node)
def find_elements(path):
    """
    Return the code elements that CodeReporter finds in a Python file.

    path -- path to the Python source file to be analyzed.

    Returns the list of Elements collected by the CodeReporter.
    """
    with open(path) as source_file:
        source = source_file.read()
    tree = ast.parse(source, path)
    reporter = CodeReporter()
    # Each node receives a "parent" attribute, which is needed for finding
    # the top-level function or class of each element:
    decorated_tree = (
        astmonkey.transformers.ParentChildNodeTransformer().visit(tree))
    # Analysis of the code:
    reporter.visit(decorated_tree)
    return reporter.elements
if __name__ == "__main__":

    # Command-line use: each file given is analyzed, and the elements found
    # are printed on the standard output as a Markdown document with one
    # section per file.

    import argparse

    parser = argparse.ArgumentParser(
        description="""
        Analyze Python programs and report some specific code elements as
        MarkDown.
        Use pydoc on this program for details.
        """)
    parser.add_argument(
        "file_paths", metavar="file_path",
        help="Python program to be parsed.", nargs="+")
    args = parser.parse_args()

    for file_path in args.file_paths:
        # print is called with a single, parenthesized string, which gives
        # the same output whether print is a statement (Python 2) or a
        # function (Python 3):
        print("# " + markdown_escape(file_path))
        for element in find_elements(file_path):
            print("- {}".format(element.markdown_repr()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment