Skip to content

Instantly share code, notes, and snippets.

@dwf
Created February 25, 2014 19:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dwf/9216386 to your computer and use it in GitHub Desktop.
Save dwf/9216386 to your computer and use it in GitHub Desktop.
Code extracted from the numpydoc package.
"""Extract reference documentation from the NumPy source tree.
"""
import inspect
import textwrap
import re
import sys
import types
class Reader(object):
    """A line-based string reader.

    Holds a list of lines plus a cursor (``_l``) pointing at the next line
    to be read.  All ``read*`` methods advance the cursor; ``peek`` does not.
    """

    def __init__(self, data):
        """
        Parameters
        ----------
        data : str or list of str
            Either a string with lines separated by '\\n', or an
            already-split list of lines (stored as given, not copied).
        """
        if isinstance(data, list):
            self._str = data
        else:
            self._str = data.split('\n')  # store string as list of lines
        self.reset()

    def __getitem__(self, n):
        """Return line ``n`` (or a list of lines when ``n`` is a slice)."""
        return self._str[n]

    def reset(self):
        """Move the cursor back to the first line."""
        self._l = 0  # current line nr

    def read(self):
        """Return the current line and advance; return '' at end of input."""
        if self.eof():
            return ''
        out = self[self._l]
        self._l += 1
        return out

    def seek_next_non_empty_line(self):
        """Advance the cursor past any whitespace-only lines."""
        for l in self[self._l:]:
            if l.strip():
                break
            self._l += 1

    def eof(self):
        """Return True once the cursor is past the last line."""
        return self._l >= len(self._str)

    def read_to_condition(self, condition_func):
        """Read lines until ``condition_func(line)`` is true or input ends.

        Returns the lines consumed; the line that satisfied the condition is
        not included and the cursor is left pointing at it.
        """
        start = self._l
        for line in self[start:]:
            if condition_func(line):
                return self[start:self._l]
            self._l += 1
            if self.eof():
                return self[start:self._l]
        return []

    def read_to_next_empty_line(self):
        """Skip blank lines, then read up to (not including) the next one."""
        self.seek_next_non_empty_line()

        def is_empty(line):
            return not line.strip()

        return self.read_to_condition(is_empty)

    def read_to_next_unindented_line(self):
        """Read up to (not including) the next non-blank, unindented line."""
        def is_unindented(line):
            return (line.strip() and (len(line.lstrip()) == len(line)))

        return self.read_to_condition(is_unindented)

    def peek(self, n=0):
        """Return the line ``n`` positions from the cursor without moving it.

        Returns '' when the requested position is outside the buffer.  The
        lower bound is checked explicitly so that a negative offset reaching
        before the first line does not wrap around to the end of the buffer
        through Python's negative indexing.
        """
        index = self._l + n
        if 0 <= index < len(self._str):
            return self[index]
        return ''

    def is_empty(self):
        """Return True if every line is empty or whitespace-only."""
        return not ''.join(self._str).strip()

    def __iter__(self):
        """Iterate over all lines, independent of the cursor position."""
        return iter(self._str)
class NumpyDocString(object):
    """Parser and re-formatter for a NumPy-style docstring.

    The docstring is de-indented, wrapped in a `Reader` and split into the
    sections listed in ``_parsed_data``.  Parsed sections are read and
    written with item access (``doc['Parameters']``); ``section_order``
    records section names in the order they were encountered.
    """

    def __init__(self, docstring):
        """
        Parameters
        ----------
        docstring : str
            Raw docstring text with lines separated by '\\n'.
        """
        lines = docstring.split('\n')
        # De-indent paragraph: strip the smallest indentation found on any
        # non-blank line from every line.
        try:
            indent = min(len(s) - len(s.lstrip()) for s in lines
                         if s.strip())
        except ValueError:
            # min() of an empty sequence: there are no non-blank lines.
            indent = 0
        lines = [line[indent:] for line in lines]
        self._doc = Reader(lines)
        self._parsed_data = {
            'Signature': '',
            'Summary': '',
            'Extended Summary': [],
            'Parameters': [],
            'Other Parameters': [],
            'Returns': [],
            'Raises': [],
            'Warns': [],
            'See Also': [],
            'Notes': [],
            'References': '',
            'Examples': '',
            'index': {},
            'Attributes': [],
            'Methods': [],
        }
        self.section_order = []
        self._parse()

    def __getitem__(self, key):
        return self._parsed_data[key]

    def __setitem__(self, key, val):
        """Store a section; raise ValueError for an unknown section name."""
        # ``in`` instead of dict.has_key(), which no longer exists on
        # Python 3.
        if key not in self._parsed_data:
            raise ValueError("Unknown section %s" % key)
        self._parsed_data[key] = val

    def _is_at_section(self):
        """Skip blank lines and report whether a section starts here.

        A section start is either an ``.. index::`` line or a title line
        followed by a line of dashes of exactly the same length.
        """
        self._doc.seek_next_non_empty_line()
        if self._doc.eof():
            return False
        l1 = self._doc.peek().strip()  # e.g. Parameters
        if l1.startswith('.. index::'):
            return True
        l2 = self._doc.peek(1).strip()  # ----------
        return (len(l1) == len(l2) and l2 == '-' * len(l1))

    def _strip(self, doc):
        """Return ``doc`` without leading and trailing blank lines."""
        i = 0
        j = 0
        for i, line in enumerate(doc):
            if line.strip():
                break
        for j, line in enumerate(doc[::-1]):
            if line.strip():
                break
        return doc[i:len(doc) - j]

    def _read_to_next_section(self):
        """Read all paragraphs up to the next section start or end of input."""
        section = self._doc.read_to_next_empty_line()
        while not self._is_at_section() and not self._doc.eof():
            if not self._doc.peek(-1).strip():  # previous line was empty
                section += ['']
            section += self._doc.read_to_next_empty_line()
        return section

    def _read_sections(self):
        """Yield ``(name, content_lines)`` for each remaining section."""
        while not self._doc.eof():
            data = self._read_to_next_section()
            if not data:
                return
            name = data[0].strip()
            if name.startswith('..'):  # index section
                yield name, data[1:]
            elif len(data) < 2:
                # A lone line cannot be a "title + underline" section.  End
                # the generator; the previous ``yield StopIteration``
                # yielded the exception class itself, which the tuple
                # unpacking in _parse() cannot handle.
                return
            else:
                # data[0] is the title and data[1] the dashed underline.
                yield name, self._strip(data[2:])

    def _parse_param_list(self, content):
        """Parse ``name : type`` entries, each followed by indented text.

        Returns a list of ``(name, type, description_lines)`` tuples; the
        type is '' when the header line contains no ' : ' separator.
        """
        r = Reader(content)
        params = []
        while not r.eof():
            header = r.read().strip()
            if ' : ' in header:
                arg_name, arg_type = header.split(' : ')[:2]
            else:
                arg_name, arg_type = header, ''
            desc = [line.strip()
                    for line in r.read_to_next_unindented_line()]
            params.append((arg_name, arg_type, desc))
        return params

    def _parse_see_also(self, content):
        """Parse a See Also section into ``(func_name, desc_lines)`` tuples.

        Recognised layout::

            func_name : Descriptive text
                continued text
            another_func_name : Descriptive text
            func_name1, func_name2, func_name3
        """
        functions = []
        current_func = None
        rest = []
        for line in content:
            if not line.strip():
                continue
            if ':' in line:
                # "name : description" starts a new entry.
                if current_func:
                    functions.append((current_func, rest))
                r = line.split(':', 1)
                current_func = r[0].strip()
                r[1] = r[1].strip()
                if r[1]:
                    rest = [r[1]]
                else:
                    rest = []
            elif not line.startswith(' '):
                # Unindented line without a colon: a bare name or a
                # comma-separated list of names.
                if current_func:
                    functions.append((current_func, rest))
                    current_func = None
                    rest = []
                if ',' in line:
                    for func in line.split(','):
                        func = func.strip()
                        if func:
                            functions.append((func, []))
                elif line.strip():
                    current_func = line.strip()
            elif current_func is not None:
                # Indented continuation of the current description.
                rest.append(line.strip())
        if current_func:
            functions.append((current_func, rest))
        return functions

    def _parse_index(self, section, content):
        """Parse an index directive into a dict.

        Expected layout::

            .. index:: default
               :refguide: something, else, and more

        The text after ``::`` is stored under ``'default'``; each
        ``:key: a, b`` line is stored as ``key -> ['a', 'b']``.
        """
        def strip_each_in(lst):
            return [s.strip() for s in lst]

        out = {}
        section = section.split('::')
        if len(section) > 1:
            out['default'] = strip_each_in(section[1].split(','))[0]
        for line in content:
            line = line.split(':')
            if len(line) > 2:
                out[line[1]] = strip_each_in(line[2].split(','))
        return out

    def _parse_summary(self):
        """Grab signature (if given) and summary"""
        summary = self._doc.read_to_next_empty_line()
        summary_str = " ".join([s.strip() for s in summary])
        # Raw string: the pattern is unchanged, but the backslashes are no
        # longer invalid string escapes.
        if re.compile(r'^([\w. ]+=)?[\w\.]+\(.*\)$').match(summary_str):
            self['Signature'] = summary_str
            if not self._is_at_section():
                self['Summary'] = self._doc.read_to_next_empty_line()
        else:
            self['Summary'] = summary
        if not self._is_at_section():
            self['Extended Summary'] = self._read_to_next_section()

    def _parse(self):
        """Parse the whole docstring into ``_parsed_data``."""
        self._doc.reset()
        self._parse_summary()
        for (section, content) in self._read_sections():
            if not section.startswith('..'):
                # Normalise capitalisation, e.g. "see also" -> "See Also".
                section = ' '.join([s.capitalize()
                                    for s in section.split(' ')])
            if section in ('Parameters', 'Other Parameters', 'Returns',
                           'Raises', 'Warns', 'Attributes', 'Methods'):
                self[section] = self._parse_param_list(content)
                self.section_order.append(section)
            elif section.startswith('.. index::'):
                self['index'] = self._parse_index(section, content)
                self.section_order.append('index')
            elif section.lower() == 'see also':
                self['See Also'] = self._parse_see_also(content)
                self.section_order.append('See Also')
            else:
                # Raises ValueError (via __setitem__) for unknown sections.
                self[section] = content
                self.section_order.append(section)

    # string conversion routines

    def _str_header(self, name, symbol='-'):
        return [name, len(name) * symbol]

    def _str_indent(self, doc, indent=4):
        return [' ' * indent + line for line in doc]

    def _str_signature(self):
        if not self['Signature']:
            return []
        return ["*%s*" % self['Signature'].replace('*', '\\*')] + ['']

    def _str_summary(self):
        return self['Summary'] + ['']

    def _str_extended_summary(self):
        return self['Extended Summary'] + ['']

    def _str_param_list(self, name):
        out = []
        if self[name]:
            out += self._str_header(name)
            for param, param_type, desc in self[name]:
                out += ['%s : %s' % (param, param_type)]
                out += self._str_indent(desc)
            out += ['']
        return out

    def _str_section(self, name):
        out = []
        if self[name]:
            out += self._str_header(name)
            out += self[name]
            out += ['']
        return out

    def _str_see_also(self):
        if not self['See Also']:
            return []
        out = []
        out += self._str_header("See Also")
        last_had_desc = True
        for func, desc in self['See Also']:
            if desc or last_had_desc:
                out += ['']
                out += ["`%s`_" % func]
            else:
                # Consecutive entries without a description share one line.
                out[-1] += ", `%s`_" % func
            if desc:
                out += self._str_indent(desc)
                last_had_desc = True
            else:
                last_had_desc = False
        out += ['']
        return out

    def _str_index(self):
        idx = self['index']
        out = []
        out += ['.. index:: %s' % idx.get('default', '')]
        # items() rather than the Python-2-only iteritems().
        for section, references in idx.items():
            if section == 'default':
                continue
            out += [' :%s: %s' % (section, ', '.join(references))]
        return out

    def __str__(self):
        out = []
        out += self._str_signature()
        out += self._str_summary()
        out += self._str_extended_summary()
        for param_list in ('Parameters', 'Other Parameters', 'Returns',
                           'Raises', 'Warns'):
            out += self._str_param_list(param_list)
        out += self._str_see_also()
        for s in ('Notes', 'References', 'Examples'):
            out += self._str_section(s)
        out += self._str_index()
        return '\n'.join(out)

    # --

    def get_errors(self, check_order=True):
        """Return a list of style-error messages for this docstring.

        Parameters
        ----------
        check_order : bool, optional
            If True, also report sections that appear out of the canonical
            order.

        Returns
        -------
        errors : list of str
            One message per line longer than 75 characters, plus one
            message per section found after the canonical order has been
            exhausted.
        """
        errors = []
        self._doc.reset()
        for j, line in enumerate(self._doc):
            if len(line) > 75:
                errors.append("Line %d too long: \"%s\"..."
                              % (j + 1, line[:30]))
        if check_order:
            canonical_order = ['Signature', 'Summary', 'Extended Summary',
                               'Attributes', 'Methods', 'Parameters',
                               'Other Parameters', 'Returns', 'Raises',
                               'Warns', 'See Also', 'Notes', 'References',
                               'Examples', 'index']
            for s in self.section_order:
                # Drop canonical names until the current section is at the
                # front; running out means it appeared too late.
                while canonical_order and s != canonical_order[0]:
                    canonical_order.pop(0)
                if not canonical_order:
                    errors.append(
                        "Sections in wrong order (starting at %s)" % s)
        return errors
def indent(str, indent=4):
    """Prefix every line of ``str`` with ``indent`` spaces.

    Parameters
    ----------
    str : str or None
        Text with lines separated by '\\n'.
    indent : int, optional
        Number of spaces to prepend to each line.

    Returns
    -------
    str
        The indented text, or just the indentation when ``str`` is None.
    """
    pad = ' ' * indent
    if str is None:
        return pad
    padded = [pad + piece for piece in str.split('\n')]
    return '\n'.join(padded)
class NumpyFunctionDocString(NumpyDocString):
    """Docstring of a function or method.

    Uses the base parser with a section table that has no 'Attributes' or
    'Methods' entries, and adds function-specific checks to `get_errors`.
    """

    def _parse(self):
        # Replace the section table set up by the base __init__ before
        # parsing; sections missing from it are rejected as unknown.
        self._parsed_data = {
            'Signature': '',
            'Summary': '',
            'Extended Summary': [],
            'Parameters': [],
            'Other Parameters': [],
            'Returns': [],
            'Raises': [],
            'Warns': [],
            'See Also': [],
            'Notes': [],
            'References': '',
            'Examples': '',
            'index': {},
        }
        return NumpyDocString._parse(self)

    def get_errors(self, check_order=True):
        """Return base-class errors plus function-specific ones.

        Parameters
        ----------
        check_order : bool, optional
            Forwarded to `NumpyDocString.get_errors`.

        Returns
        -------
        errors : list of str
            Adds messages for a missing signature, a missing summary, a
            summary longer than 3*80 characters, and a missing Parameters
            section (unless the signature is of the form ``name()``).
        """
        errors = NumpyDocString.get_errors(self, check_order)
        if not self['Signature']:
            errors.append("No function signature")
        if not self['Summary']:
            errors.append("No function summary line")
        if len(" ".join(self['Summary'])) > 3 * 80:
            errors.append("Brief function summary is longer than 3 lines")
        # Raw string: same pattern, without invalid string escapes.
        takes_no_args = re.match(r'^\w+\(\)$', self['Signature'])
        if not (takes_no_args or self['Parameters']):
            errors.append("No Parameters section")
        return errors
class NumpyClassDocString(NumpyDocString):
    """Docstring of a class.

    Uses the base parser with a section table that has no 'Returns' entry.
    """

    def _parse(self):
        # Replace the section table set up by the base __init__ before
        # parsing; sections missing from it are rejected as unknown.
        self._parsed_data = {
            'Signature': '',
            'Summary': '',
            'Extended Summary': [],
            'Parameters': [],
            'Other Parameters': [],
            'Raises': [],
            'Warns': [],
            'See Also': [],
            'Notes': [],
            'References': '',
            'Examples': '',
            'index': {},
            'Attributes': [],
            'Methods': [],
        }
        return NumpyDocString._parse(self)

    def __str__(self):
        out = []
        out += self._str_signature()
        out += self._str_summary()
        out += self._str_extended_summary()
        for param_list in ('Attributes', 'Methods', 'Parameters', 'Raises',
                           'Warns'):
            out += self._str_param_list(param_list)
        out += self._str_see_also()
        for s in ('Notes', 'References', 'Examples'):
            out += self._str_section(s)
        out += self._str_index()
        return '\n'.join(out)

    def get_errors(self, check_order=True):
        """Return the base-class errors.

        ``check_order`` is accepted and forwarded so this override keeps
        the same signature as `NumpyDocString.get_errors`.
        """
        return NumpyDocString.get_errors(self, check_order)
class NumpyModuleDocString(NumpyDocString):
    """
    Module doc strings: no parsing is done.

    The text is kept only as the de-indented lines held by the reader;
    `get_errors` therefore skips the section-order check.
    """

    def _parse(self):
        # Deliberately does not touch the section table.
        self.out = list()

    def __str__(self):
        stored_lines = self._doc._str
        return "\n".join(stored_lines)

    def get_errors(self):
        return NumpyDocString.get_errors(self, check_order=False)
def header(text, style='-'):
    """Return ``text`` underlined with ``style``, each followed by a newline.

    The underline repeats ``style`` once per character of ``text``.
    """
    underline = style * len(text)
    # The trailing '' makes join() emit the final newline.
    return '\n'.join([text, underline, ''])
class SphinxDocString(NumpyDocString):
    """NumPy docstring rendered with bold section titles and indented bodies.

    Parsing is inherited unchanged; only the string conversion routines
    differ from `NumpyDocString`.
    """

    # string conversion routines

    def _str_header(self, name, symbol='`'):
        # The underline is 4 characters longer than the name to cover the
        # surrounding '**' markers.
        return ['**' + name + '**'] + [symbol * (len(name) + 4)]

    def _str_indent(self, doc, indent=4):
        return [' ' * indent + line for line in doc]

    def _str_signature(self):
        return ['``%s``' % self['Signature'].replace('*', '\\*')] + ['']

    def _str_summary(self):
        return self['Summary'] + ['']

    def _str_extended_summary(self):
        return self['Extended Summary'] + ['']

    def _str_param_list(self, name):
        out = []
        if self[name]:
            out += self._str_header(name)
            out += ['']
            for param, param_type, desc in self[name]:
                out += self._str_indent(
                    ['**%s** : %s' % (param, param_type)])
                out += ['']
                out += self._str_indent(desc, 8)
                out += ['']
        return out

    def _str_section(self, name):
        out = []
        if self[name]:
            out += self._str_header(name)
            out += ['']
            out += self._str_indent(self[name])
            out += ['']
        return out

    def _str_index(self):
        idx = self['index']
        out = []
        out += ['.. index:: %s' % idx.get('default', '')]
        # items() rather than the Python-2-only iteritems().
        for section, references in idx.items():
            if section == 'default':
                continue
            out += [' :%s: %s' % (section, ', '.join(references))]
        return out

    def __str__(self, indent=0):
        """Render summary, parameter lists and text sections.

        The signature, See Also and index parts are not included; every
        output line is prefixed with ``indent`` spaces.
        """
        out = []
        out += self._str_summary()
        out += self._str_extended_summary()
        for param_list in ('Parameters', 'Returns', 'Raises', 'Warns'):
            out += self._str_param_list(param_list)
        for s in ('Notes', 'References', 'Examples'):
            out += self._str_section(s)
        out = self._str_indent(out, indent)
        return '\n'.join(out)
class FunctionDoc(object):
    """Render the documentation of a single callable as text."""

    def __init__(self, func):
        """
        Parameters
        ----------
        func : callable
            Object whose docstring and signature are rendered by `__str__`.
        """
        self._f = func

    def _argspec(self):
        """Return the parenthesised argument list of the wrapped callable.

        Uses ``inspect.signature`` where the running Python provides it and
        falls back to ``getargspec``/``formatargspec`` otherwise, since
        those two functions were removed in Python 3.11.  Lets the
        TypeError/ValueError raised for uninspectable callables propagate.
        """
        signature = getattr(inspect, 'signature', None)
        if signature is not None:
            return str(signature(self._f))
        return inspect.formatargspec(*inspect.getargspec(self._f))

    def __str__(self):
        out = ''
        doclines = inspect.getdoc(self._f) or ''
        try:
            doc = SphinxDocString(doclines)
        except Exception as e:
            # Best effort: report the parse failure and render nothing
            # for this callable rather than aborting the caller.
            print('*' * 78)
            print("ERROR: '%s' while parsing `%s`" % (e, self._f))
            print('*' * 78)
            return out
        if doc['Signature']:
            out += '%s\n' % header('**%s**' %
                                   doc['Signature'].replace('*', '\\*'), '-')
        else:
            try:
                # try to read signature
                argspec = self._argspec().replace('*', '\\*')
                out += header('%s%s' % (self._f.__name__, argspec), '-')
            except (TypeError, ValueError):
                # The callable cannot be introspected; fall back to a
                # bare "name()" heading.
                out += '%s\n' % header('**%s()**' % self._f.__name__, '-')
        out += str(doc)
        return out
class ClassDoc(object):
    """Render the documentation of every public method of a class."""

    def __init__(self, cls, modulename=''):
        """
        Parameters
        ----------
        cls : class
            Class to document.
        modulename : str, optional
            Module prefix; a trailing '.' is appended when missing.

        Raises
        ------
        ValueError
            If ``cls`` is not a class.
        """
        if not inspect.isclass(cls):
            raise ValueError("Initialise using an object")
        self._cls = cls
        if modulename and not modulename.endswith('.'):
            modulename += '.'
        self._mod = modulename
        self._name = cls.__name__

    @property
    def methods(self):
        """Names of callable members that do not start with an underscore."""
        return [name for name, func in inspect.getmembers(self._cls)
                if not name.startswith('_') and callable(func)]

    def __str__(self):
        # Collect the pieces and join once instead of growing a string.
        parts = []
        for m in self.methods:
            print("Parsing `%s`" % m)
            parts.append(str(FunctionDoc(getattr(self._cls, m))) + '\n\n')
            parts.append('.. index::\n single: %s; %s\n\n' % (self._name, m))
        return ''.join(parts)
def handle_function(val, name):
    """Collect docstring errors for a module-level function.

    Parameters
    ----------
    val : function
        Function whose docstring is checked.
    name : str
        Name reported with each error.

    Returns
    -------
    func_errors : list of tuple
        ``(name, error_description)`` pairs; a single "missing" entry when
        the function has no docstring.
    """
    text = inspect.getdoc(val)
    if text is None:
        return [(name, '**missing** function-level docstring')]
    found = NumpyFunctionDocString(text).get_errors()
    return [(name, message) for message in found]
def handle_method(method, method_name, class_name):
    """Collect docstring errors for one method of a class.

    Parameters
    ----------
    method : callable
        Method whose docstring is checked.
    method_name : str
        Name of the method, reported with each error.
    class_name : str
        Name of the owning class, reported with each error.

    Returns
    -------
    method_errors : list of tuple
        ``(class_name, method_name, error_description)`` triples; a single
        "missing" entry when the method has no docstring.
    """
    text = inspect.getdoc(method)
    if text is None:
        return [(class_name, method_name,
                 '**missing** method-level docstring')]
    found = NumpyFunctionDocString(text).get_errors()
    return [(class_name, method_name, message) for message in found]
def handle_class(val, class_name):
    """Collect docstring errors for a class and its public methods.

    Parameters
    ----------
    val : class
        Class whose docstring and methods are checked.
    class_name : str
        Name reported with each error.

    Returns
    -------
    cls_errors : list of tuple
        ``(class_name, error_description)`` for class-level problems,
        followed by ``(class_name, method_name, error_description)`` for
        each public callable member.
    """
    docstring = inspect.getdoc(val)
    if docstring is None:
        cls_errors = [(class_name, '**missing** class-level docstring')]
    else:
        # Include the class name so that every class-level entry has the
        # same (name, description) shape; these used to be bare 1-tuples.
        cls_errors = [
            (class_name, e) for e in
            NumpyClassDocString(docstring).get_errors()
        ]
    # Get public methods and parse their docstrings.  getmembers() returns
    # members sorted by name, so the report order is deterministic.
    for m_name, method in inspect.getmembers(val):
        if not m_name.startswith('_') and callable(method):
            cls_errors.extend(handle_method(method, m_name, class_name))
    return cls_errors
def docstring_errors(filename, global_dict=None):
    """
    Run a Python file, parse the docstrings of all the classes
    and functions it declares, and return the errors found.

    Parameters
    ----------
    filename : str
        Filename of the module to run.
    global_dict : dict, optional
        Globals dictionary in which the file is executed.  It is
        modified in place; ``__file__`` is set when missing.

    Returns
    -------
    all_errors : list
        Each entry of the list is a tuple, of length 2 or 3, with
        format either

        (func_or_class_name, docstring_error_description)

        or

        (class_name, method_name, docstring_error_description)

    Notes
    -----
    The file is *executed*, so only run this on trusted code.  A
    ``SystemExit`` raised by the file is swallowed.
    """
    if global_dict is None:
        global_dict = {}
    if '__file__' not in global_dict:
        global_dict['__file__'] = filename
    try:
        # compile() + exec() replaces the Python-2-only execfile().  The
        # file is read as bytes so compile() can honour a coding cookie.
        with open(filename, 'rb') as source_file:
            code = compile(source_file.read(), filename, 'exec')
        exec(code, global_dict)
    except SystemExit:
        pass
    # Objects defined by the executed file (rather than imported into it)
    # are recognised by their __module__.  When the globals carry no
    # __name__, classes pick up the name of the builtins module and
    # functions get None; when the caller supplied __name__, both carry it.
    local_modules = (None, object.__module__, global_dict.get('__name__'))
    all_errors = []
    for key, val in list(global_dict.items()):
        if key.startswith('_'):
            continue
        is_function = inspect.isfunction(val)
        # isclass() also accepts classes with a custom metaclass (and
        # old-style classes on Python 2).
        is_class = inspect.isclass(val)
        if not (is_function or is_class):
            continue
        if getattr(val, '__module__', None) not in local_modules:
            continue
        if is_function:
            all_errors.extend(handle_function(val, key))
        else:
            all_errors.extend(handle_class(val, key))
    return all_errors
if __name__ == "__main__":
    # Command-line entry point: check the file named by the first argument
    # and exit with status 1 if any docstring error was found, 0 otherwise.
    if len(sys.argv) < 2:
        # Report the missing argument instead of failing with IndexError.
        sys.stderr.write("usage: %s FILENAME\n" % sys.argv[0])
        sys.exit(2)
    all_errors = docstring_errors(sys.argv[1])
    if all_errors:
        # Single-argument print() calls behave the same on Python 2 and 3.
        print("%s docstring errors %s" % ("*" * 30, "*" * 30))
        for line in all_errors:
            print(':'.join(line))
    sys.exit(int(len(all_errors) > 0))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment