nuno-andre/extract_comments.py

## extract_comments.py
"""
Extract block and inline comments, docstrings and
no-op'd string literals from Python code.
"""
from tokenize import tok_name, tokenize, TokenInfo
import itertools
import ast

def _token_is_type(t, tok_type):
    """Return True when token *t* has the symbolic type name *tok_type*."""
    return tok_name[t.type] == tok_type


# Attach the predicate to TokenInfo so tokens answer `tok.is_type('COMMENT')`.
TokenInfo.is_type = _token_is_type

def parse_line(line):
    """Yield the comment and bare-string texts found in one logical line.

    Args:
        line: Sequence of tokens (each offering ``is_type(name)`` and
            ``string``) that make up one logical line, i.e. the tokens
            found between two ``NEWLINE`` tokens.

    Yields:
        In source order: the text of every ``COMMENT`` token with its
        leading ``#`` characters removed, and, when the only code on the
        line is string literals (a docstring or a no-op'd string), the
        evaluated value of that literal.
    """
    # Tokens that carry layout or comments rather than code.
    layout = ('COMMENT', 'NL', 'NEWLINE', 'INDENT', 'DEDENT',
              'ENCODING', 'ENDMARKER')
    code = [t for t in line
            if not any(t.is_type(name) for name in layout)]
    # A statement such as ``return "x"`` has no OP token but is not a bare
    # string, so every code token must be a STRING, not only the last one.
    pending_string = bool(code) and all(t.is_type('STRING') for t in code)

    for tok in line:
        if tok.is_type('COMMENT'):
            yield tok.string.lstrip('#')
        elif pending_string and tok.is_type('STRING'):
            # Adjacent literals are implicitly concatenated; evaluate them
            # together, once, at the position of the first one.
            pending_string = False
            yield ast.literal_eval(' '.join(t.string for t in code))

def parse_file(file):
    """Yield every comment, docstring and no-op'd string found in *file*.

    Args:
        file: Path (or anything ``open`` accepts) of the Python source
            file to scan.

    Yields:
        The stripped text of each item produced by ``parse_line``, in the
        order the logical lines appear in the file.
    """
    # The context manager closes the file once the generator is exhausted
    # or closed; tokenize() needs a bytes readline, hence mode 'rb'.
    with open(file, 'rb') as source:
        logical_lines = itertools.groupby(
            tokenize(source.readline),
            lambda t: t.is_type('NEWLINE'),
        )
        for is_newline, group in logical_lines:
            if is_newline:
                # The separator group holds only the NEWLINE token itself.
                continue
            for comment in parse_line(list(group)):
                yield comment.strip()
	"""
	Extract block and inline comments, docstrings and
	no-op'd string literals from Python code.
	"""
	from tokenize import tok_name, tokenize, TokenInfo
	import itertools
	import ast

	def _token_is_type(t, tok_type):
	    """Return True when token *t* has the symbolic type name *tok_type*."""
	    return tok_name[t.type] == tok_type


	# Attach the predicate to TokenInfo so tokens answer `tok.is_type('COMMENT')`.
	TokenInfo.is_type = _token_is_type

	def parse_line(line):
	    """Yield the comment and bare-string texts found in one logical line.

	    Args:
	        line: Sequence of tokens (each offering ``is_type(name)`` and
	            ``string``) that make up one logical line, i.e. the tokens
	            found between two ``NEWLINE`` tokens.

	    Yields:
	        In source order: the text of every ``COMMENT`` token with its
	        leading ``#`` characters removed, and, when the only code on the
	        line is string literals (a docstring or a no-op'd string), the
	        evaluated value of that literal.
	    """
	    # Tokens that carry layout or comments rather than code.
	    layout = ('COMMENT', 'NL', 'NEWLINE', 'INDENT', 'DEDENT',
	              'ENCODING', 'ENDMARKER')
	    code = [t for t in line
	            if not any(t.is_type(name) for name in layout)]
	    # A statement such as ``return "x"`` has no OP token but is not a bare
	    # string, so every code token must be a STRING, not only the last one.
	    pending_string = bool(code) and all(t.is_type('STRING') for t in code)

	    for tok in line:
	        if tok.is_type('COMMENT'):
	            yield tok.string.lstrip('#')
	        elif pending_string and tok.is_type('STRING'):
	            # Adjacent literals are implicitly concatenated; evaluate them
	            # together, once, at the position of the first one.
	            pending_string = False
	            yield ast.literal_eval(' '.join(t.string for t in code))

	def parse_file(file):
	    """Yield every comment, docstring and no-op'd string found in *file*.

	    Args:
	        file: Path (or anything ``open`` accepts) of the Python source
	            file to scan.

	    Yields:
	        The stripped text of each item produced by ``parse_line``, in the
	        order the logical lines appear in the file.
	    """
	    # The context manager closes the file once the generator is exhausted
	    # or closed; tokenize() needs a bytes readline, hence mode 'rb'.
	    with open(file, 'rb') as source:
	        logical_lines = itertools.groupby(
	            tokenize(source.readline),
	            lambda t: t.is_type('NEWLINE'),
	        )
	        for is_newline, group in logical_lines:
	            if is_newline:
	                # The separator group holds only the NEWLINE token itself.
	                continue
	            for comment in parse_line(list(group)):
	                yield comment.strip()