Skip to content

Instantly share code, notes, and snippets.

@nuno-andre
Created March 13, 2019 06:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nuno-andre/443c968a678c6f2f6b7650902bebe1ce to your computer and use it in GitHub Desktop.
Save nuno-andre/443c968a678c6f2f6b7650902bebe1ce to your computer and use it in GitHub Desktop.
"""
Extract block and inline comments, docstrings and
no-op'd string literals from Python code.
"""
from tokenize import tok_name, tokenize, TokenInfo
import itertools
import ast
def _token_is_type(token, tok_type):
    """Return True when *token*'s type is named *tok_type*.

    *tok_type* is a symbolic token name as found in ``tok_name``
    (for example ``'COMMENT'`` or ``'STRING'``).
    """
    return tok_type == tok_name[token.type]


# Attach the helper to TokenInfo so every token exposes ``is_type``.
TokenInfo.is_type = _token_is_type
def parse_line(line):
    """Yield the comments and the no-op'd string literal of one logical line.

    Args:
        line: Sequence of ``TokenInfo`` objects forming one logical line
            (the tokens found between two ``NEWLINE`` tokens). May be empty.

    Yields:
        In source order: the text of every ``COMMENT`` token with its
        leading ``#`` characters removed and, when the statement consists
        of nothing but string literals, the evaluated value of that
        (possibly implicitly concatenated) literal.
    """
    # Tokens that only describe layout; they never turn a line into a
    # "real" statement.
    layout = ('ENCODING', 'INDENT', 'DEDENT', 'NL', 'NEWLINE', 'ENDMARKER')

    found = []
    literals = []
    string_slot = None   # position of the string value among the comments
    bare_string = True   # True while only strings/comments/layout were seen
    for token in line:
        if token.is_type('COMMENT'):
            found.append(token.string.lstrip('#'))
        elif token.is_type('STRING'):
            if string_slot is None:
                string_slot = len(found)
            literals.append(token.string)
        elif not any(token.is_type(kind) for kind in layout):
            # Any other token (NAME, OP, NUMBER, ...) means the string, if
            # any, is part of a statement such as ``x = "s"`` or
            # ``return "s"`` and is therefore not a no-op.
            bare_string = False

    if literals and bare_string:
        try:
            # Joining the pieces evaluates implicit concatenation as a whole.
            value = ast.literal_eval(' '.join(literals))
        except (ValueError, SyntaxError):
            # Not a plain literal (e.g. an f-string): it cannot be evaluated
            # statically, so skip it instead of aborting the extraction.
            pass
        else:
            found.insert(string_slot, value)

    yield from found
def parse_file(file):
    """Yield every comment, docstring and no-op'd string found in *file*.

    The file is tokenized and the token stream is split into logical lines
    at each ``NEWLINE`` token; every group of tokens is then handed to
    ``parse_line``.

    Args:
        file: Path of the Python source file to scan.

    Yields:
        Each extracted comment or string value with surrounding whitespace
        stripped.
    """
    # Binary mode: ``tokenize`` expects a readline callable returning bytes.
    # The ``with`` block guarantees the handle is closed once the generator
    # is exhausted or closed.
    with open(file, 'rb') as source:
        logical_lines = itertools.groupby(
            tokenize(source.readline),
            lambda token: token.is_type('NEWLINE'),
        )
        for _, group in logical_lines:
            for comment in parse_line(list(group)):
                yield comment.strip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment