Skip to content

Instantly share code, notes, and snippets.

@rldotai
Created September 12, 2022 19:24
Show Gist options
  • Save rldotai/31fd8fa76f8bdea1537f0b2612b1e322 to your computer and use it in GitHub Desktop.
Save rldotai/31fd8fa76f8bdea1537f0b2612b1e322 to your computer and use it in GitHub Desktop.
Get comments in the supplied Python code
import io, tokenize
def extract_comments(code: str | io.TextIOBase) -> str:
"""
Extract comments from a piece of Python code, returning a string of
*just* the comments.
Example:
>>> extract_comments(r'''
... # A comment
... def identity(x):
... "This is a docstring, not a comment."
... # Here's a comment inside a function
... return x # and an inline comment
...
... ''')
"# A comment\n# Here's a comment inside a function\n# and an inline comment\n"
A modified version of: https://stackoverflow.com/a/34512388
set to use Python 3.
"""
res = []
last = None
if isinstance(code, str):
buffer = io.StringIO(code)
else:
buffer = code
# pass in stringio.readline to generate_tokens
for toktype, tokval, begin, end, line in tokenize.generate_tokens(buffer.readline):
if toktype == tokenize.COMMENT:
res.append((toktype, tokval))
elif toktype in (tokenize.NEWLINE, tokenize.NL) and last == tokenize.COMMENT:
res.append((toktype, tokval))
else:
pass
# Record the token type (for preserving newlines)
last = toktype
return tokenize.untokenize(res)
# Command-line entry point: print the comments found in the given file.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input", type=argparse.FileType("r"), help="Input to extract comments from"
    )
    args = parser.parse_args()
    # argparse opens the file for us; make sure it is closed again once the
    # source has been read.
    with args.input as source:
        print(extract_comments(source.read()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment