Remove comments and docstrings from a Python file.
""" Strip comments and docstrings from a file. """
import sys, token, tokenize
def _starts_docstring(tokens, index, prev_toktype):
    """Tell whether the STRING token at ``index`` begins a string-only statement.

    Args:
        tokens: the full list of tokens produced by ``tokenize``.
        index: position in ``tokens`` of a ``token.STRING`` token.
        prev_toktype: type of the closest preceding token that is neither
            a comment nor a non-logical newline.

    Returns:
        True when the string opens a statement and nothing but further
        string literals (and an optional trailing comment) follows before
        the end of that statement.
    """
    # A statement can only begin right after an indent, a dedent or the
    # end of the previous logical line.
    if prev_toktype not in (token.INDENT, token.NEWLINE, token.DEDENT):
        return False
    pos = index + 1
    # Skip implicitly concatenated literals and a trailing comment.
    while pos < len(tokens) and tokens[pos][0] in (token.STRING, tokenize.COMMENT):
        pos += 1
    # Anything else before the logical line ends (".join(...)", "% x", ...)
    # means the string is part of a real expression and must be kept.
    return pos == len(tokens) or tokens[pos][0] in (token.NEWLINE, token.ENDMARKER)


def do_file(fname):
    """Strip comments and docstrings from one Python source file.

    The file is tokenized and written back out, token by token, to a new
    file named ``fname + ",strip"``; ``fname`` itself is left untouched.
    Comments are dropped. A statement made up only of string literals
    (a docstring) is replaced by ``pass`` so that a body containing nothing
    but a docstring is still valid code.

    Args:
        fname: path of the Python source file to process.

    Raises:
        OSError: if ``fname`` cannot be read or the output cannot be written.
        tokenize.TokenError: if the source ends inside an unfinished
            statement or string.
    """
    # tokenize.open() honours the file's encoding declaration / BOM.
    # Tokenizing everything up front also means a broken source file never
    # leaves a half-written output file behind.
    with tokenize.open(fname) as source:
        encoding = source.encoding
        tokens = list(tokenize.generate_tokens(source.readline))

    # Start as if an INDENT had just been seen so that a module docstring
    # on the very first line is recognised.
    prev_toktype = token.INDENT
    last_lineno = -1
    last_col = 0
    in_docstring = False
    pieces = []

    for index, tok in enumerate(tokens):
        toktype, ttext, (slineno, scol), (elineno, ecol) = tok[:4]

        if slineno > last_lineno:
            last_col = 0
        # Tokens carry no inter-token whitespace; rebuild it from columns.
        padding = " " * (scol - last_col) if scol > last_col else ""

        if toktype == tokenize.COMMENT:
            # Comment: dropped together with the blanks in front of it.
            pass
        elif toktype == token.STRING and (
            in_docstring or _starts_docstring(tokens, index, prev_toktype)
        ):
            # Docstring: emit a single "pass" for the whole statement,
            # including any implicitly concatenated continuation strings.
            if not in_docstring:
                pieces.append(padding + "pass")
            in_docstring = True
        else:
            in_docstring = False
            pieces.append(padding + ttext)

        # Comments and blank-line NL tokens do not separate statements, so
        # they must not hide the INDENT/NEWLINE that precedes a docstring.
        if toktype not in (tokenize.COMMENT, tokenize.NL):
            prev_toktype = toktype
        last_col = ecol
        last_lineno = elineno

    with open(fname + ",strip", "w", encoding=encoding) as mod:
        mod.write("".join(pieces))
if __name__ == '__main__':
    # Script entry point: strip the file named by the first command-line
    # argument; the result lands next to it with a ",strip" suffix.
    do_file(sys.argv[1])
LouisPi commented Jul 9, 2019

Thanks for this! It works perfectly for my project.

biwa7636 commented Nov 27, 2019

Works great, thanks!

Practcdi commented Jan 10, 2020

Single line of code
sed 's/#.*$//g'

kb3dow commented Nov 2, 2020

There is a bug in the code, having to do with the check `prev_toktype == token.INDENT`.
If a docstring (single-line or multi-line) begins at column 1 (with no preceding spaces/tabs), it is not stripped out.

So an input of the form

""" string 1 """
    """ string 2 """

In this case string 1 is not stripped out

kb3dow commented Nov 2, 2020


Changing

    if toktype == token.STRING and prev_toktype == token.INDENT:

to

    if toktype == token.STRING and (prev_toktype == token.INDENT or prev_toktype == token.NEWLINE):

does the job.

newdive commented Dec 11, 2020

this will not generate legal code if a method has nothing but a doc string
you can check python/lib/ for example

thread13 commented Jul 24, 2021

credits: Ned Batchelder

check also the comments to his answer

