Skip to content

Instantly share code, notes, and snippets.

@kampersanda
Forked from ChunMinChang/remove_c_style_comments.py
Last active May 1, 2021 05:56
Show Gist options
  • Save kampersanda/646f82b7c40e7814b3fd736f54ce75f7 to your computer and use it in GitHub Desktop.
Save kampersanda/646f82b7c40e7814b3fd736f54ce75f7 to your computer and use it in GitHub Desktop.
Python: Remove C/C++ style comments #parser
#!/usr/bin/env python3
import os
from argparse import ArgumentParser
from remove_c_style_comments import *
from glob import glob
def clean_comment(cpp_files, output_dir):
    """Strip C/C++ comments from each file and write the result under output_dir.

    Args:
        cpp_files: Iterable of paths of the source files to process.
        output_dir: Directory prefix for the results. Each result is written
            to ``f'{output_dir}/{cpp_file}'``, i.e. the input path (including
            any directory components it carries) re-rooted under output_dir.
    """
    for cpp_file in cpp_files:
        print(f'format {cpp_file}')
        # Read everything and close the source before opening the destination:
        # the destination path can resolve to the very same file.
        with open(cpp_file, 'rt') as src:
            uncmt_text = commentRemover(src.read())
        out_path = f'{output_dir}/{cpp_file}'
        # The input path may contain directories that do not exist yet below
        # output_dir; create them so the write cannot fail on a missing parent.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        with open(out_path, 'wt') as dst:
            dst.write(uncmt_text)
def _str_to_bool(value):
    """Convert a command-line token such as 'true' or 'false' into a bool."""
    from argparse import ArgumentTypeError

    lowered = value.strip().lower()
    if lowered in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if lowered in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise ArgumentTypeError(f'expected a boolean value, got {value!r}')


def main():
    """Remove comments from every *.cpp and *.hpp file in --input_dir.

    Command-line options:
        --input_dir DIR     Directory to scan (default: '.').
        --overwrite [BOOL]  When true, results are written relative to '.';
                            otherwise they go under a freshly created
                            './uncmt' directory (default: False).
    """
    parser = ArgumentParser()
    parser.add_argument('--input_dir', type=str, default='.')
    # type=bool would treat every non-empty string (even "False") as True, so
    # parse the token explicitly. nargs='?' also allows a bare `--overwrite`.
    parser.add_argument('--overwrite', type=_str_to_bool, nargs='?',
                        const=True, default=False)
    args = parser.parse_args()

    output_dir = '.'
    if not args.overwrite:
        output_dir += '/uncmt'
        # exist_ok=False: raises FileExistsError if ./uncmt is already there,
        # so output from an earlier run is never silently mixed with this one.
        os.makedirs(output_dir, exist_ok=False)

    clean_comment(glob(f'{args.input_dir}/*.cpp'), output_dir)
    clean_comment(glob(f'{args.input_dir}/*.hpp'), output_dir)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
import re
import sys
def removeComments(text):
    """Return `text` with C-style comments deleted.

    Both `// ...` line comments and `/* ... */` block comments are dropped
    entirely (nothing is inserted in their place). Double- and single-quoted
    literals are matched as whole units, so comment markers inside them are
    kept verbatim.

    text: blob of text with comments (can include newlines)
    returns: text with comments removed
    """
    # Group 2 captures everything that is NOT a comment; comment matches
    # leave it unset. Leading whitespace is ignored under re.VERBOSE.
    pattern = r"""
        ## --------- COMMENT ---------
        //.*?$ ## Start of // .... comment
        | ##
        /\* ## Start of /* ... */ comment
        [^*]*\*+ ## Non-* followed by 1-or-more *'s
        ( ##
        [^/*][^*]*\*+ ##
        )* ## 0-or-more things which don't start with /
        ## but do end with '*'
        / ## End of /* ... */ comment
        | ## -OR- various things which aren't comments:
        ( ##
        ## ------ " ... " STRING ------
        " ## Start of " ... " string
        ( ##
        \\. ## Escaped char
        | ## -OR-
        [^"\\] ## Non "\ characters
        )* ##
        " ## End of " ... " string
        | ## -OR-
        ##
        ## ------ ' ... ' STRING ------
        ' ## Start of ' ... ' string
        ( ##
        \\. ## Escaped char
        | ## -OR-
        [^'\\] ## Non '\ characters
        )* ##
        ' ## End of ' ... ' string
        | ## -OR-
        ##
        ## ------ ANYTHING ELSE -------
        . ## Anything other char
        [^/"'\\]* ## Chars which doesn't start a comment, string
        ) ## or escape
        """
    scanner = re.compile(pattern, re.VERBOSE | re.MULTILINE | re.DOTALL)
    kept = []
    for found in scanner.finditer(text):
        piece = found.group(2)
        if piece:
            kept.append(piece)
    return "".join(kept)
def commentRemover(text):
    """Return `text` with each C-style comment replaced by a single space.

    The pattern matches `// ...` line comments, `/* ... */` block comments,
    and single- or double-quoted literals. Quoted literals are matched only
    so that comment markers inside them are left alone; they are returned
    unchanged.
    """
    token_re = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        re.DOTALL | re.MULTILINE
    )

    def _substitute(found):
        token = found.group(0)
        # Only comment matches begin with '/'; quoted literals begin with a
        # quote. A comment becomes a space (not an empty string).
        return " " if token.startswith('/') else token

    return token_re.sub(_substitute, text)
def main(filename='test.h'):
    """Print the contents of `filename` with C-style comments removed.

    Args:
        filename: Path of the file to read. Defaults to 'test.h', the file
            this script read before the parameter existed.
    """
    with open(filename) as f:
        uncmtFile = commentRemover(f.read())
    print(uncmtFile)


if __name__ == "__main__":
    # An optional first command-line argument overrides the default input
    # file; with no argument the slice is empty and the default is used.
    main(*sys.argv[1:2])
/* This is a C-style comment. */
This is not a comment.
/* This is another
* C-style comment.
*/
"This is /* also not a comment */"
// This is also a comment
This is still // a comment
This is still /* a comment */
This is still /* a comment */ again
This is the final line
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment