-
-
Save kampersanda/646f82b7c40e7814b3fd736f54ce75f7 to your computer and use it in GitHub Desktop.
Python: Remove C/C++ style comments #parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
from argparse import ArgumentParser | |
from remove_c_style_comments import * | |
from glob import glob | |
def clean_comment(cpp_files, output_dir):
    """Strip C/C++ comments from each file and write the result under output_dir.

    cpp_files: iterable of source file paths
    output_dir: directory prefix; each result is written to
                f'{output_dir}/{cpp_file}', i.e. the input path is appended
                to the prefix unchanged
    """
    for cpp_file in cpp_files:
        print(f'format {cpp_file}')
        # Read the whole input and close it before the destination is opened:
        # when output_dir is '.' and cpp_file is relative, the destination is
        # the input file itself.
        with open(cpp_file, 'rt') as src:
            uncmt_text = commentRemover(src.read())
        out_path = f'{output_dir}/{cpp_file}'
        # cpp_file may contain directory components (e.g. 'src/a.cpp'), so the
        # matching sub-directory under output_dir has to exist before writing.
        parent = os.path.dirname(out_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(out_path, 'wt') as dst:
            dst.write(uncmt_text)
def _parse_bool(value):
    """Convert a command-line string such as 'true' or '0' to a bool."""
    # Local import: the file only imports ArgumentParser from argparse.
    from argparse import ArgumentTypeError

    lowered = value.strip().lower()
    if lowered in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if lowered in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise ArgumentTypeError(f'expected a boolean value, got {value!r}')


def main():
    """Command-line entry point: strip comments from *.cpp and *.hpp files.

    Options:
      --input_dir  directory searched (non-recursively) for *.cpp / *.hpp
      --overwrite  boolean; when false (the default) results are written
                   below './uncmt', otherwise below '.'

    Raises FileExistsError when './uncmt' already exists.
    """
    parser = ArgumentParser()
    parser.add_argument('--input_dir', type=str, default='.')
    # type=bool would turn every non-empty string -- including 'False' --
    # into True, so the value is parsed explicitly.  nargs='?' with
    # const=True additionally accepts a bare '--overwrite'.
    parser.add_argument('--overwrite', type=_parse_bool, nargs='?',
                        const=True, default=False)
    args = parser.parse_args()

    output_dir = '.'
    if not args.overwrite:
        output_dir += '/uncmt'
        # exist_ok=False: fail instead of writing into an existing directory.
        os.makedirs(output_dir, exist_ok=False)

    clean_comment(glob(f'{args.input_dir}/*.cpp'), output_dir)
    clean_comment(glob(f'{args.input_dir}/*.hpp'), output_dir)
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import re | |
import sys | |
def removeComments(text, replacement=""):
    """ remove c-style comments.
    text: blob of text with comments (can include newlines)
    replacement: string emitted in place of every removed comment.
                 The default "" keeps the historical behaviour, which can
                 glue neighbouring tokens together ("int/**/x" -> "intx");
                 pass " " to keep them separated.
    returns: text with comments removed
    """
    # The pattern consumes the input piece by piece.  Every piece is either
    # a comment (group 2 stays None) or a run of non-comment text (group 2):
    # a whole string/char literal, or one character followed by characters
    # that cannot start a comment, a literal or an escape.
    pattern = r"""
                            ##  --------- COMMENT ---------
           //.*?$           ##  Start of // .... comment
         |                  ##
           /\*              ##  Start of /* ... */ comment
           [^*]*\*+         ##  Non-* followed by 1-or-more *'s
           (                ##
             [^/*][^*]*\*+  ##
           )*               ##  0-or-more things which don't start with /
                            ##    but do end with '*'
           /                ##  End of /* ... */ comment
         |                  ##  -OR-  various things which aren't comments:
           (                ##
                            ##  ------ " ... " STRING ------
             "              ##  Start of " ... " string
             (              ##
               \\.          ##  Escaped char
             |              ##  -OR-
               [^"\\]       ##  Non "\ characters
             )*             ##
             "              ##  End of " ... " string
           |                ##  -OR-
                            ##
                            ##  ------ ' ... ' STRING ------
             '              ##  Start of ' ... ' string
             (              ##
               \\.          ##  Escaped char
             |              ##  -OR-
               [^'\\]       ##  Non '\ characters
             )*             ##
             '              ##  End of ' ... ' string
           |                ##  -OR-
                            ##
                            ##  ------ ANYTHING ELSE -------
             .              ##  Anything other char
             [^/"'\\]*      ##  Chars which doesn't start a comment, string
           )                ##    or escape
    """
    regex = re.compile(pattern, re.VERBOSE | re.MULTILINE | re.DOTALL)
    pieces = []
    for m in regex.finditer(text):
        code = m.group(2)
        # group 2 is None exactly when one of the comment alternatives matched
        pieces.append(replacement if code is None else code)
    return "".join(pieces)
def commentRemover(text):
    """ replace every C/C++ comment in text with a single space.
    text: blob of text with comments (can include newlines)
    returns: text where each // ... and /* ... */ comment has become " ";
             single- and double-quoted literals are copied through
             unchanged, even when they contain comment markers
    """
    # Alternatives, in order: line comment, block comment, '...' literal,
    # "..." literal.  The literals are matched only so that comment markers
    # inside them are never treated as comments.
    comment_or_literal = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        re.DOTALL | re.MULTILINE
    )

    def substitute(found):
        token = found.group(0)
        # Only comments start with '/'; they become a space and not an
        # empty string.  Literals are returned as they are.
        return " " if token.startswith('/') else token

    return comment_or_literal.sub(substitute, text)
def main(filename=None):
    """Print the contents of a file with its C/C++ comments removed.

    filename: path of the file to process.  When omitted, the first
              command-line argument is used if there is one, otherwise
              'test.h' (the previously hard-coded name).
    """
    if filename is None:
        filename = sys.argv[1] if len(sys.argv) > 1 else 'test.h'
    with open(filename) as f:
        uncmtFile = commentRemover(f.read())
    print(uncmtFile)
# Run the demo only when this file is executed as a script; importing the
# module just makes the functions available.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* This is a C-style comment. */ | |
This is not a comment. | |
/* This is another | |
* C-style comment. | |
*/ | |
"This is /* also not a comment */" | |
// This is also a comment | |
This is still // a comment | |
This is still /* a comment */ | |
This is still /* a comment */ again | |
This is the final line |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment