Skip to content

Instantly share code, notes, and snippets.

@pweids
Created August 4, 2018 21:00
Show Gist options
  • Save pweids/6769d9ed3a087a04dd65c8ceb6c7e2f2 to your computer and use it in GitHub Desktop.
Save pweids/6769d9ed3a087a04dd65c8ceb6c7e2f2 to your computer and use it in GitHub Desktop.
Two different methods of stripping /* ... */ style comments: one uses a finite state machine, and the other uses a more Pythonic recursive function. The latter is much faster.
import unittest
from enum import Enum
def strip_C_comments(code: str) -> str:
    """Strip C-style ``/* ... */`` comments from *code*.

    Each comment is removed together with any whitespace that immediately
    follows it. Comments do not nest: a comment ends at the first ``*/``
    found after its opening ``/*``.

    :param code: the code to be parsed for comments
    :return: code without comments
    :raises ValueError: if a comment is opened but never closed
    """
    # Loop instead of recursing once per comment, so input containing many
    # comments cannot exhaust the interpreter's recursion limit.
    while True:
        cmt_open = code.find('/*')
        if cmt_open == -1:
            return code
        # Search past the opener so the '*' of '/*' cannot double as the
        # '*' of '*/' (i.e. '/*/' is not a complete comment).
        cmt_close = code.find('*/', cmt_open + 2)
        if cmt_close == -1:
            raise ValueError("syntax error: unclosed comment")
        # Drop the comment and the whitespace that trails it, then rescan.
        code = code[:cmt_open] + code[cmt_close + 2:].lstrip()
class _StripState(Enum):
    """States of the character-by-character comment scanner."""

    OUTSIDE = 1  # in ordinary code
    START = 2    # just read '/', which may open a comment
    INSIDE = 3   # inside a comment body
    FINISH = 4   # inside a comment, just read '*', which may close it
    CLOSED = 5   # just past a comment; skipping the whitespace that follows


def strip_by_char(code: str) -> str:
    """Strip C-style ``/* ... */`` comments using a finite state machine.

    The input is scanned one character at a time. Each comment is removed
    together with any whitespace that immediately follows it. Comments do
    not nest.

    :param code: the code to be parsed for comments
    :return: code without comments
    :raises ValueError: if a comment is opened but never closed
    """
    state = _StripState.OUTSIDE
    outstr = []
    for c in code:
        if state is _StripState.OUTSIDE:
            if c == '/':
                # Hold the slash back until we know whether it opens a comment.
                state = _StripState.START
            else:
                outstr.append(c)
        elif state is _StripState.START:
            if c == '*':
                state = _StripState.INSIDE
            elif c == '/':
                # The held slash was literal; this one may still open a
                # comment (e.g. '//* c */'), so remain in START.
                outstr.append('/')
            else:
                outstr.append('/')
                outstr.append(c)
                state = _StripState.OUTSIDE
        elif state is _StripState.INSIDE:
            if c == '*':
                state = _StripState.FINISH
        elif state is _StripState.FINISH:
            if c == '/':
                state = _StripState.CLOSED
            elif c != '*':
                # A run of '*' keeps us in FINISH so that '**/' still closes.
                state = _StripState.INSIDE
        elif state is _StripState.CLOSED:
            if c == '/':
                state = _StripState.START
            elif not c.isspace():
                outstr.append(c)
                state = _StripState.OUTSIDE
            # Whitespace directly after a comment is dropped.
    if state is _StripState.START:
        # Input ended on a lone '/', which was ordinary code after all.
        outstr.append('/')
    elif state in (_StripState.INSIDE, _StripState.FINISH):
        raise ValueError("Comment not closed")
    return ''.join(outstr)
# Alias naming the implementation that the test class below calls; rebind it
# to run the same tests against a different implementation.
strip_comments = strip_C_comments
class TestCommentParser(unittest.TestCase):
    """Tests for whichever implementation ``strip_comments`` is bound to."""

    # Inputs shared by the tests below.
    single = "/* comment */"
    oneline_after = "def method(self): /* do stuff */"
    oneline_before = "/* comment */ def dostuff(self):"
    multiline = "/* big\ncomment */\ndef dostuff(self):"
    multiline2 = "/* big /*\ncomment */\ndef dostuff(self):"
    ignore_open = "/don't strip me bro */"
    no_close = "/* i never end"
    nested_comment = "/* i /* start */ and end"
    two_nested_comments = "/* i /* start */ and end */ twice"
    many_comments = "/*one*/ /*two*/ notcomment"
    two_multiline = "\n".join(["/*", "one", "*/", "/*", "two", "*/", "code"])

    def _expect(self, source, stripped):
        """Assert that stripping *source* yields exactly *stripped*."""
        actual = strip_comments(source)
        self.assertEqual(actual, stripped)

    def test_many_comments(self):
        # Two comments in a row, each followed by a space.
        self._expect(self.many_comments, "notcomment")

    def test_oneline(self):
        # Input that is nothing but a comment.
        self._expect(self.single, "")

    def test_oneline_after(self):
        # Whitespace before the comment is kept.
        self._expect(self.oneline_after, "def method(self): ")

    def test_oneline_before(self):
        # Whitespace after the comment is dropped.
        self._expect(self.oneline_before, "def dostuff(self):")

    def test_multiline(self):
        self._expect(self.multiline, "def dostuff(self):")

    def test_multiline2(self):
        # A second '/*' inside the comment does not open a nested comment.
        self._expect(self.multiline2, "def dostuff(self):")

    def test_two_multiline(self):
        self._expect(self.two_multiline, "code")

    def test_ignore_open(self):
        # A lone '/' is not a comment opener, so nothing is removed.
        self._expect(self.ignore_open, self.ignore_open)

    def test_no_close(self):
        self.assertRaises(ValueError, strip_comments, self.no_close)

    def test_nested_comment(self):
        self._expect(self.nested_comment, "and end")

    def test_two_nested_comments(self):
        # The comment ends at the first '*/'; the second is ordinary text.
        self._expect(self.two_nested_comments, "and end */ twice")
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment