Created
February 10, 2014 17:04
-
-
Save jvanasco/8919972 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
import pprint | |
import re | |
# Fixtures for the reference-label cleanup tests.  Each entry pairs a raw
# markdown 'input' with the 'output' the tests expect back from
# `_cleanup_text`, plus a short 'description' of the scenario.
#
# NOTE(review): as stored here test_1, test_2 and test_4 carry the same text
# and every 'input' equals its 'output'.  The blank lines / indentation that
# the descriptions imply ("clean items", "no newline", "preformatted text")
# appear to have been lost -- confirm against the author's original fixtures.
test_text = {
    'test_1' : {
        'description' : "no change",
        'input' : """Hello.
This is a sample for [doing an inline link][1] and [another inline link][two]
[1]: http://123.123.123.123
[two]: http://123.123.123.123""",
        'output' : """Hello.
This is a sample for [doing an inline link][1] and [another inline link][two]
[1]: http://123.123.123.123
[two]: http://123.123.123.123""",
    },
    'test_2' : {
        'description' : "clean items",
        'input' : """Hello.
This is a sample for [doing an inline link][1] and [another inline link][two]
[1]: http://123.123.123.123
[two]: http://123.123.123.123""",
        'output' : """Hello.
This is a sample for [doing an inline link][1] and [another inline link][two]
[1]: http://123.123.123.123
[two]: http://123.123.123.123""",
    },
    'test_3' : {
        'description' : "clean items, potential false-positive on the inside",
        'input' : """Hello.
This next stuff should be handled like preformatted text
[AAA]: http://123.123.123.123
[BCD]: http://123.123.123.123
Now, this is a sample for [doing an inline link][1] and [another inline link][two]
[1]: http://123.123.123.123
[two]: http://123.123.123.123""",
        'output' : """Hello.
This next stuff should be handled like preformatted text
[AAA]: http://123.123.123.123
[BCD]: http://123.123.123.123
Now, this is a sample for [doing an inline link][1] and [another inline link][two]
[1]: http://123.123.123.123
[two]: http://123.123.123.123""",
    },
    'test_4' : {
        'description' : "no newline, so leave as-is",
        'input' : """Hello.
This is a sample for [doing an inline link][1] and [another inline link][two]
[1]: http://123.123.123.123
[two]: http://123.123.123.123""",
        'output' : """Hello.
This is a sample for [doing an inline link][1] and [another inline link][two]
[1]: http://123.123.123.123
[two]: http://123.123.123.123""",
    },
}
# Matches a block of `[id]: link` reference labels that closes the document.
# The block must be preceded by an empty / whitespace-only line and may only
# be followed by whitespace before the end of the text.
#
# The link is matched with `[^\n]+` (everything up to the end of the line).
# The earlier `[^$]+` was a character class that merely excluded a literal
# dollar sign: it ran across newlines, so any trailing non-label text was
# pulled into the section, and links containing `$` could not match at all.
# The pattern is a raw string so the regex escapes reach `re` untouched.
RE_MARKDOWN_footnote_A = re.compile(r"""
(?P<labels_section>
    (?:                     ## we must start with an empty / whitespace-only line
        ^\s*$
    )
    \s*                     ## there can be more whitespace lines
    (?P<labels>
        (?P<a_label>
            ^
            [\ \t]*         ## we could have 0-n spaces or tabs
            \[              ## BRACKET - open
            (?P<id>
                [^\]]+
            )
            \]              ## BRACKET - close
            \s*
            :               ## COLON
            \s*
            (?P<link>       ## the link is everything up to the end of the line
                [^\n]+
            )
            $
            \n?             ## step over the line break so the next label can start at ^
        )+                  ## multiple labels
    )
    \s*                     ## we might have some empty lines
    \Z                      ## ensure the end of document
)
""", re.VERBOSE | re.I | re.M)
# Extracts one `[id]: link` definition per line (MULTILINE).  Group 1 is the
# id, which may not contain `^` or `]`; group 2 is the rest of the line after
# the colon and any whitespace that follows it.
RE_MARKDOWN_label_A = re.compile(
    r'^\s*\[([^^\]]+)\]\s*:\s*(.+)$',
    re.MULTILINE,
)

# Scratch notes for the "blank line before the labels" part of the section
# patterns.  Kept as comments rather than a bare string literal so the
# backslash sequences are never parsed as (invalid) string escapes:
#
#   ^[\s]*$ ## we MUST have a blank line
#   ^[\s]*$ ## ok, we could have newline here too
#   \z ## match the line end
class TestRegexSetA(unittest.TestCase):
    """Run the shared `test_text` fixtures through `RE_MARKDOWN_footnote_A`."""

    def _cleanup_text(self, text):
        """Return `text` with its closing reference-label section tidied up.

        The text is stripped first.  If `RE_MARKDOWN_footnote_A` finds a
        labels section, each label found in it by `RE_MARKDOWN_label_A` is
        rewritten as a stripped "[id]: link" line, and the section is
        replaced by those lines, preceded by a single newline.  Text without
        a labels section is only stripped.
        """
        text = text.strip()
        m_section = RE_MARKDOWN_footnote_A.search(text)
        if m_section:
            labels_section = m_section.group('labels_section')
            cleaned = [""]  # leading empty item -> the section starts with a newline
            for label in RE_MARKDOWN_label_A.findall(labels_section):
                # strip each rebuilt line, matching TestRegexSetB
                cleaned.append(("[%s]: %s" % label).strip())
            # The section pattern ends in \Z, so the section is the tail of
            # `text`; slicing at its start cannot touch an identical run of
            # characters earlier in the document the way str.replace could.
            text = text[:m_section.start('labels_section')] + '\n'.join(cleaned)
        return text.strip()

    def _assert_fixture(self, name):
        """Clean the named fixture's input and compare it with its output."""
        fixture = test_text[name]
        self.assertEqual(self._cleanup_text(fixture['input']), fixture['output'])

    def test_TextTest_1(self):
        self._assert_fixture('test_1')

    def test_TextTest_2(self):
        self._assert_fixture('test_2')

    def test_TextTest_3(self):
        self._assert_fixture('test_3')

    def test_TextTest_4(self):
        self._assert_fixture('test_4')
# Second take on the closing reference-label section: an empty /
# whitespace-only line, then one or more `[id]: link` lines (each link runs
# up to the newline), then optional whitespace up to the end of the text.
# Written as a raw string so the regex escapes are handed to `re` as-is
# instead of being treated as (invalid) string escape sequences.
RE_MARKDOWN_footnote_B = re.compile(r"""
(?P<labels_section>
    (?:
        ^[\s]*$
    )
    (?P<labels>
        ################################
        (?:
            ^               # beginning of the line;
            \s*             # may include whitespace
            \[              # opening bracket
            (?:[^\]]+)      ### our ID
            \]              # closing bracket
            \s*             # optional whitespace
            :               # colon
            \s*             # optional whitespace
            (?:[^\n]+)      # our link is everything up to a new line
            $               # end of the line
            [\n]?
        )+                  ### THIS REPEATS 1+ times
        ################################
    )
    \s*                     ### we could have variable whitespace
    \Z                      ### END OF STRING
)
""", re.VERBOSE | re.I | re.M)
class TestRegexSetB(unittest.TestCase):
    """Run the shared `test_text` fixtures through `RE_MARKDOWN_footnote_B`."""

    def _cleanup_text(self, text):
        """Return `text` with its closing reference-label section tidied up.

        The text is stripped first.  If `RE_MARKDOWN_footnote_B` finds a
        labels section, each label found in it by `RE_MARKDOWN_label_A` is
        rewritten as a stripped "[id]: link" line, and the section is
        replaced by those lines, preceded by a single newline.  Text without
        a labels section is only stripped.
        """
        text = text.strip()
        m_section = RE_MARKDOWN_footnote_B.search(text)
        if m_section:
            labels_section = m_section.group('labels_section')
            cleaned = [""]  # leading empty item -> the section starts with a newline
            for label in RE_MARKDOWN_label_A.findall(labels_section):
                cleaned.append(("[%s]: %s" % label).strip())
            # The section pattern ends in \Z, so the section is the tail of
            # `text`; slicing at its start cannot touch an identical run of
            # characters earlier in the document the way str.replace could.
            text = text[:m_section.start('labels_section')] + '\n'.join(cleaned)
        return text.strip()

    def _assert_fixture(self, name):
        """Clean the named fixture's input and compare it with its output."""
        fixture = test_text[name]
        self.assertEqual(self._cleanup_text(fixture['input']), fixture['output'])

    def test_TextTest_1(self):
        self._assert_fixture('test_1')

    def test_TextTest_2(self):
        self._assert_fixture('test_2')

    def test_TextTest_3(self):
        self._assert_fixture('test_3')

    def test_TextTest_4(self):
        self._assert_fixture('test_4')
if __name__ == '__main__':
    # Script entry point: load both regex test cases into one suite and run
    # it with the most verbose text runner output.
    test_loader = unittest.TestLoader()
    case_suites = [
        test_loader.loadTestsFromTestCase(case_class)
        for case_class in (TestRegexSetA, TestRegexSetB)
    ]
    combined_suite = unittest.TestSuite(case_suites)
    results = unittest.TextTestRunner(verbosity=3).run(combined_suite)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment