## jvanasco/test_markdown_cleanup.py

## test_markdown_cleanup.py
import unittest
import pprint
import re


## Fixtures shared by the regex test-case classes in this file.
##
## Each entry maps a test id to a dict with:
##   'description' - what the case is meant to exercise
##   'input'       - the markdown document handed to `_cleanup_text`
##   'output'      - the document expected back
##
## Whitespace inside the documents is significant, so every document line is
## written as its own literal with an explicit "\n".
test_text = {
    "test_1": {
        "description": "no change",
        "input": (
            "Hello.\n"
            "\n"
            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
            "\n"
            "[1]: http://123.123.123.123\n"
            "[two]: http://123.123.123.123"
        ),
        "output": (
            "Hello.\n"
            "\n"
            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
            "\n"
            "[1]: http://123.123.123.123\n"
            "[two]: http://123.123.123.123"
        ),
    },
    "test_2": {
        "description": "clean items",
        "input": (
            "Hello.\n"
            "\n"
            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
            "\n"
            "    [1]: http://123.123.123.123\n"
            "  [two]:               http://123.123.123.123"
        ),
        "output": (
            "Hello.\n"
            "\n"
            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
            "\n"
            "[1]: http://123.123.123.123\n"
            "[two]: http://123.123.123.123"
        ),
    },
    "test_3": {
        "description": "clean items, potential false-positive on the inside",
        "input": (
            "Hello.\n"
            "\n"
            "This next stuff should be handled like preformatted text\n"
            "\n"
            "    [AAA]: http://123.123.123.123\n"
            "    [BCD]:               http://123.123.123.123\n"
            "\n"
            "Now, this is a sample for [doing an inline link][1] and [another inline link][two]\n"
            "\n"
            "    [1]: http://123.123.123.123\n"
            "  [two]:               http://123.123.123.123"
        ),
        "output": (
            "Hello.\n"
            "\n"
            "This next stuff should be handled like preformatted text\n"
            "\n"
            "    [AAA]: http://123.123.123.123\n"
            "    [BCD]:               http://123.123.123.123\n"
            "\n"
            "Now, this is a sample for [doing an inline link][1] and [another inline link][two]\n"
            "\n"
            "[1]: http://123.123.123.123\n"
            "[two]: http://123.123.123.123"
        ),
    },
    "test_4": {
        "description": "no newline, so leave as-is",
        "input": (
            "Hello.\n"
            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
            "    [1]: http://123.123.123.123\n"
            "  [two]:               http://123.123.123.123"
        ),
        "output": (
            "Hello.\n"
            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
            "    [1]: http://123.123.123.123\n"
            "  [two]:               http://123.123.123.123"
        ),
    },
}


## Locates the trailing link-label section of a markdown document.
##
## A match starts on an empty / whitespace-only line, continues with one or
## more `[id]: link` labels and must run up to the end of the string (`\Z`).
## Named groups: `labels_section` (the whole match), `labels`, `a_label`,
## `id` and `link`.
##
## The pattern is a raw string so that `\s`, `\[`, `\Z` and friends reach the
## regex engine untouched instead of being treated as (invalid) string escapes.
##
## NOTE: `[^$]+` is a negated character class - inside brackets `$` is a plain
## dollar sign - so `link` also consumes newlines and greedily runs on to the
## end of the text rather than stopping at the end of its own line.
RE_MARKDOWN_footnote_A = re.compile(r"""
    (?P<labels_section>
        (?:                            ## we must start with an empty / whitespace-only line
            ^\s*$
        )
        \s*                             ## there can be more whitespace lines
        (?P<labels>
            (?P<a_label>
                ^
                    [\ \t]*                     ## we could have 0-n spaces or tabs
                    \[                          ## BRACKET - open
                        (?P<id>
                            [^\]]+
                        )
                    \]                          ## BRACKET - close
                    \s*
                    :                           ## COLON
                    \s*
                    (?P<link>                   ## WE want anything here (any char but a dollar sign)
                        [^$]+
                    )
                $
            )+                                  ## multiple labels
        )
        \s*                                     ## we might have some empty lines
        \Z                                      ## ensure the end of document
    )
""", re.VERBOSE | re.I | re.M)

## Extracts one `[id]: link` pair per line; with `findall` it yields
## `(id, link)` tuples.  The id may not contain `^` or `]`; the link is what
## follows the colon (and any whitespace) up to the end of the line.
RE_MARKDOWN_label_A = re.compile(
    r'^\s*'             # leading whitespace (can span preceding blank lines)
    r'\[([^^\]]+)\]'    # group 1: the id between the brackets
    r'\s*:\s*'          # the colon, optional whitespace on either side
    r'(.+)$',           # group 2: the link, to the end of the line
    re.MULTILINE,
)


## Scratch notes for the "blank line ... end of text" part of the footnote
## patterns.  Kept as comments: as a plain string literal these lines carried
## the invalid escapes `\s` and `\z`.
##
##        ^[\s]*$         ## we MUST have a blank line
##        ^[\s]*$         ## ok, we could have a newline here too
##       \z               ## match the line end (Python's `re` spells the
##                        ## end-of-string anchor `\Z`)


class TestRegexSetA(unittest.TestCase):
    """Run the shared `test_text` fixtures through the "A" regex pair.

    The label section is located with `RE_MARKDOWN_footnote_A` and its labels
    are re-parsed with `RE_MARKDOWN_label_A`.
    """

    def _cleanup_text(self, text):
        """Return `text` with its trailing link-label section normalized.

        `text` is stripped and searched with `RE_MARKDOWN_footnote_A`.  When a
        section is found, every `(id, link)` pair that `RE_MARKDOWN_label_A`
        finds inside it is re-emitted as `[id]: link` on its own line, after a
        leading newline.  When nothing matches, the stripped text is returned
        as-is.
        """
        text = text.strip()
        m_section = RE_MARKDOWN_footnote_A.search(text)
        if m_section is None:
            return text
        labels = RE_MARKDOWN_label_A.findall(m_section.group('labels_section'))
        cleaned = [""]
        cleaned.extend("[%s]: %s" % label for label in labels)
        ## The pattern is anchored on `\Z`, so the section is always the tail
        ## of `text`.  Splice at the match offset: `str.replace` would also
        ## rewrite an identical run of text occurring earlier in the document.
        text = text[:m_section.start('labels_section')] + '\n'.join(cleaned)
        return text.strip()

    ## `assertEqual` reports both values on failure and, unlike a bare
    ## `assert`, is not stripped when Python runs with -O.

    def test_TextTest_1(self):
        test = test_text['test_1']
        output = self._cleanup_text(test['input'])
        self.assertEqual(output, test['output'])

    def test_TextTest_2(self):
        test = test_text['test_2']
        output = self._cleanup_text(test['input'])
        self.assertEqual(output, test['output'])

    def test_TextTest_3(self):
        test = test_text['test_3']
        output = self._cleanup_text(test['input'])
        ## Debug aid kept from the original; written as a call so the file
        ## parses on Python 3 as well as Python 2.
        ## NOTE(review): the `[^$]+` link group of RE_MARKDOWN_footnote_A also
        ## consumes newlines, so the match appears to start at the first
        ## indented label block and this case looks expected to fail - confirm.
        print(output)
        self.assertEqual(output, test['output'])

    def test_TextTest_4(self):
        test = test_text['test_4']
        output = self._cleanup_text(test['input'])
        self.assertEqual(output, test['output'])


## Second take on locating the trailing link-label section.
##
## A match starts at an empty / whitespace-only line, continues with one or
## more `[id]: link` lines - each link is limited to its own line by
## `[^\n]+` - and must run up to the end of the string (`\Z`).
## Named groups: `labels_section` (the whole match) and `labels`.
##
## The pattern is a raw string so that `\s`, `\[`, `\n`, `\Z` reach the regex
## engine untouched instead of being processed as string escapes.
RE_MARKDOWN_footnote_B = re.compile(r"""
    (?P<labels_section>
        (?:
            ^[\s]*$
        )
        (?P<labels>
        ################################
            (?:
                ^                   # beginning of the line;
                  \s*               # may include whitespace
                  \[                # opening bracket
                     (?:[^\]]+)     ### our ID
                  \]                # closing bracket
                \s*                 # optional whitespace
                  :                 # colon
                \s*                 # optional whitespace
                  (?:[^\n]+)        # our link is everything up to a new line
                $                   # end of the line
                [\n]?
            )+                      ### THIS REPEATS 1+ times
        ################################
        )
        \s*                         ### we could have variable whitespace
        \Z                          ### END OF STRING
    )
""", re.VERBOSE | re.I | re.M)


class TestRegexSetB(unittest.TestCase):
    """Run the shared `test_text` fixtures through the "B" footnote regex.

    The label section is located with `RE_MARKDOWN_footnote_B`; its labels
    are re-parsed with `RE_MARKDOWN_label_A`.
    """

    def _cleanup_text(self, text):
        """Return `text` with its trailing link-label section normalized.

        `text` is stripped and searched with `RE_MARKDOWN_footnote_B`.  When a
        section is found, every `(id, link)` pair that `RE_MARKDOWN_label_A`
        finds inside it is re-emitted as a stripped `[id]: link` line, after a
        leading newline.  When nothing matches, the stripped text is returned
        as-is.
        """
        text = text.strip()
        m_section = RE_MARKDOWN_footnote_B.search(text)
        if m_section is None:
            return text
        labels = RE_MARKDOWN_label_A.findall(m_section.group('labels_section'))
        cleaned = [""]
        cleaned.extend(("[%s]: %s" % label).strip() for label in labels)
        ## The pattern is anchored on `\Z`, so the section is always the tail
        ## of `text`.  Splice at the match offset: `str.replace` would also
        ## rewrite an identical run of text occurring earlier in the document
        ## (e.g. an indented, preformatted copy of the same label).
        text = text[:m_section.start('labels_section')] + '\n'.join(cleaned)
        return text.strip()

    ## `assertEqual` reports both values on failure and, unlike a bare
    ## `assert`, is not stripped when Python runs with -O.

    def test_TextTest_1(self):
        test = test_text['test_1']
        output = self._cleanup_text(test['input'])
        self.assertEqual(output, test['output'])

    def test_TextTest_2(self):
        test = test_text['test_2']
        output = self._cleanup_text(test['input'])
        self.assertEqual(output, test['output'])

    def test_TextTest_3(self):
        test = test_text['test_3']
        output = self._cleanup_text(test['input'])
        self.assertEqual(output, test['output'])

    def test_TextTest_4(self):
        test = test_text['test_4']
        output = self._cleanup_text(test['input'])
        self.assertEqual(output, test['output'])


if __name__ == '__main__':

    ## Load the tests of each case class, in run order, into one list.
    loader = unittest.TestLoader()
    suites_list = []
    for case_class in (TestRegexSetA, TestRegexSetB):
        suite = loader.loadTestsFromTestCase(case_class)
        suites_list.append(suite)

    ## Combine them and run with verbose, per-test reporting.
    big_suite = unittest.TestSuite(suites_list)
    runner = unittest.TextTestRunner(verbosity=3)
    results = runner.run(big_suite)
	import unittest
	import pprint
	import re



	## Fixtures shared by the regex test-case classes in this file.
	##
	## Each entry maps a test id to a dict with:
	##   'description' - what the case is meant to exercise
	##   'input'       - the markdown document handed to `_cleanup_text`
	##   'output'      - the document expected back
	##
	## Whitespace inside the documents is significant, so every document line
	## is written as its own literal with an explicit "\n"; that keeps the
	## indentation of this file out of the fixture values.
	test_text = {
	    "test_1": {
	        "description": "no change",
	        "input": (
	            "Hello.\n"
	            "\n"
	            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
	            "\n"
	            "[1]: http://123.123.123.123\n"
	            "[two]: http://123.123.123.123"
	        ),
	        "output": (
	            "Hello.\n"
	            "\n"
	            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
	            "\n"
	            "[1]: http://123.123.123.123\n"
	            "[two]: http://123.123.123.123"
	        ),
	    },
	    "test_2": {
	        "description": "clean items",
	        "input": (
	            "Hello.\n"
	            "\n"
	            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
	            "\n"
	            "    [1]: http://123.123.123.123\n"
	            "  [two]:               http://123.123.123.123"
	        ),
	        "output": (
	            "Hello.\n"
	            "\n"
	            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
	            "\n"
	            "[1]: http://123.123.123.123\n"
	            "[two]: http://123.123.123.123"
	        ),
	    },
	    "test_3": {
	        "description": "clean items, potential false-positive on the inside",
	        "input": (
	            "Hello.\n"
	            "\n"
	            "This next stuff should be handled like preformatted text\n"
	            "\n"
	            "    [AAA]: http://123.123.123.123\n"
	            "    [BCD]:               http://123.123.123.123\n"
	            "\n"
	            "Now, this is a sample for [doing an inline link][1] and [another inline link][two]\n"
	            "\n"
	            "    [1]: http://123.123.123.123\n"
	            "  [two]:               http://123.123.123.123"
	        ),
	        "output": (
	            "Hello.\n"
	            "\n"
	            "This next stuff should be handled like preformatted text\n"
	            "\n"
	            "    [AAA]: http://123.123.123.123\n"
	            "    [BCD]:               http://123.123.123.123\n"
	            "\n"
	            "Now, this is a sample for [doing an inline link][1] and [another inline link][two]\n"
	            "\n"
	            "[1]: http://123.123.123.123\n"
	            "[two]: http://123.123.123.123"
	        ),
	    },
	    "test_4": {
	        "description": "no newline, so leave as-is",
	        "input": (
	            "Hello.\n"
	            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
	            "    [1]: http://123.123.123.123\n"
	            "  [two]:               http://123.123.123.123"
	        ),
	        "output": (
	            "Hello.\n"
	            "This is a sample for [doing an inline link][1] and [another inline link][two]\n"
	            "    [1]: http://123.123.123.123\n"
	            "  [two]:               http://123.123.123.123"
	        ),
	    },
	}




	## Locates the trailing link-label section of a markdown document.
	##
	## A match starts on an empty / whitespace-only line, continues with one or
	## more `[id]: link` labels and must run up to the end of the string (`\Z`).
	## Named groups: `labels_section` (the whole match), `labels`, `a_label`,
	## `id` and `link`.
	##
	## The pattern is a raw string so that `\s`, `\[`, `\Z` and friends reach
	## the regex engine untouched; the flags are OR-ed with a plain `|`.
	##
	## NOTE: `[^$]+` is a negated character class - inside brackets `$` is a
	## plain dollar sign - so `link` also consumes newlines and greedily runs
	## on to the end of the text.
	RE_MARKDOWN_footnote_A = re.compile(r"""
	    (?P<labels_section>
	        (?:                     ## we must start with an empty / whitespace-only line
	            ^\s*$
	        )
	        \s*                     ## there can be more whitespace lines
	        (?P<labels>
	            (?P<a_label>
	                ^
	                    [\ \t]*             ## we could have 0-n spaces or tabs
	                    \[                  ## BRACKET - open
	                        (?P<id>
	                            [^\]]+
	                        )
	                    \]                  ## BRACKET - close
	                    \s*
	                    :                   ## COLON
	                    \s*
	                    (?P<link>           ## WE want anything here (any char but a dollar sign)
	                        [^$]+
	                    )
	                $
	            )+                          ## multiple labels
	        )
	        \s*                             ## we might have some empty lines
	        \Z                              ## ensure the end of document
	    )
	""", re.VERBOSE | re.I | re.M)

	## Extracts one `[id]: link` pair per line; with `findall` it yields
	## `(id, link)` tuples.  The id may not contain `^` or `]`; the link is what
	## follows the colon (and any whitespace) up to the end of the line.
	## Whitespace before the bracket and before the colon is optional (`\s*`),
	## so unindented `[id]: link` lines match too.
	RE_MARKDOWN_label_A = re.compile(
	    r'^\s*'             # leading whitespace (can span preceding blank lines)
	    r'\[([^^\]]+)\]'    # group 1: the id between the brackets
	    r'\s*:\s*'          # the colon, optional whitespace on either side
	    r'(.+)$',           # group 2: the link, to the end of the line
	    re.MULTILINE,
	)


	## Scratch notes for the "blank line ... end of text" part of the footnote
	## patterns.  Kept as comments: as a plain string literal these lines
	## carried the invalid escapes `\s` and `\z`.
	##
	##     ^[\s]*$ ## we MUST have a blank line
	##     ^[\s]*$ ## ok, we could have newline here too
	##     \z ## match the line end (Python's `re` spells the end-of-string
	##        ## anchor `\Z`)


	class TestRegexSetA(unittest.TestCase):
	    """Run the shared `test_text` fixtures through the "A" regex pair.

	    The label section is located with `RE_MARKDOWN_footnote_A` and its
	    labels are re-parsed with `RE_MARKDOWN_label_A`.
	    """

	    def _cleanup_text(self, text):
	        """Return `text` with its trailing link-label section normalized.

	        `text` is stripped and searched with `RE_MARKDOWN_footnote_A`.  When
	        a section is found, every `(id, link)` pair that
	        `RE_MARKDOWN_label_A` finds inside it is re-emitted as `[id]: link`
	        on its own line, after a leading newline.  When nothing matches, the
	        stripped text is returned as-is.
	        """
	        text = text.strip()
	        m_section = RE_MARKDOWN_footnote_A.search(text)
	        if m_section is None:
	            return text
	        labels = RE_MARKDOWN_label_A.findall(m_section.group('labels_section'))
	        cleaned = [""]
	        cleaned.extend("[%s]: %s" % label for label in labels)
	        ## The pattern is anchored on `\Z`, so the section is always the
	        ## tail of `text`.  Splice at the match offset: `str.replace` would
	        ## also rewrite an identical run of text earlier in the document.
	        text = text[:m_section.start('labels_section')] + '\n'.join(cleaned)
	        return text.strip()

	    ## `assertEqual` reports both values on failure and, unlike a bare
	    ## `assert`, is not stripped when Python runs with -O.

	    def test_TextTest_1(self):
	        test = test_text['test_1']
	        output = self._cleanup_text(test['input'])
	        self.assertEqual(output, test['output'])

	    def test_TextTest_2(self):
	        test = test_text['test_2']
	        output = self._cleanup_text(test['input'])
	        self.assertEqual(output, test['output'])

	    def test_TextTest_3(self):
	        test = test_text['test_3']
	        output = self._cleanup_text(test['input'])
	        ## Debug aid kept from the original; written as a call so the file
	        ## parses on Python 3 as well as Python 2.
	        print(output)
	        self.assertEqual(output, test['output'])

	    def test_TextTest_4(self):
	        test = test_text['test_4']
	        output = self._cleanup_text(test['input'])
	        self.assertEqual(output, test['output'])










	## Second take on locating the trailing link-label section.
	##
	## A match starts at an empty / whitespace-only line, continues with one or
	## more `[id]: link` lines - each link is limited to its own line by
	## `[^\n]+` - and must run up to the end of the string (`\Z`).
	## Named groups: `labels_section` (the whole match) and `labels`.
	##
	## The pattern is a raw string so that `\s`, `\[`, `\n`, `\Z` reach the
	## regex engine untouched; the flags are OR-ed with a plain `|`.
	RE_MARKDOWN_footnote_B = re.compile(r"""
	    (?P<labels_section>
	        (?:
	            ^[\s]*$
	        )
	        (?P<labels>
	        ################################
	            (?:
	                ^                   # beginning of the line;
	                  \s*               # may include whitespace
	                  \[                # opening bracket
	                     (?:[^\]]+)     ### our ID
	                  \]                # closing bracket
	                \s*                 # optional whitespace
	                  :                 # colon
	                \s*                 # optional whitespace
	                  (?:[^\n]+)        # our link is everything up to a new line
	                $                   # end of the line
	                [\n]?
	            )+                      ### THIS REPEATS 1+ times
	        ################################
	        )
	        \s*                         ### we could have variable whitespace
	        \Z                          ### END OF STRING
	    )
	""", re.VERBOSE | re.I | re.M)



	class TestRegexSetB(unittest.TestCase):
	    """Run the shared `test_text` fixtures through the "B" footnote regex.

	    The label section is located with `RE_MARKDOWN_footnote_B`; its labels
	    are re-parsed with `RE_MARKDOWN_label_A`.
	    """

	    def _cleanup_text(self, text):
	        """Return `text` with its trailing link-label section normalized.

	        `text` is stripped and searched with `RE_MARKDOWN_footnote_B`.  When
	        a section is found, every `(id, link)` pair that
	        `RE_MARKDOWN_label_A` finds inside it is re-emitted as a stripped
	        `[id]: link` line, after a leading newline.  When nothing matches,
	        the stripped text is returned as-is.
	        """
	        text = text.strip()
	        m_section = RE_MARKDOWN_footnote_B.search(text)
	        if m_section is None:
	            return text
	        labels = RE_MARKDOWN_label_A.findall(m_section.group('labels_section'))
	        cleaned = [""]
	        cleaned.extend(("[%s]: %s" % label).strip() for label in labels)
	        ## The pattern is anchored on `\Z`, so the section is always the
	        ## tail of `text`.  Splice at the match offset: `str.replace` would
	        ## also rewrite an identical run of text earlier in the document.
	        text = text[:m_section.start('labels_section')] + '\n'.join(cleaned)
	        return text.strip()

	    ## `assertEqual` reports both values on failure and, unlike a bare
	    ## `assert`, is not stripped when Python runs with -O.

	    def test_TextTest_1(self):
	        test = test_text['test_1']
	        output = self._cleanup_text(test['input'])
	        self.assertEqual(output, test['output'])

	    def test_TextTest_2(self):
	        test = test_text['test_2']
	        output = self._cleanup_text(test['input'])
	        self.assertEqual(output, test['output'])

	    def test_TextTest_3(self):
	        test = test_text['test_3']
	        output = self._cleanup_text(test['input'])
	        self.assertEqual(output, test['output'])

	    def test_TextTest_4(self):
	        test = test_text['test_4']
	        output = self._cleanup_text(test['input'])
	        self.assertEqual(output, test['output'])


	if __name__ == '__main__':

	    ## init the test loader
	    loader = unittest.TestLoader()
	    suites_list = []

	    ## every test to run on startup...
	    suite = loader.loadTestsFromTestCase(TestRegexSetA)
	    suites_list.append(suite)

	    suite = loader.loadTestsFromTestCase(TestRegexSetB)
	    suites_list.append(suite)

	    ## run it: the body is indented under the guard so it only executes
	    ## when the file is run as a script
	    big_suite = unittest.TestSuite(suites_list)
	    runner = unittest.TextTestRunner(verbosity=3)
	    results = runner.run(big_suite)