Created
September 16, 2018 21:29
-
-
Save AlexanderAA/3e9b651cc636e162252fd58fd1e6ec4e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Lexer event kinds: a tag either opens ('start') or closes ('end') an element.
START, END = 'start', 'end'
def regex_lexer(markup, rgx_tag="<(?P<event>/?)(?P<tag>[A-Z])>"):
    """ Lazily tokenize markup into (event, tag) pairs with a regex.

    Each non-overlapping match of ``rgx_tag`` yields one pair; text that
    does not match is skipped. A match whose "event" group equals "/" is
    reported as END, every other match as START.

    Usage:

    >>> list(regex_lexer(""))
    []
    >>> list(regex_lexer("<A></B>"))
    [('start', 'A'), ('end', 'B')]

    :param markup: Input text
    :type markup: str
    :param rgx_tag: Regular expression to match a single tag.
        Must contain named capturing groups "event" and "tag".
    :type rgx_tag: str
    :return: (event, tag) pairs in order of appearance, produced lazily
    :rtype: [(str, str)]
    """
    for found in re.finditer(rgx_tag, markup):
        name = found.group('tag')
        # A leading slash captured by the "event" group marks a closing tag.
        if found.group('event') == '/':
            kind = END
        else:
            kind = START
        yield kind, name
def tag_checker(markup, lexer=regex_lexer):
    r""" Check markup tags correctness, return success or error message.

    The lexemes produced by ``lexer(markup)`` are processed in order while
    a stack of currently open tags is maintained. Checking stops at the
    first problem found. In the messages, ``#`` stands for "no tag": either
    a closing tag appeared while nothing was open, or the input ended while
    a tag was still open (the innermost open tag is the one reported).

    This docstring is a raw string so that the backslashes in the examples
    reach doctest unchanged instead of being treated as escape sequences.

    :param markup: Input text
    :type markup: str
    :param lexer: Lexer callable; called with ``markup`` and expected to
        return an iterable of (event, tag) pairs where event is START or END
    :return: Error or Success message
    :rtype: str

    Usage:

    >>> tag_checker("")
    'Correctly tagged paragraph'
    >>> tag_checker(" ")
    'Correctly tagged paragraph'
    >>> tag_checker("<A><B></B></A>")
    'Correctly tagged paragraph'
    >>> tag_checker("<A><B></B>")
    'Expected </A> found #'
    >>> tag_checker("<A><B></A>")
    'Expected </B> found </A>'
    >>> tag_checker("The following text<C><B>is centred and in boldface</B></C>")
    'Correctly tagged paragraph'
    >>> tag_checker(r"<B>This <\g>is <B>boldface</B> in <<*> a</B> <\6> <<d>sentence")
    'Correctly tagged paragraph'
    >>> tag_checker("<B><C> This should be centred and in boldface, but the tags are wrongly nested </B></C>")
    'Expected </C> found </B>'
    >>> tag_checker("<B>This should be in boldface, but there is an extra closing tag</B></C>")
    'Expected # found </C>'
    >>> tag_checker("<B><C>This should be centred and in boldface, but there is a missing closing tag</C>")
    'Expected </B> found #'
    """
    # Tags that have been opened but not yet closed, innermost last.
    stack = []
    for event, tag in lexer(markup):
        if event == START:
            stack.append(tag)
        elif event == END:
            if not stack:
                # Closing tag with nothing open.
                return "Expected # found </{}>".format(tag)
            expected_tag = stack.pop()
            if tag != expected_tag:
                # Closing tag does not match the innermost open tag.
                return "Expected </{}> found </{}>".format(expected_tag, tag)
    if stack:
        # Input ended while at least one tag was still open.
        return "Expected </{}> found #".format(stack[-1])
    return "Correctly tagged paragraph"
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module.
    from doctest import testmod
    testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment