Skip to content

Instantly share code, notes, and snippets.

@vgmoose
Created September 7, 2013 19:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vgmoose/6478345 to your computer and use it in GitHub Desktop.
Save vgmoose/6478345 to your computer and use it in GitHub Desktop.
Accepts input such as `<foo<bar>` as valid HTML instead of raising a "malformed start tag" error.
def check_for_whole_start_tag(self, i):
    """Find where the start tag beginning at ``self.rawdata[i]`` ends.

    This variant tolerates a ``<`` showing up where the tag should have
    been closed (for example ``<foo<bar>``): instead of reporting
    "malformed start tag" it returns ``-1``, the same value used for a tag
    that is not complete yet.

    Args:
        i: Index into ``self.rawdata`` where the tag's opening ``<`` sits.

    Returns:
        The index just past the closing ``>`` (or ``/>``) when the whole
        tag is present; ``-1`` when the tag is cut off by the end of the
        buffer or is interrupted by another ``<``.

    Raises:
        AssertionError: if no tag prefix matches at ``i``, or if
            ``self.error`` returns instead of raising.
        Whatever ``self.error`` raises when the tag is malformed
        (``self.error`` is defined outside this function).
    """
    rawdata = self.rawdata
    # Matches the tag name plus any complete attributes and trailing
    # whitespace; the character right after the match decides the outcome.
    locatestarttagend = re.compile(r"""
        <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
        (?:\s+                             # whitespace before attribute name
          (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
            (?:\s*=\s*                     # value indicator
              (?:'[^']*'                   # LITA-enclosed value
                |\"[^\"]*\"                # LIT-enclosed value
                |[^'\">\s]+                # bare value
               )
             )?
           )
         )*
        \s*                                # trailing whitespace
        """, re.VERBOSE)
    m = locatestarttagend.match(rawdata, i)
    if not m:
        raise AssertionError("we should not get here!")

    j = m.end()
    # One character (or "" at end of buffer) following the matched prefix.
    # Named to avoid shadowing the ``next`` builtin.
    following = rawdata[j:j+1]

    if following == ">":
        return j + 1
    if following == "/":
        if rawdata.startswith("/>", j):
            return j + 2
        # A '/' not (yet) followed by '>': treat as a buffer boundary.
        return -1
    if following == "":
        # end of input
        return -1
    if following in ("abcdefghijklmnopqrstuvwxyz=/"
                     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
        # end of input in or before attribute value, or we have the
        # '/' from a '/>' ending
        return -1
    if following == "<":
        # A new tag opens before this one was closed: accept it quietly.
        # Checked *before* updatepos() so that, like every other -1 return
        # above, the parser's position bookkeeping is left untouched.
        return -1

    # Anything else is genuinely malformed; record where and report it.
    self.updatepos(i, j)
    self.error("malformed start tag")
    raise AssertionError("we should not get here!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment