Created
April 19, 2018 10:56
-
-
Save FernandoBasso/42d0e711802eeda19854a058e670f9f5 to your computer and use it in GitHub Desktop.
python html.parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class MyHTMLParser(HTMLParser):
    """HTML parser that prints text found inside selected elements.

    Three boolean flags track which kind of element is currently open:

    * ``isTags`` -- inside a ``<p>`` element whose ``class`` attribute
      matches ``'tags'`` (as decided by ``containsAttr``).
    * ``isX`` -- inside an element accepted by ``isThisX``.
    * ``isY`` -- inside an element accepted by ``isThisY``.

    ``handle_data`` prints the text with a prefix chosen by the first flag
    that is set.

    The helpers ``containsAttr``, ``isThisX``, ``isThisY``, ``wasX`` and
    ``wasY`` are expected to be defined at module level; they are not part
    of this class.
    """

    def __init__(self):
        super().__init__()
        # Every flag read by handle_data() is initialised here so that text
        # seen before any matching start tag does not raise AttributeError.
        self.isTags = False
        self.isX = False
        self.isY = False
        self.isAdd = False
        self.tags = None

    def handle_starttag(self, tag, attrs):
        """Set the flag that corresponds to the element being opened.

        Args:
            tag: Name of the element, as passed by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs.
        """
        if tag == 'p':
            # Only <p> elements carrying the 'tags' class are of interest.
            if containsAttr(attrs, 'class', 'tags'):
                self.isTags = True
        elif isThisX(tag, attrs):
            self.isX = True
        # NOTE(review): the original repeated isThisX() here, which made this
        # branch unreachable; isThisY is assumed to exist next to wasY --
        # confirm against the rest of the module.
        elif isThisY(tag, attrs):
            self.isY = True
        else:
            self.isAdd = False

    def handle_endtag(self, tag):
        """Clear the flag that corresponds to the element being closed.

        Args:
            tag: Name of the element, as passed by ``HTMLParser``.
        """
        if tag == 'p':
            self.isTags = False
        elif wasX(tag):
            self.isX = False
        elif wasY(tag):
            self.isY = False

    def handle_data(self, data):
        """Print ``data`` with a prefix naming the currently open element.

        Nothing is printed when none of the flags is set.

        Args:
            data: Text content found between tags.
        """
        if self.isTags:
            print('text: ', data)
        elif self.isX:
            print('X: ', data)
        elif self.isY:
            print('Y', data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment