Skip to content

Instantly share code, notes, and snippets.

@GuyMicciche
Created August 17, 2023 20:59
Show Gist options
  • Save GuyMicciche/0b876460a0173086c8a744a0f99b11d4 to your computer and use it in GitHub Desktop.
Save GuyMicciche/0b876460a0173086c8a744a0f99b11d4 to your computer and use it in GitHub Desktop.
# Parse the HTML in inputData['htmlContent'] and return the text of the second tabContent div.
from html.parser import HTMLParser
class MyHTMLParser(HTMLParser):
    """Collect the text content of every ``<div>`` whose class list has ``tabContent``.

    After ``feed()`` has been called, ``all_tabContent_contents`` holds one
    stripped string per completed tabContent div, in document order. Text
    inside ``<div>`` elements nested within a tabContent div is included in
    that div's entry.
    """

    def __init__(self):
        super().__init__()
        # True while the parser is inside a tabContent div.
        self.recording = False
        # Text fragments gathered for the tabContent div currently open.
        self.current_data = []
        # Finished text of each tabContent div seen so far.
        self.all_tabContent_contents = []
        # Number of <div> elements currently open, counting from the
        # tabContent div itself; lets us find its matching </div>.
        self._div_depth = 0

    def handle_starttag(self, tag, attrs):
        """Start recording on a tabContent div; track nested divs while recording."""
        if tag != 'div':
            return
        if self.recording:
            # A div nested inside the one being recorded: remember it so its
            # closing tag is not mistaken for the end of the tabContent div.
            self._div_depth += 1
            return
        for name, value in attrs:
            # A valueless attribute (``<div class>``) is reported as None,
            # so check the value before splitting it.
            if name == 'class' and value and 'tabContent' in value.split():
                self.recording = True
                self._div_depth = 1
                self.current_data = []  # Clear current data for new tabContent div
                break

    def handle_endtag(self, tag):
        """Finish the current entry when the tabContent div's own ``</div>`` arrives."""
        if not self.recording or tag != 'div':
            return
        self._div_depth -= 1
        if self._div_depth == 0:
            self.recording = False
            self.all_tabContent_contents.append(''.join(self.current_data).strip())

    def handle_data(self, data):
        """Accumulate text while inside a tabContent div."""
        if self.recording:
            self.current_data.append(data)
parser = MyHTMLParser()
parser.feed(inputData['htmlContent'])
# Extract the content of the second tabContent div from the list of all tabContent contents
extractedText = parser.all_tabContent_contents[1] if len(parser.all_tabContent_contents) > 1 else 'Not Found'
return {'extractedText': extractedText}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment