Instantly share code, notes, and snippets.

What would you like to do?
def parse_soup_to_simple_html(self):
    """Render the page's linked headlines as a minimal HTML page and save it.

    Collects every ``<h1>``/``<h2>`` element from ``self.__soup`` (presumably
    a BeautifulSoup document -- confirm against the constructor), keeps the
    ones whose direct parent carries an ``href`` attribute, and writes an
    ordered list of links to ``html/simplenews.html`` through
    ``self.write_webpage_as_html``.

    Each ``href`` is resolved against ``self.__url`` so relative, rooted and
    absolute links all yield a usable URL. Link targets and titles are
    HTML-escaped because they come from scraped, untrusted markup.

    Returns:
        None. The page is handed to ``self.write_webpage_as_html`` as
        UTF-8 encoded bytes.
    """
    # Function-scope imports: the module's import block is not visible here.
    from html import escape
    from urllib.parse import urljoin

    headings = self.__soup.find_all(['h1', 'h2'])

    # The doubled braces are not needed: the only replacement field in the
    # template is NEWS_LINKS, filled in by str.format() below.
    htmltext = '''
<html>
<head><title>Simple News Link Scrapper</title></head>
<body>
{NEWS_LINKS}
</body>
</html>
'''

    items = []
    for tag in headings:
        href = tag.parent.get('href')
        if not href:
            # Heading is not wrapped in a link; nothing to point at.
            continue

        # urljoin copes with absolute hrefs and with relative hrefs that lack
        # a leading slash, both of which plain concatenation would corrupt.
        link = urljoin(self.__url, href)

        # .string is None when the heading contains nested markup; fall back
        # to the concatenated text instead of rendering the word "None".
        title = tag.string
        if title is None:
            title = tag.get_text(' ', strip=True)

        items.append(
            "<li><a href='{}' target='_blank'>{}</a></li>\n".format(
                escape(link, quote=True), escape(str(title))
            )
        )

    news_links = '<ol>' + ''.join(items) + '</ol>'
    htmltext = htmltext.format(NEWS_LINKS=news_links)
    self.write_webpage_as_html(filepath="html/simplenews.html", data=htmltext.encode())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment