mahmudahsan/parse_soup_to_simple_html.py

## parse_soup_to_simple_html.py
def parse_soup_to_simple_html(self):
        """Write the headline links of the parsed page to a simple HTML file.

        Every ``<h1>``/``<h2>`` element returned by ``self.__soup.find_all``
        whose parent has an ``href`` attribute becomes one ``<li>`` link in an
        ordered list.  The finished page is encoded with ``str.encode()``
        (UTF-8) and handed to ``self.write_webpage_as_html`` together with the
        path ``html/simplenews.html``.

        Returns:
            None.
        """
        # Local import: the file's import section is not visible from here.
        from html import escape

        htmltext = '''
<html>
    <head><title>Simple News Link Scrapper</title></head>
    <body>
        {NEWS_LINKS}
    </body>
</html>
'''

        items = []
        for tag in self.__soup.find_all(['h1', 'h2']):
            href = tag.parent.get('href')
            if not href:
                # Heading is not wrapped in a link; nothing to list.
                continue

            # Only site-relative hrefs need the base URL in front; prefixing
            # an already absolute (or protocol-relative) href breaks the link.
            if href.lower().startswith(('http://', 'https://', '//')):
                link = href
            else:
                link = self.__url + href

            title = tag.string
            if title is None:
                # NOTE(review): presumably a BeautifulSoup tag, where
                # `.string` is None for headings with nested markup; fall
                # back to the concatenated text instead of printing "None".
                title = tag.get_text(' ', strip=True)

            # The values come from a scraped page, so escape them before
            # placing them in markup (quote=True also covers the
            # single-quoted href attribute).
            items.append(
                "<li><a href='{}' target='_blank'>{}</a></li>\n".format(
                    escape(str(link), quote=True),
                    escape(str(title), quote=True),
                )
            )

        news_links = '<ol>' + ''.join(items) + '</ol>'
        htmltext = htmltext.format(NEWS_LINKS=news_links)

        self.write_webpage_as_html(filepath="html/simplenews.html", data=htmltext.encode())
	def parse_soup_to_simple_html(self):
		"""Write the headline links of the parsed page to a simple HTML file.

		Every ``<h1>``/``<h2>`` element returned by ``self.__soup.find_all``
		whose parent has an ``href`` attribute becomes one ``<li>`` link in an
		ordered list.  The finished page is encoded with ``str.encode()``
		(UTF-8) and handed to ``self.write_webpage_as_html`` together with the
		path ``html/simplenews.html``.

		Returns:
			None.
		"""
		# Local import: the file's import section is not visible from here.
		from html import escape

		# Page template, spelled with explicit escapes so the tab-prefixed
		# lines of this copy's literal are kept exactly.
		htmltext = (
			"\n"
			"\t<html>\n"
			"\t<head><title>Simple News Link Scrapper</title></head>\n"
			"\t<body>\n"
			"\t{NEWS_LINKS}\n"
			"\t</body>\n"
			"\t</html>\n"
			"\t"
		)

		items = []
		for tag in self.__soup.find_all(['h1', 'h2']):
			href = tag.parent.get('href')
			if not href:
				# Heading is not wrapped in a link; nothing to list.
				continue

			# Only site-relative hrefs need the base URL in front; prefixing
			# an already absolute (or protocol-relative) href breaks the link.
			if href.lower().startswith(('http://', 'https://', '//')):
				link = href
			else:
				link = self.__url + href

			title = tag.string
			if title is None:
				# NOTE(review): presumably a BeautifulSoup tag, where
				# `.string` is None for headings with nested markup; fall
				# back to the concatenated text instead of printing "None".
				title = tag.get_text(' ', strip=True)

			# The values come from a scraped page, so escape them before
			# placing them in markup (quote=True also covers the
			# single-quoted href attribute).
			items.append(
				"<li><a href='{}' target='_blank'>{}</a></li>\n".format(
					escape(str(link), quote=True),
					escape(str(title), quote=True),
				)
			)

		news_links = '<ol>' + ''.join(items) + '</ol>'
		htmltext = htmltext.format(NEWS_LINKS=news_links)

		self.write_webpage_as_html(filepath="html/simplenews.html", data=htmltext.encode())