# nsdevaraj/scrap.py

## scrap.py
"""
This script scrapes Blogspot posts: it downloads every link listed in link.txt
and saves each page's <div class="entry"> elements to a numbered HTML file.

Usage:
    1. Put the Blogspot links in link.txt, one URL per line.
    2. Press CTRL-C when you want to stop it.

Note: Your IP address may be temporarily banned from the Blogger service if over-used.
Use at your own risk.
"""

import requests
import io
import re
from bs4 import BeautifulSoup
LINKS_FILE = "link.txt"
# requests waits indefinitely when no timeout is given, so always pass one.
REQUEST_TIMEOUT = 30  # seconds


def read_links(path=LINKS_FILE):
    """Return the URLs listed in *path*, one per line.

    Surrounding whitespace (including the trailing newline that
    ``readlines`` would keep) is stripped and blank lines are dropped, so
    every returned item can be passed straight to ``requests.get``.
    """
    with open(path, "r") as link_file:
        stripped = (line.strip() for line in link_file)
        return [link for link in stripped if link]


def extract_entries(html):
    """Return the markup of every ``<div class="entry">`` found in *html*.

    The elements are serialised one after another, separated by newlines,
    instead of as the ``[..., ...]`` repr of the result list, so the output
    contains nothing but HTML.
    """
    soup = BeautifulSoup(html, "html.parser")
    entries = soup.find_all("div", {"class": "entry"})
    return "\n".join(str(entry) for entry in entries)


def scrape(links):
    """Download each link and save its entries to ``<n>.html``.

    *n* is the 1-based position of the link in *links*. A link that cannot
    be fetched is reported and skipped; its number is not reused, so file
    names keep matching positions in *links*.
    """
    for counter, link in enumerate(links, start=1):
        try:
            page = requests.get(link, timeout=REQUEST_TIMEOUT)
            page.raise_for_status()
        except requests.RequestException as err:
            # One dead link should not abort the rest of the batch.
            print("Skipping {}: {}".format(link, err))
            continue
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent characters that appear in the post.
        with open(str(counter) + ".html", "w", encoding="utf-8") as outputfile:
            outputfile.write(extract_entries(page.content))
        print("Press CTRL-C to exit the program.")


def main():
    """Scrape every link listed in ``link.txt`` in the working directory."""
    scrape(read_links())


if __name__ == "__main__":
    main()