Created
March 7, 2015 13:53
-
-
Save reachtarunhere/eb40c758eeb0e62803eb to your computer and use it in GitHub Desktop.
A simple Python script that downloads all comic strips from the Garbage Bin Studios website. It uses Python's urllib and Beautiful Soup.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Download every comic strip from the Garbage Bin Studios website.

Starting at the comic-strips landing page, the script saves the image
linked from each page into the current directory and then follows that
page's "previous comic" link.  It stops when the link points at the
misc page, when a page has no such link, or when a page would be
visited a second time.
"""
import urllib
from contextlib import closing

try:  # Python 3: the helpers live in submodules of urllib
    from urllib.parse import urljoin
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2: the interpreter the script was written for
    from urlparse import urljoin
    urlopen, urlretrieve = urllib.urlopen, urllib.urlretrieve

from bs4 import BeautifulSoup

SITE_ROOT = 'http://www.garbagebinstudios.com'
START_URL = SITE_ROOT + '/comic-strips.html'  # the base url
# After the last comic the "previous" link shifts to the misc page.
END_URL = SITE_ROOT + '/comic-strips/misc.html'

# Attribute values used to locate the two links on every comic page.
COMIC_LINK_CLASS = 'cm-image-previewer cm-previewer'
PREVIOUS_LINK_STYLE = 'padding-right:20px;float:right;padding-top:7px;'

# Image URLs are expected to contain this path segment; the file name is
# whatever follows it.
IMAGE_PATH_MARKER = '/1/'


def _native_str(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 a ``unicode`` value is encoded as UTF-8 (what the script
    has always passed to urllib); on Python 3 the text is returned
    unchanged because urllib expects ``str`` there.
    """
    if str is bytes and not isinstance(text, str):
        return text.encode('utf-8')
    return text


def _image_name(comic_url):
    """Return the local file name to use for the image at *comic_url*.

    The name is the part of the URL after ``/1/``.  When the URL does not
    contain that marker, fall back to the last path segment rather than
    slicing from a bogus offset (``str.find`` returns -1 on a miss).
    """
    position = comic_url.find(IMAGE_PATH_MARKER)
    if position != -1:
        return comic_url[position + len(IMAGE_PATH_MARKER):]
    return comic_url.rsplit('/', 1)[-1]


def _fetch_soup(page_url):
    """Fetch *page_url* and return its parsed HTML."""
    # closing() releases the connection even if read() raises.
    with closing(urlopen(_native_str(page_url))) as response:
        html = response.read()
    # Name the parser explicitly so every machine parses the page the same
    # way, whichever optional parsers happen to be installed.
    return BeautifulSoup(html, 'html.parser')


def main():
    """Walk the comic pages from newest to oldest, saving each image."""
    page_url = START_URL
    visited = set()  # guards against "previous" links that form a cycle

    while page_url != END_URL and page_url not in visited:
        visited.add(page_url)
        soup = _fetch_soup(page_url)

        comic_tag = soup.find('a', attrs={'class': COMIC_LINK_CLASS})
        previous_box = soup.find('div', attrs={'style': PREVIOUS_LINK_STYLE})
        previous_tag = previous_box.a if previous_box is not None else None

        # A page without a comic link is skipped instead of crashing.
        if comic_tag is not None and comic_tag.get('href'):
            # urljoin copes with both site-relative and absolute hrefs.
            comic_url = urljoin(SITE_ROOT, comic_tag['href'])
            image_name = _image_name(comic_url)
            print('Downloading ' + image_name + ' Now')
            urlretrieve(_native_str(comic_url), _native_str(image_name))
            print('Download Complete')

        # No "previous" link means there is nowhere left to go.
        if previous_tag is None or not previous_tag.get('href'):
            break
        page_url = urljoin(SITE_ROOT, previous_tag['href'])


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment