Created
April 7, 2016 17:15
-
-
Save Rimbo/b06d28ad693c94824a709088b9da889a to your computer and use it in GitHub Desktop.
A script to download the archives of Girl Genius, Buck Godot and BattlePug locally. Designed so you can add your own comics to it. PLEASE SUPPORT THE ARTISTS!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
from bs4 import BeautifulSoup | |
from urllib2 import urlopen | |
from urllib import urlretrieve | |
import argparse | |
class getComic:
    """Base class for comic archive downloaders.

    A subclass fills in the three class attributes below and overrides
    ``getImage`` and ``getNext``. ``getImages`` then walks the archive one
    page at a time, starting at ``host % firstComic``, and saves each comic
    image into the current working directory.
    """

    # override these
    comicName = None   # prefix of every saved file name
    host = None        # URL template with a single ``%s`` placeholder
    firstComic = None  # value substituted into ``host`` for the first page

    def getImage(self, pgsoup):
        """Return the URL of the comic image on the parsed page (override this).

        The base implementation returns None.
        """
        return None

    def getNext(self, pgsoup):
        """Return the URL of the following page (override this).

        Any false value (the base implementation returns None) ends the
        loop in ``getImages``.
        """
        return None

    # Don't override these
    def grabPage(self, url):
        """Fetch and parse one page.

        Returns a ``(imageurl, nexturl)`` tuple built from ``getImage`` and
        ``getNext`` applied to the parsed page.
        """
        pg = urlopen(url)
        try:
            # BeautifulSoup consumes the response while constructing the
            # tree, so the connection can be released right afterwards.
            pgsoup = BeautifulSoup(pg)
        finally:
            pg.close()
        imageurl = self.getImage(pgsoup)
        nexturl = self.getNext(pgsoup)
        return imageurl, nexturl

    def getImages(self):
        """Download every comic image, following next-links until none is left.

        Files are named ``<comicName>_<NNNNN>.jpg`` with a zero-padded
        counter starting at 0. A page for which no image URL was found is
        reported and skipped; the counter only advances for saved images.
        """
        savename = self.comicName + "_%05d.jpg"
        count = 0
        nexturl = self.host % self.firstComic
        while nexturl:
            pageurl = nexturl
            comic, nexturl = self.grabPage(pageurl)
            if not comic:
                # urlretrieve cannot fetch a missing URL; keep crawling.
                print("No image found on " + pageurl + ", skipping")
                continue
            target = savename % count
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and also parses on Python 3.
            print("Grabbing " + target)
            urlretrieve(comic, target)
            count += 1
class BattlePug(getComic):
    """Downloader for the BattlePug archive on battlepug.com."""

    comicName = "BattlePug"
    host = "http://battlepug.com/%s"
    firstComic = "/comic/first"

    def getImage(self, pgsoup):
        """Return the ``data-image`` attribute of the comic page's image.

        The image is looked up as ``div#comic`` -> ``div.page`` -> ``img``.
        """
        pgcomic = pgsoup.body.find('div', id='comic')
        pgpage = pgcomic.find('div', 'page')
        imageurl = pgpage.img['data-image']
        return imageurl

    def getNext(self, pgsoup):
        """Return the URL of the next page, or None when there is none.

        The next link is taken to be the fourth anchor inside
        ``div#transport``. Its href is substituted into ``host``, so it is
        assumed to be site-relative -- TODO confirm against the live markup.
        """
        pgtrans = pgsoup.body.find('div', id='transport')
        if pgtrans is None:
            return None
        links = pgtrans.find_all('a')
        if len(links) < 4:
            return None
        nexturl = links[3].get('href')  # next
        if not nexturl:
            # Formatting None into the template would produce a truthy
            # ".../None" URL and the download loop would never stop.
            return None
        return self.host % nexturl
class BuckGodot(getComic):
    """Downloader for Buck Godot on airshipentertainment.com."""

    comicName = "Buck Godot"
    host = "http://www.airshipentertainment.com/%s"
    firstComic = "buckcomic.php?date=20070111"

    def getImage(self, pgsoup):
        """Return the ``src`` of the ``<img>`` whose alt text is "Comic"."""
        i = pgsoup.find('img', alt='Comic')
        return i['src']

    def getNext(self, pgsoup):
        """Return the href of the link wrapping the "The Next Comic" image.

        Returns None when the page has no such image, or when the image's
        parent element carries no href, so that the download loop stops
        instead of raising.
        """
        n = pgsoup.find('img', alt='The Next Comic')
        if n is None:
            # find() yields None when nothing matches.
            return None
        return n.parent.get('href')
class GirlGenius(getComic):
    """Downloader for Girl Genius on girlgeniusonline.com."""

    comicName = "Girl Genius"
    host = "http://www.girlgeniusonline.com/%s"
    firstComic = "comic.php?date=20021104"

    def getImage(self, pgsoup):
        """Return the ``src`` of the ``<img>`` whose alt text is "Comic"."""
        i = pgsoup.find('img', alt='Comic')
        return i['src']

    def getNext(self, pgsoup):
        """Return the href of the anchor titled "The Next Comic".

        Returns None when the page has no such anchor (or it has no href),
        so that the download loop stops instead of raising.
        """
        n = pgsoup.find('a', title='The Next Comic')
        if n is None:
            # find() yields None when nothing matches.
            return None
        return n.get('href')
if __name__ == '__main__':
    # Available downloaders, keyed by the name expected on the command line.
    comics = {cls.comicName: cls for cls in (BattlePug, BuckGodot, GirlGenius)}

    parser = argparse.ArgumentParser()
    # ``choices`` makes argparse reject an unknown name with an error that
    # lists the valid ones, instead of the script silently doing nothing.
    parser.add_argument('comicName', type=str, choices=sorted(comics),
                        help="the comic to download")
    args = parser.parse_args()

    # Only the requested downloader is instantiated and run.
    comics[args.comicName]().getImages()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment