Skip to content

Instantly share code, notes, and snippets.

@Rimbo
Created April 7, 2016 17:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Rimbo/b06d28ad693c94824a709088b9da889a to your computer and use it in GitHub Desktop.
Save Rimbo/b06d28ad693c94824a709088b9da889a to your computer and use it in GitHub Desktop.
A script to download the archives of Girl Genius, Buck Godot and BattlePug locally. Designed so you can add your own comics to it. PLEASE SUPPORT THE ARTISTS!
#!/usr/local/bin/python
from bs4 import BeautifulSoup
from urllib2 import urlopen
from urllib import urlretrieve
import argparse
class getComic:
    """Base class for comics

    A subclass fills in the three class attributes below and overrides
    ``getImage`` and ``getNext``.  ``getImages`` then walks the archive page
    by page, starting at ``host % firstComic``, and saves one image per page
    into the current directory.
    """

    # override these
    comicName = None   # prefix of every saved file name
    host = None        # "%s" template that ``firstComic`` is substituted into
    firstComic = None  # substituted into ``host`` to build the first page URL

    def getImage(self, pgsoup):
        """override this

        Return the URL of the comic image in the parsed page ``pgsoup``,
        or None when the page has no image.
        """
        return None

    def getNext(self, pgsoup):
        """override this

        Return the URL of the page that follows ``pgsoup``, or None (or any
        other false value) to end the crawl.
        """
        return None

    # Don't override these
    def grabPage(self, url):
        """Fetch and parse ``url``; return the pair ``(imageurl, nexturl)``.

        Both values come straight from ``getImage`` and ``getNext``.
        """
        pg = urlopen(url)
        try:
            pgsoup = BeautifulSoup(pg)
        finally:
            # The response is only needed while the page is being parsed;
            # close it so a long crawl does not pile up open connections.
            pg.close()
        imageurl = self.getImage(pgsoup)
        nexturl = self.getNext(pgsoup)
        return imageurl, nexturl

    def getImages(self):
        """Download every comic image, following "next" links until they end.

        Files are written as ``<comicName>_<5-digit counter>.jpg``.  The
        crawl stops when ``getNext`` yields a false value or a URL that was
        already visited (a last page linking back to itself would otherwise
        loop forever).  Pages for which no image URL was found are skipped.
        """
        savename = self.comicName + "_%05d.jpg"
        count = 0
        visited = set()
        nexturl = self.host % self.firstComic
        while nexturl and nexturl not in visited:
            visited.add(nexturl)
            comic, nexturl = self.grabPage(nexturl)
            if not comic:
                # Nothing to download on this page; keep following links.
                continue
            # Parenthesised single argument: prints the same text whether
            # ``print`` is a statement or a function.
            print("Grabbing " + savename % count)
            urlretrieve(comic, savename % count)
            count += 1
class BattlePug(getComic):
    """Downloader for the BattlePug archive."""

    comicName = "BattlePug"
    host = "http://battlepug.com/%s"
    firstComic = "/comic/first"

    def getImage(self, pgsoup):
        """Return the ``data-image`` attribute of the comic's <img>.

        The image is looked up inside the ``page`` div nested in the div
        with id ``comic``.
        """
        pgcomic = pgsoup.body.find('div', id='comic')
        pgpage = pgcomic.find('div', 'page')
        return pgpage.img['data-image']

    def getNext(self, pgsoup):
        """Return the URL of the next page, or None when there is none.

        The fourth link inside the div with id ``transport`` is taken to be
        the "next" control; its ``href`` is substituted into ``host``.
        """
        pgtrans = pgsoup.body.find('div', id='transport')
        pgnext = pgtrans.find_all('a')[3]  # next
        if not pgnext.has_attr('href'):
            # No target: report end-of-archive.  Formatting None into the
            # host template would yield the truthy ".../None" URL and keep
            # the crawl going.
            return None
        return self.host % pgnext['href']
class BuckGodot(getComic):
    """Downloader for the Buck Godot archive."""

    comicName = "Buck Godot"
    host = "http://www.airshipentertainment.com/%s"
    firstComic = "buckcomic.php?date=20070111"

    def getImage(self, pgsoup):
        """Return the ``src`` of the <img> whose alt text is ``Comic``.

        Returns None when the page has no such image or it has no ``src``.
        """
        image = pgsoup.find('img', alt='Comic')
        if image is None:
            return None
        return image.get('src')

    def getNext(self, pgsoup):
        """Return the ``href`` of the element wrapping the "next" button.

        The button is the <img> whose alt text is ``The Next Comic``.
        Returns None when the button is missing or its parent carries no
        ``href``, so the crawl ends instead of raising.
        """
        button = pgsoup.find('img', alt='The Next Comic')
        if button is None or button.parent is None:
            return None
        return button.parent.get('href')
class GirlGenius(getComic):
    """Downloader for the Girl Genius archive."""

    comicName = "Girl Genius"
    host = "http://www.girlgeniusonline.com/%s"
    firstComic = "comic.php?date=20021104"

    def getImage(self, pgsoup):
        """Return the ``src`` of the <img> whose alt text is ``Comic``.

        Returns None when the page has no such image or it has no ``src``.
        """
        image = pgsoup.find('img', alt='Comic')
        if image is None:
            return None
        return image.get('src')

    def getNext(self, pgsoup):
        """Return the ``href`` of the link titled ``The Next Comic``.

        Returns None when the link is missing or has no ``href``, so the
        crawl ends instead of raising.
        """
        link = pgsoup.find('a', title='The Next Comic')
        if link is None:
            return None
        return link.get('href')
if __name__ == '__main__':
    # Map each comic's display name to its downloader class so the name given
    # on the command line can be validated and looked up directly.
    comics = dict(
        (cls.comicName, cls) for cls in (BattlePug, BuckGodot, GirlGenius)
    )

    parser = argparse.ArgumentParser()
    # ``choices`` makes argparse reject an unknown name with a usage error
    # (and list the valid names in --help) instead of silently doing nothing.
    parser.add_argument('comicName', type=str, choices=sorted(comics),
                        help="the comic to download")
    args = parser.parse_args()

    comics[args.comicName]().getImages()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment