Skip to content

Instantly share code, notes, and snippets.

@rajeshnayar
Last active August 29, 2015 14:22
Show Gist options
  • Save rajeshnayar/e715d6ef2a7d6ba47aff to your computer and use it in GitHub Desktop.
Save rajeshnayar/e715d6ef2a7d6ba47aff to your computer and use it in GitHub Desktop.
def main():
    """Download every Zen Pencils comic image into the ``ZenPencils/`` folder.

    The comic index page is fetched first and the identifiers of all comics
    are read from the ``<option value="...">`` elements that point below the
    comic base URL.  For every identifier that does not yet have a ``.png``,
    ``.gif`` or ``.jpg`` file in ``ZenPencils/``, the comic page is fetched,
    the image URL below the uploads base URL is located in its HTML, and the
    image is saved as ``ZenPencils/<comic id><extension>``.

    Comics whose image URL cannot be located are reported and skipped.
    Network and file-system errors are not caught and propagate to the
    caller.
    """
    # Imports stay function-local, as in the original script.
    import re
    from os import makedirs
    from os.path import isfile
    from urllib.request import Request, urlopen, urlretrieve

    comic_base_url = 'http://zenpencils.com/comic/'
    img_base_url = 'http://1j74duke02o49spu52ub76g1bzq.wpengine.netdna-cdn.com/wp-content/uploads/'
    output_dir = 'ZenPencils'
    known_formats = ('.png', '.gif', '.jpg')

    # re.escape keeps the dots of the base URLs literal; raw strings avoid
    # invalid-escape warnings for sequences such as "\.".
    comic_id_pattern = re.compile(
        r'<option[^>]*\bvalue="' + re.escape(comic_base_url) + r'([^"]+?)/?"')
    # [^"] confines the match to a single attribute value, so the pattern
    # cannot run on to a later image tag on the same page.
    img_pattern = re.compile(
        re.escape(img_base_url) + r'([0-9][^"]*?)(\.[a-z]+)" alt="" title=""')

    def fetch_html(url):
        """Return the decoded body of *url*, sent with a 'Chrome' User-Agent."""
        request = Request(url, headers={'User-Agent': 'Chrome'})
        # The context manager closes the connection even if read() fails.
        with urlopen(request) as response:
            charset = response.headers.get_content_charset() or 'utf-8'
            return response.read().decode(charset, errors='replace')

    print('** WELCOME **\n\n')
    print('** DOWNLOADING CRUCIAL DATA **')
    root_source = fetch_html(comic_base_url)

    print('** GENERATING LIST OF COMICS **\n\n')
    comic_ids = comic_id_pattern.findall(root_source)
    if not comic_ids:
        print('** NO COMICS FOUND AT %s **' % comic_base_url)
        return

    # urlretrieve does not create missing directories.
    makedirs(output_dir, exist_ok=True)

    print('** STARTING COMIC DOWNLOADS **\n')
    for comic_id in comic_ids:
        already_downloaded = any(
            isfile('%s/%s%s' % (output_dir, comic_id, ext))
            for ext in known_formats
        )
        if already_downloaded:
            print('%s already exists --- moving on\n' % comic_id)
            continue

        print('%s --- begin' % comic_id)
        comic_source = fetch_html(comic_base_url + comic_id)
        img_match = img_pattern.search(comic_source)
        if img_match is None:
            # Skip this comic instead of crashing on an unexpected page layout.
            print('%s --- no image found, skipping\n' % comic_id)
            continue

        img_path, img_format = img_match.groups()
        urlretrieve(
            img_base_url + img_path + img_format,
            '%s/%s%s' % (output_dir, comic_id, img_format),
        )
        print('%s --- done\n' % comic_id)

    print('\n** ALL DOWNLOADS FINISHED **')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment