Skip to content

Instantly share code, notes, and snippets.

@Atrion
Last active December 28, 2020 23:17
Show Gist options
  • Save Atrion/a29748edbdf5aff933de839d3f997056 to your computer and use it in GitHub Desktop.
Save Atrion/a29748edbdf5aff933de839d3f997056 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Created on Wed May 20 23:20:53 2020
@author: mazdoc, kjerk
"""
### https://www.reddit.com/r/DataHoarder/comments/gnj9qc/gocomics_universal_downloader_python_script/
### The comic name comes from the gocomics URL and must match it exactly.
### You should also supply the first date of the comic in YYYY-MM-DD form.
### Calvin and Hobbes started on November 18, 1985, i.e. 1985-11-18.
### Example URL path (name/year/month/day): foxtrot/1988/04/11
import datetime
import os
import requests
import time
# Settings
# Name of the strip, exactly as it is spelled in its gocomics URL.
comic_name = 'calvinandhobbes'
# Date the download starts from when no earlier download exists (YYYY-MM-DD).
first_date = '1985-11-18'
# Page URL pattern; the four slots receive comic name, year, month and day.
base_url = 'https://www.gocomics.com/{}/{}/{}/{}'
# Every comic gets its own folder below ./comics/.
save_dir = './comics/{}'.format(comic_name)
# Pause after each downloaded day, in milliseconds. Be kind when scraping.
requestWaitMs = 500
# Init Save Dir
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# which removes the separate "does it exist?" check.
os.makedirs(save_dir, exist_ok=True)
file_list = os.listdir(save_dir)
file_count = len(file_list)

# Default starting point: the configured first date of the comic.
date_cursor = datetime.date.fromisoformat(first_date)

# Resume from the newest date already on disk.
# os.listdir() returns entries in arbitrary order, so the last list element
# is not necessarily the latest download; parse every name and take the
# maximum instead. Entries whose stem is not an ISO date are ignored rather
# than aborting the script with a ValueError.
saved_dates = []
for file_name in file_list:
    stem = os.path.splitext(os.path.basename(file_name))[0]
    try:
        saved_dates.append(datetime.date.fromisoformat(stem))
    except ValueError:
        continue
if saved_dates:
    # The checkpoint date itself is downloaded again by the loop that
    # follows, which also repairs a file left incomplete by an interrupted run.
    date_cursor = max(saved_dates)
    print("Starting from checkpoint: {}".format(date_cursor))
# Download one strip per day, from date_cursor up to and including today.
now = datetime.datetime.now()
last_date = now.date()  # loop-invariant, so computed once
asset_prefix = 'https://assets.amuniversal.com/'
asset_url_len = 63  # length of the prefix plus the asset id that follows it
request_timeout_s = 30  # fail instead of hanging forever on a stalled connection
while date_cursor <= last_date:
    save_file_name = '{}.gif'.format(date_cursor)
    url = base_url.format(comic_name, date_cursor.year, date_cursor.month, date_cursor.day)
    r = requests.get(url, allow_redirects=True, timeout=request_timeout_s)
    if r.status_code == 404:
        # No page for this date: treat it like a page without an image.
        loc = -1
    else:
        # Any other HTTP error stops the script; a rerun resumes from the
        # last saved file instead of silently leaving a gap.
        r.raise_for_status()
        loc = r.text.find(asset_prefix)
    if loc == -1:
        # str.find() returns -1 when the prefix is absent. Slicing from -1
        # would produce an invalid URL and crash on every rerun at this
        # same date, so the date is skipped instead.
        print("Skipping {}: no comic image found".format(date_cursor))
    else:
        imgurl = r.text[loc:loc + asset_url_len]
        imgr = requests.get(imgurl, allow_redirects=True, timeout=request_timeout_s)
        # Never store an HTTP error body on disk as if it were the image.
        imgr.raise_for_status()
        print("Saving: {}".format(save_file_name))
        with open('{}/{}'.format(save_dir, save_file_name), 'wb') as fh:
            fh.write(imgr.content)
    date_cursor += datetime.timedelta(days=1)
    # requestWaitMs is in milliseconds; time.sleep() expects seconds.
    time.sleep(requestWaitMs / 1000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment