kade-robertson/archive-xkcd.py

## archive-xkcd.py
#!/usr/bin/env python3

import os
import sys
import json
import shutil
import datetime
import requests

class Comic(object):
    """A single comic, populated from a metadata mapping.

    Every key of the mapping handed to the constructor becomes an instance
    attribute of the same name. The ``day``, ``month`` and ``year`` values
    are converted to ``int``; everything else is stored untouched.
    """

    def __init__(self, data):
        """Copy each item of *data* onto the instance, preserving key order."""
        date_parts = ('day', 'month', 'year')
        attrs = self.__dict__
        for key, value in data.items():
            attrs[key] = int(value) if key in date_parts else value

    def __str__(self):
        """Return a human-readable, multi-line summary of the comic."""
        published = datetime.date(year=self.year, month=self.month, day=self.day)
        lines = [
            'Title: {}'.format(self.title),
            'Index: {}'.format(self.num),
            'Date: {}'.format(published.strftime('%Y-%m-%d')),
            'Alt-Text: {}'.format(self.alt),
            'Permalink: https://xkcd.com/{}/'.format(self.num),
            'Image link: {}'.format(self.img),
            '',
            'Transcript',
            '----------',
            '',
            '{}'.format(self.transcript),
        ]
        return '\n'.join(lines)

    def __repr__(self):
        """Return the instance attributes serialised as indented JSON."""
        return json.dumps(vars(self), indent=4)

    def filename(self, ext=''):
        """Return ``<zero-padded num>-<safe_title><ext>``.

        ``?`` is removed from the title and ``/`` is replaced with ``-``;
        *ext* is appended verbatim, so pass it with its leading dot.
        """
        cleaned = self.safe_title.translate(str.maketrans({'?': None, '/': '-'}))
        return '{:04d}-{}{}'.format(self.num, cleaned, ext)

# URL template for a comic's JSON metadata; {0} is filled with the comic number.
base_url = r'https://xkcd.com/{0}/info.0.json'
# Images are written to ./xkcd, text/JSON metadata to ./xkcd/metadata
# (both relative to the current working directory).
base_dir = os.path.join(os.getcwd(), 'xkcd')
meta_dir = os.path.join(base_dir, 'metadata')

# Create whichever output directories are missing, announcing each one.
for _notice, _target in (
    ('Creating xkcd dir at {}', base_dir),
    ('Creating metadata dir at {}', meta_dir),
):
    if os.path.isdir(_target):
        continue
    print(_notice.format(_target))
    os.makedirs(_target)

# Walk the comics in increasing order, saving each image plus a .txt and a
# .json metadata file, until the metadata endpoint answers 404.
with requests.Session() as sess:
    # Seconds allowed for each connect/read; without a timeout a stalled
    # connection would hang the whole run indefinitely.
    timeout = 30
    # Start at comic 1 unless a starting number is given on the command line.
    comic = 1 if len(sys.argv) == 1 else int(sys.argv[1])
    # The number 404 is never requested: the loop treats an HTTP 404 as the
    # end of the series, and comic 404 presumably answers with exactly that.
    if comic == 404:
        comic += 1
    print('Starting from comic #{}'.format(comic))
    while True:
        cdata = sess.get(base_url.format(comic), timeout=timeout)
        if cdata.status_code == 404:
            # Ran past the newest comic.
            break
        # Any other HTTP failure carries no usable JSON; raise a clear
        # HTTPError instead of an opaque JSON decoding error.
        cdata.raise_for_status()
        com = Comic(cdata.json())
        print('Archiving {}'.format(com.filename()))
        # Use the image URL's own extension instead of assuming every image
        # is a JPEG; fall back to .jpg when the URL carries none.
        img_ext = os.path.splitext(com.img.split('?', 1)[0])[1] or '.jpg'
        # The context manager closes the streamed response even on error.
        with sess.get(com.img, stream=True, timeout=timeout) as imgd:
            if imgd.ok:
                with open(os.path.join(base_dir, com.filename(img_ext)), 'wb') as imgf:
                    shutil.copyfileobj(imgd.raw, imgf)
            else:
                # Do not store an error page as if it were the image; the
                # metadata below is still worth keeping.
                print('Skipping image for comic #{} (HTTP {})'.format(com.num, imgd.status_code))
        with open(os.path.join(meta_dir, com.filename('.txt')), 'w', encoding='utf-8') as plainf:
            plainf.write(str(com))
        with open(os.path.join(meta_dir, com.filename('.json')), 'w', encoding='utf-8') as jsonf:
            jsonf.write(repr(com))
        comic += 1
        if comic == 404:
            comic += 1
    print('Archiving complete.')
	#!/usr/bin/env python3

	import os
	import sys
	import json
	import shutil
	import datetime
	import requests

	class Comic(object):
	    """A single comic, populated from a metadata mapping.

	    Each key of the mapping becomes an instance attribute; the ``day``,
	    ``month`` and ``year`` values are stored as ``int``.
	    """

	    def __init__(self, data):
	        """Copy each item of *data* onto the instance."""
	        for k, v in data.items():
	            self.__dict__[k] = v
	            # Date parts are converted so they can feed datetime.date().
	            if k in ('day', 'month', 'year'):
	                self.__dict__[k] = int(v)

	    def __str__(self):
	        """Return a human-readable, multi-line summary of the comic."""
	        pdate = datetime.date(year=self.year, month=self.month, day=self.day)
	        out = 'Title: {}\n'.format(self.title)
	        out += 'Index: {}\n'.format(self.num)
	        out += 'Date: {}\n'.format(pdate.strftime('%Y-%m-%d'))
	        out += 'Alt-Text: {}\n'.format(self.alt)
	        out += 'Permalink: https://xkcd.com/{}/\n'.format(self.num)
	        out += 'Image link: {}\n\n'.format(self.img)
	        out += 'Transcript\n----------\n\n{}'.format(self.transcript)
	        return out

	    def __repr__(self):
	        """Return the instance attributes serialised as indented JSON."""
	        return json.dumps(self.__dict__, indent=4)

	    def filename(self, ext=''):
	        """Return ``<zero-padded num>-<safe_title><ext>``.

	        ``?`` is removed from the title and ``/`` is replaced with ``-``.
	        """
	        safer = self.safe_title.replace('?', '').replace('/', '-')
	        return '{:04d}-{}{}'.format(self.num, safer, ext)

	# URL template for a comic's JSON metadata; {0} is filled with the comic number.
	base_url = r'https://xkcd.com/{0}/info.0.json'
	# Images are written to ./xkcd, text/JSON metadata to ./xkcd/metadata
	# (both relative to the current working directory).
	base_dir = os.path.join(os.getcwd(), 'xkcd')
	meta_dir = os.path.join(base_dir, 'metadata')

	# Create the output directories on first run.
	if not os.path.isdir(base_dir):
	    print('Creating xkcd dir at {}'.format(base_dir))
	    os.makedirs(base_dir)
	if not os.path.isdir(meta_dir):
	    print('Creating metadata dir at {}'.format(meta_dir))
	    os.makedirs(meta_dir)

	# Walk the comics in increasing order, saving each image plus a .txt and a
	# .json metadata file, until the metadata endpoint answers 404.
	with requests.Session() as sess:
	    # Seconds allowed for each connect/read; without a timeout a stalled
	    # connection would hang the whole run indefinitely.
	    timeout = 30
	    # Start at comic 1 unless a starting number is given on the command line.
	    comic = 1 if len(sys.argv) == 1 else int(sys.argv[1])
	    # The number 404 is never requested: the loop treats an HTTP 404 as the
	    # end of the series, and comic 404 presumably answers with exactly that.
	    if comic == 404:
	        comic += 1
	    print('Starting from comic #{}'.format(comic))
	    while True:
	        cdata = sess.get(base_url.format(comic), timeout=timeout)
	        if cdata.status_code == 404:
	            # Ran past the newest comic.
	            break
	        # Any other HTTP failure carries no usable JSON; raise a clear
	        # HTTPError instead of an opaque JSON decoding error.
	        cdata.raise_for_status()
	        com = Comic(cdata.json())
	        print('Archiving {}'.format(com.filename()))
	        # Use the image URL's own extension instead of assuming every image
	        # is a JPEG; fall back to .jpg when the URL carries none.
	        img_ext = os.path.splitext(com.img.split('?', 1)[0])[1] or '.jpg'
	        # The context manager closes the streamed response even on error.
	        with sess.get(com.img, stream=True, timeout=timeout) as imgd:
	            if imgd.ok:
	                with open(os.path.join(base_dir, com.filename(img_ext)), 'wb') as imgf:
	                    shutil.copyfileobj(imgd.raw, imgf)
	            else:
	                # Do not store an error page as if it were the image; the
	                # metadata below is still worth keeping.
	                print('Skipping image for comic #{} (HTTP {})'.format(com.num, imgd.status_code))
	        with open(os.path.join(meta_dir, com.filename('.txt')), 'w', encoding='utf-8') as plainf:
	            plainf.write(str(com))
	        with open(os.path.join(meta_dir, com.filename('.json')), 'w', encoding='utf-8') as jsonf:
	            jsonf.write(repr(com))
	        comic += 1
	        if comic == 404:
	            comic += 1
	    print('Archiving complete.')