Last active
May 16, 2024 17:11
-
-
Save nobkd/322e96fe741cafcbc9babcf2f75a0139 to your computer and use it in GitHub Desktop.
Python ^3.10: Download all xkcd comics and comic data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from get_info import save_infos | |
from get_imgs import save_imgs | |
if __name__ == '__main__':
    # Metadata is fetched first: the image step reads the saved JSON files
    # to find out which image URL belongs to each comic.
    target_dir = 'xkcd_save'
    save_infos(target_dir)
    print()  # blank line between the two phases' console output
    save_imgs(target_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from os import path, listdir | |
from multiprocessing import Pool | |
import json | |
import re | |
def load_infos(save_dir: str) -> list[str]:
    """Return the names of the JSON info files found in *save_dir*.

    Args:
        save_dir: Directory to scan (not searched recursively).

    Returns:
        The bare entry names (not full paths) that end in ``.json``, in the
        order ``os.listdir`` reports them.

    Raises:
        OSError: If *save_dir* cannot be listed (e.g. it does not exist).
    """
    # A plain suffix test replaces the former non-raw regex '.*\.json$',
    # whose '\.' is an invalid string escape that newer Python versions
    # warn about.
    return [name for name in listdir(save_dir) if name.endswith('.json')]
def save_img(save_dir: str, file_path: str, *, timeout: float = 30.0) -> None:
    """Download the image described by one saved comic-info JSON file.

    The JSON file is expected to hold at least the keys ``"num"`` and
    ``"img"``. The image is stored in *save_dir* as ``<num>.<ext>``, where
    ``<ext>`` is the text after the last dot of the image URL. Nothing is
    downloaded when that file already exists.

    Args:
        save_dir: Directory containing the JSON file; also the destination.
        file_path: Name of the JSON file, relative to *save_dir*.
        timeout: Seconds to wait on the server before giving up, so a
            stalled connection cannot block a worker forever.

    Raises:
        OSError: If the JSON file cannot be read or the image not written.
        KeyError: If the JSON lacks ``"num"`` or ``"img"``.
        requests.RequestException: On connection errors or timeouts.
    """
    import os  # local import: the file only imports ``path``/``listdir`` from os

    # The info files are written from raw HTTP bytes; JSON is UTF-8, so do
    # not depend on the platform's default text encoding.
    with open(path.join(save_dir, file_path), 'rt', encoding='utf-8') as fp:
        json_data = json.load(fp)

    num = json_data["num"]
    img = json_data["img"]

    save_path = path.join(save_dir, f'{num}.{img.split(".")[-1]}')
    if path.exists(save_path):
        #print('Exists:', num)
        return

    # The context manager releases the streamed connection on every path.
    with requests.get(img, stream=True, timeout=timeout) as data:
        if not data.ok:
            print('Not OK:', num)
            return

        print('Saving:', num)
        # Write to a side file first: an interrupted download must not leave
        # a truncated image that the exists-check above would later accept.
        tmp_path = save_path + '.part'
        with open(tmp_path, 'wb') as f:
            for chunk in data.iter_content(chunk_size=64 * 1024):
                f.write(chunk)

    os.replace(tmp_path, save_path)
    print('Done:', num)
def save_imgs(save_dir: str) -> None:
    """Download the image for every comic-info JSON file in *save_dir*.

    Each info file is handed to ``save_img`` in a worker process; the call
    returns once all workers have finished.

    Args:
        save_dir: Directory holding the ``.json`` info files; images are
            saved into the same directory.
    """
    print('Checking Images...')

    imgs_to_save = load_infos(save_dir)

    pool = Pool()
    try:
        for img_info in imgs_to_save:
            pool.apply_async(
                save_img,
                (save_dir, img_info),
                # The async results are never fetched, so without a callback
                # any exception raised in a worker would vanish silently.
                # ``name=img_info`` binds the current file name per task.
                error_callback=lambda exc, name=img_info: print(
                    'Failed:', name, exc
                ),
            )
    finally:
        # Always stop accepting work and wait for the workers, even if
        # submitting a task failed part-way through.
        pool.close()
        pool.join()
if __name__ == '__main__':
    # Script entry point: fetch images for the info files in the default
    # download folder.
    target_dir = 'xkcd_save'
    save_imgs(target_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from os import mkdir, path | |
from multiprocessing import Pool | |
import json | |
# Site root; per-comic URLs are built as BASE_URL + '<num>/' + JSON_URL.
BASE_URL = 'https://xkcd.com/'
# Name of the JSON metadata document; appended directly to BASE_URL it
# addresses the latest comic.
JSON_URL = 'info.0.json'


def last_comic_num(timeout: float = 30.0) -> int:
    """Return the ``num`` field of the metadata served at the site root.

    Args:
        timeout: Seconds to wait on the server before giving up.

    Returns:
        The value stored under ``'num'`` in the downloaded JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection errors or timeouts.
        KeyError: If the JSON has no ``'num'`` entry.
    """
    # The context manager closes the connection on every path.
    with requests.get(BASE_URL + JSON_URL, timeout=timeout) as response:
        # Fail with a clear HTTP error instead of trying to parse an error
        # page as JSON.
        response.raise_for_status()
        return json.loads(response.content)['num']
def save_info(save_path: str, num: int, *, timeout: float = 30.0) -> None:
    """Download the JSON metadata of comic *num* into *save_path*.

    The document is stored as ``<num>.json``. Nothing is downloaded when
    that file already exists.

    Args:
        save_path: Destination directory (must already exist).
        num: Comic number to fetch.
        timeout: Seconds to wait on the server before giving up, so a
            stalled connection cannot block a worker forever.

    Raises:
        OSError: If the file cannot be written.
        requests.RequestException: On connection errors or timeouts.
    """
    import os  # local import: the file only imports ``mkdir``/``path`` from os

    pth = path.join(save_path, f'{num}.json')
    if path.exists(pth):
        #print('Exists:', num)
        return

    # The context manager releases the streamed connection on every path.
    with requests.get(
        f'{BASE_URL}{num}/{JSON_URL}', stream=True, timeout=timeout
    ) as data:
        if not data.ok:
            print('Not OK:', num)
            return

        print('Saving:', num)
        # Write to a side file first: an interrupted download must not leave
        # a truncated JSON file that the exists-check above would later
        # accept as complete.
        tmp_path = pth + '.part'
        with open(tmp_path, 'wb') as f:
            for chunk in data.iter_content(chunk_size=64 * 1024):
                f.write(chunk)

    os.replace(tmp_path, pth)
    print('Done:', num)
def save_infos(save_path: str) -> None:
    """Download the JSON metadata of every comic into *save_path*.

    The directory is created when missing. Comics ``1`` through the number
    reported by ``last_comic_num`` are each handed to ``save_info`` in a
    worker process; the call returns once all workers have finished.

    Args:
        save_path: Destination directory for the ``<num>.json`` files.
    """
    from os import makedirs  # local import: only ``mkdir``/``path`` are imported at file level

    print('Checking Info...')

    # One call covers nested paths and avoids the race between an
    # exists-check and a separate mkdir.
    makedirs(save_path, exist_ok=True)

    last_num = last_comic_num()
    print('Latest Comic:', last_num)

    pool = Pool()
    try:
        for i in range(1, last_num + 1):
            pool.apply_async(
                save_info,
                (save_path, i),
                # The async results are never fetched, so without a callback
                # any exception raised in a worker would vanish silently.
                # ``num=i`` binds the current comic number per task.
                error_callback=lambda exc, num=i: print('Failed:', num, exc),
            )
    finally:
        # Always stop accepting work and wait for the workers, even if
        # submitting a task failed part-way through.
        pool.close()
        pool.join()
if __name__ == '__main__':
    # Script entry point: fetch comic metadata into the default download
    # folder.
    target_dir = 'xkcd_save'
    save_infos(target_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment