Last active
November 28, 2022 21:34
-
-
Save aquaerius/9d7990be1338044869df9380eb4d1118 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# multidownload_xkcd.py - Multi-threaded image download
import logging
import os
import threading
from urllib.parse import urljoin

import bs4
import requests
# Comics are stored in ./xkcd: create the directory if it is missing, then
# make it the working directory so every relative path below lands inside it.
os.makedirs('xkcd', exist_ok=True)
os.chdir("xkcd")

# Only ERROR-level (and above) records are written. The log file name is
# relative, and the chdir above has already happened, so it is created
# inside the xkcd directory.
logging.basicConfig(
    filename='multidownloadXkcdLog.txt',
    level=logging.ERROR,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
def downloadXkcd(startComic, endComic):
    """Download a range of xkcd comics into the current working directory.

    For every comic number in ``range(startComic, endComic)`` the comic page
    is fetched, the first ``<img>`` inside the ``#comic`` element is located,
    and the image is saved as ``<number>-<image file name>``.

    Failures are handled per comic: the error is logged with
    ``logging.error`` and the loop moves on to the next number, so a single
    bad page does not abort the rest of the range.

    Args:
        startComic: First comic number to download (inclusive).
        endComic: Comic number to stop before (exclusive).
    """
    # Without a timeout a stalled connection would block this thread (and
    # the join() that waits for it) forever.
    timeoutSeconds = 30

    for urlNumber in range(startComic, endComic):
        try:
            # Download the page.
            pageUrl = 'http://xkcd.com/%s' % (urlNumber)
            print('Downloading page %s...' % (pageUrl))
            res = requests.get(pageUrl, timeout=timeoutSeconds)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(res.text, 'lxml')

            # Find the URL of the comic image.
            comicElem = soup.select('#comic img')
            imageSrc = comicElem[0].get('src') if comicElem else None
            if not imageSrc:
                print('Could not find comic image.')
                continue

            # Resolve src against the page URL. urljoin handles absolute,
            # protocol-relative ("//host/path") and relative values, which
            # plain string joining does not.
            comicUrl = urljoin(res.url, imageSrc)

            # Download the image.
            print('Downloading image %s...' % (comicUrl))
            imageRes = requests.get(comicUrl, timeout=timeoutSeconds)
            imageRes.raise_for_status()

            # Save the image to the current directory; the with-block
            # closes the file even if a write fails part-way through.
            fileName = "-".join([str(urlNumber), os.path.basename(comicUrl)])
            with open(fileName, 'wb') as imageFile:
                for chunk in imageRes.iter_content(100000):
                    imageFile.write(chunk)
        except Exception as e:
            # Top-level boundary of the worker: record the failure and keep
            # going with the next comic instead of killing the thread.
            logging.error('Comic %s: %s', urlNumber, e)
# Create and start the Thread objects.
# Each thread receives its own, non-overlapping slice of comic numbers, so no
# comic is fetched twice and no two threads write the same file.
FIRST_COMIC = 1          # first comic number to download
THREAD_COUNT = 9         # number of worker threads
COMICS_PER_THREAD = 100  # comics handled by each thread

downloadThreads = []  # Thread objects that were started successfully.
lastComicExclusive = FIRST_COMIC + THREAD_COUNT * COMICS_PER_THREAD
for sliceStart in range(FIRST_COMIC, lastComicExclusive, COMICS_PER_THREAD):
    # downloadXkcd iterates range(start, end), so the end value is exclusive.
    downloadThread = threading.Thread(
        target=downloadXkcd,
        args=(sliceStart, sliceStart + COMICS_PER_THREAD),
    )
    try:
        downloadThread.start()
    except RuntimeError as e:
        # The thread could not be started; report it and do not track it,
        # because join() on a never-started thread raises RuntimeError.
        print(e)
    else:
        downloadThreads.append(downloadThread)

# Wait for all threads to end.
for downloadThread in downloadThreads:
    downloadThread.join()
print('Done.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Downloads all XKCD comic images in multiple threads.