Skip to content

Instantly share code, notes, and snippets.

@aquaerius
Last active November 28, 2022 21:34
Show Gist options
  • Save aquaerius/9d7990be1338044869df9380eb4d1118 to your computer and use it in GitHub Desktop.
Save aquaerius/9d7990be1338044869df9380eb4d1118 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
#multidownload_xkcd.py - Multi Threaded image download
import requests, os, bs4, threading
import logging
# Make sure ./xkcd exists, then switch into it so that every relative path
# used afterwards (saved images and the log file) resolves inside ./xkcd.
os.makedirs('xkcd', exist_ok=True)
os.chdir("xkcd")

# Only ERROR-level (and above) records are written to the log file.
logging.basicConfig(
    filename='multidownloadXkcdLog.txt',
    level=logging.ERROR,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
def downloadXkcd(startComic, endComic):
    """Download the XKCD comics numbered startComic up to endComic - 1.

    For each comic number the page ``http://xkcd.com/<number>`` is fetched,
    the first element matching ``#comic img`` is located, and the image it
    points at is saved in the current working directory as
    ``<number>-<image file name>``.

    Args:
        startComic: First comic number to download (inclusive).
        endComic: Comic number to stop at (exclusive, as in ``range``).

    Returns:
        None. Failures are not raised: any exception for a single comic is
        logged with ``logging.error`` and the loop moves on to the next one.
    """
    # Local import so this function does not depend on the file's import block.
    from urllib.parse import urljoin

    for urlNumber in range(startComic, endComic):
        # The try lives inside the loop so that one failing comic (for
        # example an HTTP 404) does not abandon the rest of the range.
        try:
            # Download the page.
            print('Downloading page http://xkcd.com/%s...' % (urlNumber))
            # A timeout keeps a stalled connection from hanging the thread.
            res = requests.get('http://xkcd.com/%s' % (urlNumber), timeout=30)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(res.text, 'lxml')

            # Find the URL of the comic image.
            comicElem = soup.select('#comic img')
            comicSrc = comicElem[0].get('src') if comicElem else None
            if not comicSrc:
                print('Could not find comic image.')
                continue

            # urljoin resolves relative, absolute and protocol-relative
            # ("//host/path") src values against the final page URL; gluing
            # the two strings together with "/" yields a wrong address for
            # the latter two forms.
            comicUrl = urljoin(res.url, comicSrc)

            # Download the image.
            print('Downloading image %s...' % (comicUrl))
            res = requests.get(comicUrl, timeout=30)
            res.raise_for_status()

            # Save the image; the comic number prefix keeps names unique.
            fileName = "-".join([str(urlNumber), os.path.basename(comicUrl)])
            # "with" closes the file even if a write raises midway.
            with open(fileName, 'wb') as imageFile:
                for chunk in res.iter_content(100000):
                    imageFile.write(chunk)
        except Exception as e:
            # Thread boundary: record the failure and keep going.
            logging.error('Comic %s: %s', urlNumber, e)
# Create and start Thread objects.
# Each thread gets its own disjoint slice of 100 comic numbers, so no comic
# is fetched twice and no two threads write to the same output file.
downloadThreads = []  # Only threads that actually started, so join() is safe.
for i in range(1, 1401, 100):  # Loops 14 times, creates 14 threads.
    # downloadXkcd iterates range(start, end), so the end value is exclusive:
    # (i, i + 100) covers comics i through i + 99.
    downloadThread = threading.Thread(target=downloadXkcd, args=(i, i + 100))
    try:
        downloadThread.start()
    except RuntimeError as e:
        # The thread could not be started; report it and do not track it,
        # because joining a never-started thread raises RuntimeError.
        print(e)
    else:
        downloadThreads.append(downloadThread)

# Wait for all threads to end.
for downloadThread in downloadThreads:
    downloadThread.join()
print('Done.')
@aquaerius
Copy link
Author

Downloads all XKCD comic images in multiple threads.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment