Last active
April 1, 2019 20:59
-
-
Save timbledum/701202fdef399ab5462fb6f607196544 to your computer and use it in GitHub Desktop.
For the fam here: https://www.reddit.com/r/learnpython/comments/b85e0l/despite_running_pip_install_requests_and_pip3/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# downloadXkcd.py - Downloads every single XKCD comic.
#
# Starts at the xkcd front page, saves the comic image found on each page
# into ./xkcd, then follows the page's "Prev" link. The loop stops once the
# next URL ends with "#" (presumably the Prev link of the very first comic --
# confirm against the live site).

import os

import bs4
import requests

# Seconds to wait on the server before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30

url = "http://xkcd.com"  # starting url
os.makedirs("xkcd", exist_ok=True)  # store comics in ./xkcd

while not url.endswith("#"):
    # Download the page.
    print("Downloading page %s..." % url)
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and bs4 does not warn).
    soup = bs4.BeautifulSoup(res.text, "html.parser")

    # Find the URL of the comic image.
    comicElem = soup.select("#comic img")
    if not comicElem:
        print("Could not find comic image.")
    else:
        comicUrl = comicElem[0].get("src")
        print("Downloading image %s..." % (comicUrl))
        # Protocol-relative URLs ("//host/path") need a scheme before
        # requests can fetch them.
        if comicUrl.startswith("//"):
            comicUrl = "https:" + comicUrl

        # Download the image.
        res = requests.get(comicUrl, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()

        # Save the image to ./xkcd; the context manager closes the file even
        # if a write fails part-way through.
        imagePath = os.path.join("xkcd", os.path.basename(comicUrl))
        with open(imagePath, "wb") as imageFile:
            for chunk in res.iter_content(100000):
                imageFile.write(chunk)

    # Get the Prev button's url.
    prevLinks = soup.select('a[rel="prev"]')
    if not prevLinks:
        # Without a Prev link there is nowhere left to go; stop cleanly
        # instead of failing with an IndexError.
        print("Could not find Prev link; stopping.")
        break
    url = "http://xkcd.com" + prevLinks[0].get("href")

print("Done.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment