Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
XKCDコミックのサイトからコミックをダウンロードする。
# -*- coding:utf-8 -*-
"""
XKCDコミックのサイトからコミックをダウンロードする。
httpsには対応していない。
https://automatetheboringstuff.com/chapter11/
"""
import bs4
import os
import requests
def main():
    """Download XKCD comic images into ./xkcd, walking back from the latest.

    Starts at the XKCD front page and follows each page's "prev" link until
    the link target ends with '#' (or no prev link is found). For every page
    the first image inside the ``#comic`` element is saved under ``xkcd/``
    using the basename of the image URL as the file name.

    Raises:
        requests.exceptions.HTTPError: if a page or image request returns an
            error status.
        requests.exceptions.RequestException: on connection problems or
            timeouts.
    """
    # Function-scope import so this block stays self-contained.
    from urllib.parse import urljoin

    request_timeout = 30  # seconds; without a timeout a stalled server hangs us forever

    os.makedirs('xkcd', exist_ok=True)
    url = 'http://xkcd.com/'
    while not url.endswith('#'):
        # Download the page.
        print('Downloading page %s...' % url)
        res = requests.get(url, timeout=request_timeout)
        res.raise_for_status()
        soup = bs4.BeautifulSoup(res.text, 'html.parser')

        # Find and download the image.
        els = soup.select('#comic img')
        src = els[0].get('src') if els else None
        if not src:
            print('Could not find comic image.')
        else:
            # urljoin resolves protocol-relative ('//host/x.png'), absolute
            # and site-relative src values against the page URL.
            comic_url = urljoin(url, src)
            print('Downloading image %s...' % comic_url)
            try:
                image_res = requests.get(comic_url, timeout=request_timeout)
                image_res.raise_for_status()
            except requests.exceptions.MissingSchema:
                # Skip this comic; fall through to the prev-link lookup below.
                print('skip this comic.')
            else:
                # Save the image to ./xkcd.
                file_path = os.path.join('xkcd', os.path.basename(comic_url))
                with open(file_path, 'wb') as fp:
                    for chunk in image_res.iter_content(1024 * 10):
                        fp.write(chunk)
                print('save file %s...' % file_path)

        # Get the Prev button's url.
        prev_links = soup.select('a[rel="prev"]')
        if not prev_links:
            print('Could not find previous link.')
            break
        # Plain concatenation on purpose: the loop terminates on an href of
        # '#', which urljoin would drop.
        url = 'http://xkcd.com' + prev_links[0].get('href')
    print('Done.')
# Start the download only when this file is run as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.