Created
February 16, 2017 05:58
-
-
Save yoheiMune/33a3e1c2066fe3f4ba43b13ea7fa53fa to your computer and use it in GitHub Desktop.
G's ACADEMY TOKYO のメンター一覧からメンター画像を取得するサンプル.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Sample script that downloads the mentor images from G's ACADEMY TOKYO.
"""
import os
from pprint import pprint
from urllib.parse import urljoin
from urllib.request import urlopen

from bs4 import BeautifulSoup
# URL of the mentor list page; also used as the base for resolving image paths.
MENTOR_PAGE_URL = "http://gsacademy.tokyo/mentor/"

# Directory the downloaded images are written to.
IMAGE_DIR = "img"


def fetch_html(page_url):
    """Download *page_url* and return the response body decoded as UTF-8."""
    with urlopen(page_url) as res:
        return res.read().decode("utf-8")


def to_absolute_urls(srcs, base_url=MENTOR_PAGE_URL):
    """Return the absolute form of every image ``src`` in *srcs*.

    Each value is resolved against *base_url* with ``urljoin``, so already
    absolute URLs are kept as they are, while root-relative (``/a.png``),
    document-relative (``a.png``) and protocol-relative (``//host/a.png``)
    values are all expanded. Missing or empty values are skipped.
    """
    return [urljoin(base_url, src) for src in srcs if src]


def extract_image_urls(html, base_url=MENTOR_PAGE_URL):
    """Return absolute URLs of the images matched by ``.heading.c2 img``."""
    soup = BeautifulSoup(html, "html.parser")
    # ``get`` instead of ``[...]`` so an <img> without a src does not raise.
    srcs = [e.get("src") for e in soup.select(".heading.c2 img")]
    return to_absolute_urls(srcs, base_url)


def image_path(index, directory=IMAGE_DIR):
    """Return the file path used for the image numbered *index*."""
    # NOTE(review): the name always ends in ".png", whatever format the
    # server actually returns -- confirm whether that is intended.
    return "%s/%d.png" % (directory, index)


def download_images(img_urls, directory=IMAGE_DIR):
    """Download every URL in *img_urls* into *directory*.

    The directory is created when it does not exist. Files are numbered
    from 1 in the order of *img_urls*; the 0-based index and the URL are
    printed before each download.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(directory, exist_ok=True)
    for i, url in enumerate(img_urls):
        print(i, url)
        with urlopen(url) as res:
            img = res.read()
        with open(image_path(i + 1, directory), "wb") as f:
            f.write(img)


def main():
    """Fetch the mentor list page and save every mentor image locally."""
    html = fetch_html(MENTOR_PAGE_URL)
    download_images(extract_image_urls(html))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment