Created
April 12, 2017 11:53
-
-
Save tsu-nera/8958fc66b003c801ae05249e098b7a86 to your computer and use it in GitHub Desktop.
月間AV女優ランキング ベスト100から画像をダウンロード
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib.request
from os.path import basename
from time import sleep
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
# Ranking pages to scan; each URL covers one slice of the monthly top 100.
pages = [
    'http://www.dmm.co.jp/mono/dvd/-/ranking/=/term=monthly/mode=actress/rank=1_20/',
    'http://www.dmm.co.jp/mono/dvd/-/ranking/=/term=monthly/mode=actress/rank=21_40/',
    'http://www.dmm.co.jp/mono/dvd/-/ranking/=/term=monthly/mode=actress/rank=41_60/',
    'http://www.dmm.co.jp/mono/dvd/-/ranking/=/term=monthly/mode=actress/rank=61_80/',
    'http://www.dmm.co.jp/mono/dvd/-/ranking/=/term=monthly/mode=actress/rank=81_100/',
]

# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Seconds to pause between ranking pages to avoid hammering the server.
PAGE_DELAY = 1


def filename_from_url(url):
    """Return the last path component of *url*.

    The query string and fragment are ignored so they never end up in the
    local file name. Returns an empty string when the path ends with ``/``.
    """
    return basename(urlsplit(url).path)


def iter_image_urls(html, page_url):
    """Yield the absolute URL of each ranking image found in *html*.

    Looks inside every ``<table class="w100 mg-b20 work">`` for
    ``<td class="bd-b">`` cells and takes the first ``<img>`` of each cell.
    Cells without an image (or with an empty ``src``) are skipped instead of
    raising. *page_url* is used to resolve relative ``src`` values.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for table in soup.find_all('table', {'class': 'w100 mg-b20 work'}):
        for cell in table.find_all('td', attrs={'class': 'bd-b'}):
            img = cell.find('img')
            if img is None or not img.get('src'):
                continue
            yield urljoin(page_url, img['src'])


def download_image(url):
    """Download *url* into the current directory, named after its basename.

    The file is only created after the HTTP request succeeded, so a failed
    download does not leave an empty file behind.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status.
    """
    name = filename_from_url(url)
    if not name:
        # Nothing sensible to name the file after (URL path ends with '/').
        return
    print(url)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(name, 'wb') as f:
        f.write(response.content)


def main():
    """Fetch every ranking page in ``pages`` and download its images."""
    for index, page in enumerate(pages):
        if index:
            sleep(PAGE_DELAY)
        with urllib.request.urlopen(page, timeout=REQUEST_TIMEOUT) as response:
            html = response.read()
        for url in iter_image_urls(html, page):
            try:
                download_image(url)
            except requests.RequestException as exc:
                # One broken image should not abort the remaining downloads.
                print('skipped {}: {}'.format(url, exc))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment