Last active
August 29, 2015 14:10
-
-
Save knuu/2c9d905672acfc1104db to your computer and use it in GitHub Desktop.
image-downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import os.path
from html.parser import HTMLParser
from urllib import request
from urllib.parse import urljoin, urlparse
# Bulk-download the images on a page, given its URL and a destination folder.
class MyHTMLParser(HTMLParser):
    """Parse HTML and collect the ``src`` of every ``<img>`` tag.

    Attributes:
        imglist: ``src`` values in document order, exactly as written in
            the markup (they may be relative URLs).
    """

    def __init__(self):
        # HTMLParser.__init__ only accepts keyword-only options; passing
        # ``self`` as an extra positional argument raises TypeError on
        # Python 3.5+.
        super().__init__()
        self.imglist = []

    def handle_starttag(self, tag, attrs):
        """Append the ``src`` attribute to ``imglist`` when *tag* is ``img``.

        Args:
            tag: Tag name as passed in by HTMLParser.
            attrs: List of ``(name, value)`` pairs for the tag.
        """
        if tag != 'img':
            return
        # A valueless attribute (``<img src>``) is reported as None and an
        # empty one as ""; neither is a usable link, so both are skipped.
        src = dict(attrs).get('src')
        if src:
            self.imglist.append(src)
def main():
    """Download every image referenced by an ``<img>`` tag on one page.

    The page URL and the destination folder are hard-coded placeholders
    that must be edited before running. Each image is saved under a name
    made of the current timestamp followed by its index in the page, plus
    the file extension taken from the image URL's path.
    """
    url = ""  # URL of the page whose images should be saved
    # Use a context manager so the HTTP response is always closed.
    with request.urlopen(url) as response:
        # Honour the charset declared by the server; fall back to UTF-8.
        charset = response.headers.get_content_charset() or "utf-8"
        html = response.read().decode(charset)

    parser = MyHTMLParser()
    parser.feed(html)
    parser.close()

    savepath = "保存するフォルダのパス/{}{}"
    for cnt, imgurl in enumerate(parser.imglist):
        if not imgurl:
            # Nothing to fetch for an empty or valueless src attribute.
            continue
        # urljoin resolves relative, root-relative and protocol-relative
        # links against the page URL and leaves absolute URLs untouched;
        # plain string concatenation gets all but the simplest case wrong.
        imgurl = urljoin(url, imgurl)
        # Take the extension from the URL path only, so a query string or
        # fragment does not end up in the saved file name.
        ext = os.path.splitext(urlparse(imgurl).path)[1]
        # File name is the timestamp followed by the image index.
        stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        request.urlretrieve(imgurl, savepath.format(stamp + str(cnt), ext))
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment