Skip to content

Instantly share code, notes, and snippets.

@knuu
Last active August 29, 2015 14:10
Show Gist options
  • Save knuu/2c9d905672acfc1104db to your computer and use it in GitHub Desktop.
Save knuu/2c9d905672acfc1104db to your computer and use it in GitHub Desktop.
image-downloader
from urllib import request
from html.parser import HTMLParser
import datetime
import os.path
# Bulk-download every image on a page, given the page URL and a destination folder.
class MyHTMLParser(HTMLParser):
    """Parse HTML and collect the ``src`` value of every ``<img>`` tag.

    After ``feed()`` has been called, ``imglist`` holds the collected
    sources in document order, exactly as they were written in the page
    (they may be relative URLs).
    """

    def __init__(self):
        # HTMLParser.__init__ only accepts keyword options; passing ``self``
        # explicitly would hand it an unexpected extra positional argument
        # and raise TypeError on current Python 3 releases.
        super().__init__()
        # Image sources found so far, in the order they were encountered.
        self.imglist = []

    def handle_starttag(self, tag, attrs):
        """Append the ``src`` attribute to ``imglist`` when *tag* is ``img``.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs for the tag's attributes.
        """
        if tag != 'img':
            return
        # A valueless attribute (``<img src>``) is reported with value None,
        # and ``src=""`` carries no usable link either; skip both so callers
        # only ever see non-empty strings.
        src = dict(attrs).get('src')
        if src:
            self.imglist.append(src)
def main():
    """Download every image referenced by a web page into a local folder.

    The page at ``url`` is fetched and parsed with ``MyHTMLParser``; each
    collected image source is resolved against the page URL and saved with
    ``urlretrieve``. Files are named ``<timestamp><index><extension>``, where
    the timestamp is taken when the individual download starts.

    Both ``url`` and ``savepath`` are placeholders that must be edited
    before running the script.
    """
    # Imported here so the block brings its own dependency into scope.
    from urllib.parse import urljoin, urlsplit

    url = ""  # URL of the page whose images should be saved
    # Destination template: "<folder path>/<name><ext>". The folder part is a
    # placeholder ("path of the folder to save into") to be replaced by the user.
    savepath = "保存するフォルダのパス/{}{}"

    # Close the HTTP response deterministically instead of leaking it.
    with request.urlopen(url) as response:
        # Prefer the charset declared by the server; fall back to UTF-8.
        charset = response.headers.get_content_charset() or "utf-8"
        # Only the img tags matter, so undecodable bytes are replaced rather
        # than aborting the whole run.
        html = response.read().decode(charset, errors="replace")

    parser = MyHTMLParser()
    parser.feed(html)
    parser.close()

    for cnt, imgurl in enumerate(parser.imglist):
        if not imgurl:
            # Nothing to download for an empty or missing src.
            continue
        # urljoin handles relative, root-relative ("/x.png") and
        # protocol-relative ("//host/x.png") sources and leaves absolute
        # URLs untouched; plain string concatenation got these wrong.
        imgurl = urljoin(url, imgurl)
        # Take the extension from the URL path only, so a query string or
        # fragment ("pic.jpg?size=large") does not end up in the file name.
        ext = os.path.splitext(urlsplit(imgurl).path)[1]
        # File name is the current date/time followed by the running index.
        stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        request.urlretrieve(imgurl, savepath.format(stamp + str(cnt), ext))
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment