Skip to content

Instantly share code, notes, and snippets.

@curegit
Created June 17, 2020 07:37
Show Gist options
  • Save curegit/e2e263c63f9d6c6de9f13dade5ee20ce to your computer and use it in GitHub Desktop.
Save curegit/e2e263c63f9d6c6de9f13dade5ee20ce to your computer and use it in GitHub Desktop.
Safebooru から画像をスクレイピングするスクリプト
import sys
from os import makedirs
from itertools import count
from os.path import basename
from xml.etree import ElementTree
from urllib.parse import urlencode
from urllib.request import urlopen, urlretrieve
# For every tag given on the command line, download all matching Safebooru
# images into a directory named after the tag (created if missing).
for tag in sys.argv[1:]:
    # `out_dir` rather than `dir` so the builtin is not shadowed.
    out_dir = basename(tag)
    makedirs(out_dir, exist_ok=True)
    base_url = "https://safebooru.org/index.php?page=dapi&s=post&q=index&{}"
    # The API's `pid` page index is zero-based; starting at 1 would silently
    # skip the first page of results for every tag.
    for page in count(0):
        url = base_url.format(urlencode({"tags": tag, "pid": page}))
        with urlopen(url) as response:
            content = response.read()
        xml = ElementTree.fromstring(content)
        posts = list(xml.iter("post"))
        # An empty page means we have walked past the last result.
        if not posts:
            break
        for post in posts:
            image_url = post.get("file_url")
            # A post without a file URL cannot be downloaded; skip it instead
            # of crashing in basename(None) and aborting the whole run.
            if not image_url:
                continue
            urlretrieve(image_url, filename=f"{out_dir}/{basename(image_url)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment