Skip to content

Instantly share code, notes, and snippets.

@flushpot1125
Last active February 7, 2026 22:44
Show Gist options
  • Select an option

  • Save flushpot1125/8d4c78bb65c2623b3edd9fae4d3c899f to your computer and use it in GitHub Desktop.

Select an option

Save flushpot1125/8d4c78bb65c2623b3edd9fae4d3c899f to your computer and use it in GitHub Desktop.
# ブログに含まれる画像をクロールするコード
import urllib.parse
import urllib.request
from pathlib import Path

import requests
from bs4 import BeautifulSoup
# Blog entry whose images are crawled.
PAGE_URL = 'https://www.crossroad-tech.com/entry/babylonjs-editor-postprocess'
# Directory the images are saved into (created on demand).
IMAGE_DIR = Path('./images')
# Only image URLs whose path contains this marker are downloaded; the saved
# file name is built from the marker onwards.
URL_MARKER = 'Takyu'
# Seconds to wait for the blog page before giving up.
REQUEST_TIMEOUT = 30


def extract_image_urls(html, base_url=''):
    """Return the ``src`` of every ``<img>`` tag found in *html*.

    Tags without a ``src`` (or with an empty one) are skipped.  Each value is
    resolved against *base_url*, so relative sources become downloadable
    absolute URLs; with the default empty *base_url* they are returned as-is.

    For the blog entry above this yields URLs such as::

        https://cdn-ak.f.st-hatena.com/images/fotolife/T/Takyu/20251221/20251221073656.jpg
        https://cdn-ak.f.st-hatena.com/images/fotolife/T/Takyu/20251221/20251221170649.jpg
    """
    soup = BeautifulSoup(html, 'html.parser')
    sources = (img_tag.get("src") for img_tag in soup.find_all("img"))
    return [urllib.parse.urljoin(base_url, src) for src in sources if src]


def build_filename(img_url, marker=URL_MARKER):
    """Build the local file name for *img_url*, or ``None`` to skip it.

    The name is the URL path from the first occurrence of *marker* onwards,
    with every ``/`` replaced by ``_``.  Only the path component is used, so
    a query string such as ``?1584799880`` never ends up in the file name
    (it would break the extension and is not a legal name on Windows).

    Returns ``None`` when *marker* does not occur in the URL path.
    """
    path = urllib.parse.urlsplit(img_url).path
    marker_index = path.find(marker)
    if marker_index == -1:
        return None
    return path[marker_index:].replace("/", "_")


def download_images(img_urls, dst_dir=IMAGE_DIR, marker=URL_MARKER):
    """Download every URL in *img_urls* that contains *marker* into *dst_dir*.

    *dst_dir* is created if it does not exist yet.  A ``Downloaded: <name>``
    line is printed per saved file.  Download errors raised by
    ``urllib.request.urlretrieve`` are not caught and abort the run.

    Returns the list of file names that were saved, in download order.
    """
    dst_dir = Path(dst_dir)
    # urlretrieve does not create missing directories itself.
    dst_dir.mkdir(parents=True, exist_ok=True)

    saved = []
    for img_url in img_urls:
        filename = build_filename(img_url, marker)
        if filename is None:
            continue
        urllib.request.urlretrieve(img_url, dst_dir / filename)
        print(f"Downloaded: {filename}")
        saved.append(filename)
    return saved


def main():
    """Fetch the blog entry and download its matching images."""
    res = requests.get(PAGE_URL, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    res.raise_for_status()
    download_images(extract_image_urls(res.text, PAGE_URL))


if __name__ == "__main__":
    main()

# Example run (query strings are dropped from the saved file names, so the
# profile image is stored as Takyu_profile.png):
# $ python3 ./python_scraping_images.py
#Downloaded: Takyu_20251221_20251221073656.jpg
#Downloaded: Takyu_20251221_20251221170649.jpg
#Downloaded: Takyu_20251221_20251221154945.jpg
#Downloaded: Takyu_20251221_20251221160632.jpg
#Downloaded: Takyu_20251221_20251221170340.gif
#Downloaded: Takyu_profile.png
#Downloaded: Takyu_20181225_20181225224730.jpg
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment