Skip to content

Instantly share code, notes, and snippets.

Last active August 6, 2023 23:42
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
import os
import requests
import json
import re
import time
from bs4 import BeautifulSoup
from datetime import datetime
# Directory (relative to the current working directory) that receives
# every downloaded media file.
dl_dir = "dl"
# Create it up front; an already existing directory is not an error.
os.makedirs(name=dl_dir, exist_ok=True)
def _extract_gon(script_text):
    """Return the first JSON object in *script_text* with a "mediaFiles" key.

    The text is scanned for ``{`` characters and a JSON decode is attempted
    at each one, so the object is found regardless of the JavaScript
    variable it is assigned to, and ``;`` or ``}`` characters inside JSON
    strings cannot truncate it.

    Args:
        script_text: Raw text content of a ``<script>`` element.

    Returns:
        The decoded dict, or ``None`` when no such object is present.
    """
    # Cheap pre-filter so unrelated scripts are not scanned brace by brace.
    if '"mediaFiles"' not in script_text:
        return None
    decoder = json.JSONDecoder()
    pos = script_text.find("{")
    while pos != -1:
        try:
            candidate, _ = decoder.raw_decode(script_text, pos)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict) and "mediaFiles" in candidate:
            return candidate
        pos = script_text.find("{", pos + 1)
    return None


def save_files(url, page):
    """Download every media file of an album, starting at *page*.

    Each album page is fetched, the JSON payload embedded in one of its
    ``<script>`` elements is decoded, and every entry of its "mediaFiles"
    list is saved into ``dl_dir`` as ``<tookAt as YYYYmmddHHMMSS>.<subtype>``.
    Following pages are processed for as long as the payload reports
    "hasNext".

    Args:
        url: Album URL (a trailing slash is ignored).
        page: 1-based number of the first page to process.

    Raises:
        Exception: If no script on a page contains the media payload.
        requests.HTTPError: If a page or media request returns an error status.
    """
    url = url.rstrip("/")
    # Iterate instead of recursing once per page, so very long albums
    # cannot hit the interpreter's recursion limit.
    while True:
        if page % 10 == 0:
            # NOTE(review): the body of this branch was lost in the source;
            # a progress message every 10 pages is an assumption.
            print(f"page {page}")

        response = requests.get(f"{url}?page={page}", timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Locate the JavaScript variable "gon" payload. Every script tag is
        # inspected because the first one is not guaranteed to hold it.
        gon = None
        for script in soup.find_all("script"):
            gon = _extract_gon(script.string or "")
            if gon is not None:
                break
        if gon is None:
            raise Exception("Could not find JavaScript variable 'gon'")

        # Save the media files.
        for media_file in gon["mediaFiles"]:
            took_at = media_file["tookAt"]
            took_at_datetime = datetime.fromisoformat(took_at.replace("Z", "+00:00"))
            took_at_str = took_at_datetime.strftime("%Y%m%d%H%M%S")
            filename = f'{took_at_str}.{media_file["contentType"].split("/")[-1]}'
            file_path = os.path.join(dl_dir, filename)
            media_url = f'{url}/media_files/{media_file["uuid"]}/download'
            media_response = requests.get(media_url, timeout=60)
            media_response.raise_for_status()
            with open(file_path, 'wb') as f:
                f.write(media_response.content)
            # One file per second to avoid putting load on the server.
            time.sleep(1)

        # Continue with the next page only while the payload says one exists.
        if not gon.get("hasNext"):
            break
        page += 1
if __name__ == "__main__":
    url = ""  # Album URL; must be filled in before running.
    if not url:
        # Fail with a clear message instead of an opaque request error
        # caused by fetching "?page=1" with no host.
        raise SystemExit("Set `url` to the album URL before running this script.")
    save_files(url, 1)  # Initial call: start from the first page.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment