nijave/archive-org-capture-dates.py

## archive-org-capture-dates.py
import requests

# Page whose Wayback Machine capture history is listed.
search_url = "https://leekduck.com/boxsales/"

# Browser-like headers sent with every request.
# NOTE(review): presumably the __wb endpoints expect calls that look like
# they come from the Wayback calendar UI -- not verified here.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0",
    "Referer": "https://web.archive.org/web/*",
}

SPARKLINE_URL = "https://web.archive.org/__wb/sparkline"
CALENDAR_URL = "https://web.archive.org/__wb/calendarcaptures/2"

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = (10, 60)


def _get_json(session, url, params):
    """Return the decoded JSON body of a GET request.

    Args:
        session: ``requests.Session`` used to send the request.
        url: Endpoint to query.
        params: Query-string parameters for the request.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within
            ``REQUEST_TIMEOUT``.
    """
    response = session.get(url, params=params, timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTP error instead of a JSON decode error on an
    # error page.
    response.raise_for_status()
    return response.json()


capture_dates = []

# One Session reuses the underlying connection across the many per-day
# requests made below.
with requests.Session() as session:
    session.headers.update(headers)

    # The keys of "years" drive the per-year calendar queries.
    sparklines = _get_json(
        session,
        SPARKLINE_URL,
        {
            "output": "json",
            "collection": "web",
            "url": search_url,
        },
    )

    for year in sparklines["years"]:
        items = _get_json(
            session,
            CALENDAR_URL,
            {
                "date": year,
                "groupby": "day",
                "url": search_url,
            },
        )

        # Each item is a 3-element sequence; the first element is
        # zero-padded to 4 digits (presumably MMDD -- confirm), the second
        # is an HTTP status code, the third is unused here.
        for day, status_code, _ in items["items"]:
            if status_code // 100 != 2:
                # Only days whose reported status is 2xx are expanded.
                continue
            date = year + str(day).zfill(4)
            timestamps = _get_json(
                session,
                CALENDAR_URL,
                {
                    "date": date,
                    "url": search_url,
                },
            )
            # Same item layout, but the first element is zero-padded to 6
            # digits (presumably HHMMSS -- confirm).
            for time_of_day, status_code, _ in timestamps["items"]:
                if status_code // 100 != 2:
                    continue
                capture_dates.append(date + str(time_of_day).zfill(6))

# One capture timestamp per line.
print("\n".join(capture_dates))
	import requests

	# Page whose Wayback Machine capture history is listed.
	search_url = "https://leekduck.com/boxsales/"

	# Browser-like headers sent with every request.
	# NOTE(review): presumably the __wb endpoints expect calls that look like
	# they come from the Wayback calendar UI -- not verified here.
	headers = {
		"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0",
		"Referer": "https://web.archive.org/web/*",
	}

	SPARKLINE_URL = "https://web.archive.org/__wb/sparkline"
	CALENDAR_URL = "https://web.archive.org/__wb/calendarcaptures/2"

	# (connect, read) timeouts in seconds. Without a timeout, requests waits
	# indefinitely on a stalled connection.
	REQUEST_TIMEOUT = (10, 60)


	def _get_json(session, url, params):
		"""Return the decoded JSON body of a GET request.

		Args:
			session: ``requests.Session`` used to send the request.
			url: Endpoint to query.
			params: Query-string parameters for the request.

		Raises:
			requests.HTTPError: If the server answers with a 4xx/5xx status.
			requests.Timeout: If the server does not respond within
				``REQUEST_TIMEOUT``.
		"""
		response = session.get(url, params=params, timeout=REQUEST_TIMEOUT)
		# Fail with a clear HTTP error instead of a JSON decode error on an
		# error page.
		response.raise_for_status()
		return response.json()


	capture_dates = []

	# One Session reuses the underlying connection across the many per-day
	# requests made below.
	with requests.Session() as session:
		session.headers.update(headers)

		# The keys of "years" drive the per-year calendar queries.
		sparklines = _get_json(
			session,
			SPARKLINE_URL,
			{
				"output": "json",
				"collection": "web",
				"url": search_url,
			},
		)

		for year in sparklines["years"]:
			items = _get_json(
				session,
				CALENDAR_URL,
				{
					"date": year,
					"groupby": "day",
					"url": search_url,
				},
			)

			# Each item is a 3-element sequence; the first element is
			# zero-padded to 4 digits (presumably MMDD -- confirm), the
			# second is an HTTP status code, the third is unused here.
			for day, status_code, _ in items["items"]:
				if status_code // 100 != 2:
					# Only days whose reported status is 2xx are expanded.
					continue
				date = year + str(day).zfill(4)
				timestamps = _get_json(
					session,
					CALENDAR_URL,
					{
						"date": date,
						"url": search_url,
					},
				)
				# Same item layout, but the first element is zero-padded to
				# 6 digits (presumably HHMMSS -- confirm).
				for time_of_day, status_code, _ in timestamps["items"]:
					if status_code // 100 != 2:
						continue
					capture_dates.append(date + str(time_of_day).zfill(6))

	# One capture timestamp per line.
	print("\n".join(capture_dates))