Skip to content

Instantly share code, notes, and snippets.

@nijave
Created December 28, 2022 03:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nijave/7001f4361bc4c60979434f1077ca2c36 to your computer and use it in GitHub Desktop.
Save nijave/7001f4361bc4c60979434f1077ca2c36 to your computer and use it in GitHub Desktop.
Archive.org capture date dumper
import requests
# Archive.org capture date dumper.
#
# Walks the Wayback Machine's internal "__wb" JSON endpoints for one URL
# (year list -> per-day summary -> per-capture list) and prints one timestamp
# string per capture whose recorded HTTP status is 2xx.

# Page whose capture history is dumped.
search_url = "https://leekduck.com/boxsales/"

# Sent with every request to the archive endpoints.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0",
    "Referer": "https://web.archive.org/web/*",
}

SPARKLINE_URL = "https://web.archive.org/__wb/sparkline"
CALENDAR_URL = "https://web.archive.org/__wb/calendarcaptures/2"

# Seconds to wait for a response. Without a timeout, requests blocks
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30


def _get_json(session, url, params):
    """Fetch *url* with *params* through *session* and return the decoded JSON.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            the failure is reported at the request that caused it rather
            than as a confusing JSON/KeyError further down.
    """
    response = session.get(
        url,
        headers=headers,
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.json()


def _is_success(status_code):
    """Return True when *status_code* is an integer in the 2xx range.

    Non-integer values (e.g. a placeholder string for an unknown status) are
    treated as "not successful" instead of crashing on ``//``.
    """
    return isinstance(status_code, int) and status_code // 100 == 2


capture_dates = []

# One Session reuses the underlying connection across the many small requests.
with requests.Session() as session:
    sparklines = _get_json(
        session,
        SPARKLINE_URL,
        {
            "output": "json",
            "collection": "web",
            "url": search_url,
        },
    )

    # The keys of "years" are used as year strings for the calendar queries.
    for year in sparklines["years"]:
        days = _get_json(
            session,
            CALENDAR_URL,
            {
                "date": year,
                "groupby": "day",
                "url": search_url,
            },
        )
        # NOTE(review): falls back to an empty list if the response carries
        # no "items" entry - confirm whether the endpoint ever omits it.
        for day, day_status, _ in days.get("items") or []:
            if not _is_success(day_status):
                continue
            # Day values are zero-padded to 4 digits (presumably MMDD) and
            # appended to the year to form the per-day query date.
            date = year + str(day).zfill(4)
            timestamps = _get_json(
                session,
                CALENDAR_URL,
                {
                    "date": date,
                    "url": search_url,
                },
            )
            for time_of_day, capture_status, _ in timestamps.get("items") or []:
                if not _is_success(capture_status):
                    continue
                # Time values are zero-padded to 6 digits (presumably hhmmss).
                capture_dates.append(date + str(time_of_day).zfill(6))

print("\n".join(capture_dates))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment