Skip to content

Instantly share code, notes, and snippets.

@robertknight
Last active February 6, 2023 11:17
Show Gist options
  • Save robertknight/bb0d5f12b3c907d0829576a41310a31c to your computer and use it in GitHub Desktop.
Save robertknight/bb0d5f12b3c907d0829576a41310a31c to your computer and use it in GitHub Desktop.
Script to page through annotations in a Hypothesis group
import os
import requests
# Page through the annotations of one Hypothesis group, newest-updated first,
# and report how many annotations each page returned versus how many the
# API's reported total implied it should have returned.
#
# Required environment variables (a missing one raises KeyError before any
# request is made):
#   HYPOTHESIS_GROUP         - ID of the group to search
#   HYPOTHESIS_ACCESS_TOKEN  - token sent as a Bearer Authorization header

search_after = ""  # Cursor for the next page; empty for the first request
page_size = 200
group = os.environ["HYPOTHESIS_GROUP"]
access_token = os.environ["HYPOTHESIS_ACCESS_TOKEN"]
max_to_fetch = 10_000  # Max annotations to fetch before we stop
request_timeout = 30  # Seconds; requests has no timeout unless one is given

print(f"Paging through annotations in group {group}")

total_anns = 0  # Total annotations fetched so far
missing_total_anns = 0  # Sum of "missing" annotations from pages fetched so far

# These do not change between pages, so build them once.
url = "https://hypothes.is/api/search"
headers = {
    "Authorization": f"Bearer {access_token}",
}

while True:
    resp = requests.get(
        url,
        params={
            # Never ask for more than the remaining budget (and never < 0).
            "limit": max(0, min(page_size, max_to_fetch - total_anns)),
            "sort": "updated",
            "order": "desc",
            "_separate_replies": "false",
            "group": group,
            "search_after": search_after,
        },
        headers=headers,
        # Fail instead of hanging forever if the server stops responding.
        timeout=request_timeout,
    )
    resp.raise_for_status()
    resp_data = resp.json()
    annotations = resp_data["rows"]

    # Number of rows this page should hold according to the total the API
    # reports, after discounting rows already fetched and rows already
    # counted as missing on earlier pages.
    expected_total = min(max_to_fetch, resp_data["total"])
    expected_page = min(
        page_size, max(expected_total - total_anns - missing_total_anns, 0)
    )
    missing_total_anns += expected_page - len(annotations)
    total_anns += len(annotations)

    # Stop on an empty page or once the budget is used up. `>=` rather than
    # `==` so the loop still ends if a page returns more rows than requested.
    if not annotations or total_anns >= max_to_fetch:
        print(f"Fetched {total_anns} total annotations (missing {missing_total_anns})")
        break

    if len(annotations) != expected_page:
        print(
            f"Expected {expected_page} annotations in page but got {len(annotations)}"
        )

    first_id = annotations[0]["id"]
    last_id = annotations[-1]["id"]
    print(f"Fetched {len(annotations)} annotations from {first_id}..{last_id}")

    # Results are sorted by `updated`, so the last row's timestamp is the
    # cursor from which the next page continues.
    search_after = annotations[-1]["updated"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment