Last active
February 6, 2023 11:17
-
-
Save robertknight/bb0d5f12b3c907d0829576a41310a31c to your computer and use it in GitHub Desktop.
Script to page through annotations in a Hypothesis group
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import requests
# Pages through the annotations of one Hypothesis group via the search API,
# newest-updated first, and reports how many annotations each page returned
# versus how many the reported total said to expect.
#
# Configuration comes from two environment variables:
#   HYPOTHESIS_GROUP         - ID of the group to page through
#   HYPOTHESIS_ACCESS_TOKEN  - token sent as a Bearer credential

api_url = "https://hypothes.is/api/search"
page_size = 200
max_to_fetch = 10_000  # Max annotations to fetch before we stop
request_timeout = 30  # Seconds to wait on the server before giving up


def next_page_limit(page_size: int, max_to_fetch: int, fetched: int) -> int:
    """Return the `limit` to request for the next page.

    This is a full page, shrunk so that the running total never exceeds
    `max_to_fetch`, and clamped at zero once the cap has been reached.
    """
    return max(0, min(page_size, max_to_fetch - fetched))


def expected_page_size(
    page_size: int, expected_total: int, fetched: int, missing: int
) -> int:
    """Return how many annotations the next page should contain.

    `expected_total` is the number of annotations we expect to see overall,
    `fetched` is how many have been received so far and `missing` is the
    shortfall already accounted for on earlier pages. The result is never
    negative and never larger than `page_size`.
    """
    remaining = max(expected_total - fetched - missing, 0)
    return min(page_size, remaining)


def fetch_page(session, group, search_after, limit):
    """Fetch one page of search results and return the decoded JSON body.

    `session` is a `requests.Session` that already carries the
    Authorization header. `search_after` is the `updated` value of the last
    annotation on the previous page, or "" for the first page.

    Raises `requests.HTTPError` for a non-2xx response, and the usual
    `requests` exceptions if the server does not answer within
    `request_timeout` seconds.
    """
    resp = session.get(
        api_url,
        params={
            "limit": limit,
            "sort": "updated",
            "order": "desc",
            "_separate_replies": "false",
            "group": group,
            "search_after": search_after,
        },
        # Without a timeout a stalled connection would hang the script forever.
        timeout=request_timeout,
    )
    resp.raise_for_status()
    return resp.json()


def main():
    """Page through the group's annotations and print a per-page report."""
    group = os.environ["HYPOTHESIS_GROUP"]
    access_token = os.environ["HYPOTHESIS_ACCESS_TOKEN"]

    print(f"Paging through annotations in group {group}")

    search_after = ""
    total_anns = 0  # Total annotations fetched so far
    missing_total_anns = 0  # Sum of "missing" annotations from pages fetched so far

    # One session for the whole run so the connection is reused across pages.
    with requests.Session() as session:
        session.headers.update({"Authorization": f"Bearer {access_token}"})

        while True:
            limit = next_page_limit(page_size, max_to_fetch, total_anns)
            resp_data = fetch_page(session, group, search_after, limit)

            annotations = resp_data["rows"]
            expected_total = min(max_to_fetch, resp_data["total"])
            expected_page = expected_page_size(
                page_size, expected_total, total_anns, missing_total_anns
            )

            missing_total_anns += expected_page - len(annotations)
            total_anns += len(annotations)

            # Report every non-empty page, including the one that reaches the
            # cap, so a short final page is not silently folded into the total.
            if annotations:
                if len(annotations) != expected_page:
                    print(
                        f"Expected {expected_page} annotations in page but got {len(annotations)}"
                    )
                first_id = annotations[0]["id"]
                last_id = annotations[-1]["id"]
                print(
                    f"Fetched {len(annotations)} annotations from {first_id}..{last_id}"
                )

            # `>=` rather than `==`: stop even if the server returned more rows
            # than the requested limit.
            if not annotations or total_anns >= max_to_fetch:
                print(
                    f"Fetched {total_anns} total annotations (missing {missing_total_anns})"
                )
                break

            # Results are sorted by `updated`, so the last row's timestamp is
            # the cursor for the next page.
            search_after = annotations[-1]["updated"]


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment