Skip to content

Instantly share code, notes, and snippets.

@alanorth
Created August 22, 2023 06:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alanorth/6e39a0915a6da5344e04f874776daf41 to your computer and use it in GitHub Desktop.
Save alanorth/6e39a0915a6da5344e04f874776daf41 to your computer and use it in GitHub Desktop.
Test harvesting 106,000 items from the DSpace 7 REST API
#!/usr/bin/env python3
import signal
import sys

import requests
def signal_handler(signal, frame):
    """Terminate the script with exit status 1.

    The two parameters are the signal number and the current stack frame,
    as passed by the interpreter to any handler registered through
    ``signal.signal``. Neither is used.

    Raises:
        SystemExit: always, with exit code 1.
    """
    # NOTE: the ``signal`` parameter shadows the ``signal`` module inside
    # this function; that is harmless because the body never uses the module.
    sys.exit(1)
# Set the signal handler for SIGINT (^C) so we can exit cleanly.
signal.signal(signal.SIGINT, signal_handler)

rest_api_base = "https://dspace7test.ilri.org/server/api"
rest_api_endpoint = "discover/search/objects"
request_url = f"{rest_api_base}/{rest_api_endpoint}"
request_params = {"dsoType": "item", "size": 100}
# Seconds to wait for the server before giving up on a request. Without a
# timeout, a stalled connection would block the harvest forever.
request_timeout = 60


def _fetch_page(session):
    """Request one page of search results and return its pagination info.

    The request is sent to ``request_url`` with the current contents of
    ``request_params``.

    Args:
        session: the ``requests.Session`` used to send the request.

    Returns:
        A ``(page_number, total_pages)`` tuple read from the ``page`` object
        of the search result.

    Raises:
        SystemExit: if the request fails or the server does not answer with
            HTTP 200. Exiting here prevents the caller's loop from retrying
            the same page endlessly.
    """
    try:
        response = session.get(
            request_url, params=request_params, timeout=request_timeout
        )
    except requests.RequestException as error:
        sys.exit(f"Request to {request_url} failed: {error}")

    if response.status_code != requests.codes.ok:
        sys.exit(
            f"Request to {response.url} failed with HTTP {response.status_code}"
        )

    page_info = response.json()["_embedded"]["searchResult"]["page"]

    return page_info["number"], page_info["totalPages"]


# A single session reuses the underlying connection across all page requests.
with requests.Session() as session:
    page_number, total_pages = _fetch_page(session)
    print(f"Processed page {page_number} / {total_pages}")

    # Page numbers are zero-based, so the last valid page is total_pages - 1.
    while page_number + 1 < total_pages:
        request_params.update(page=page_number + 1)
        page_number, total_pages = _fetch_page(session)
        print(f"Processed page {page_number} / {total_pages}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment