Skip to content

Instantly share code, notes, and snippets.

@alaniwi
Created February 11, 2019 10:48
Show Gist options
  • Save alaniwi/f5ab883275ca416fd2bd69ada01e4334 to your computer and use it in GitHub Desktop.
Save alaniwi/f5ab883275ca416fd2bd69ada01e4334 to your computer and use it in GitHub Desktop.
Script to get a list of dataset IDs from the index node via the ESGF search.
"""
Gets a list of dataset IDs from the index node via the ESGF search.
"""
import requests
class GetSolrIDs(object):
    """
    Collects dataset instance IDs from an ESGF index node's search service.

    Results are fetched page by page using the 'offset' and 'limit' query
    parameters, restricted to datasets matching one 'data_node' value.
    """

    def __init__(self,
                 url='https://c3s-models-index.ceda.ac.uk/esg-search/search/',
                 data_node='data.mips.copernicus-climate.eu',
                 verbose=False):
        """
        url: search endpoint that the queries are sent to.
        data_node: value sent as the 'data_node' search parameter.
        verbose: if true, print the offset of each request as it is made.
        """
        self.url = url
        self.data_node = data_node
        self.verbose = verbose

    def _get_ids_page(self, start, count):
        """
        Returns one page of IDs as a list: the 'instance_id' value of each
        document in the response, requesting `count` results starting at
        result offset `start`.

        Raises requests.HTTPError if the server replies with an error status.
        """
        if self.verbose:
            print("querying with start={}".format(start))
        field = 'instance_id'
        params = {'offset': start,
                  'limit': count,
                  'type': 'Dataset',
                  'data_node': self.data_node,
                  'format': 'application/solr+json',
                  'fields': field}
        resp = requests.get(self.url, params=params)
        # Fail clearly on an HTTP error instead of with an obscure JSON
        # decoding error or KeyError further down.
        resp.raise_for_status()
        content = resp.json()
        return [doc[field] for doc in content["response"]["docs"]]

    def get_ids(self, num_per_request=10000):
        """
        Returns a list of all IDs, requesting `num_per_request` at a time
        and stopping at the first page that comes back empty.
        """
        all_ids = []
        start = 0
        while True:
            ids = self._get_ids_page(start, num_per_request)
            if not ids:
                return all_ids
            all_ids.extend(ids)
            # Advance by the number of IDs actually received: if the server
            # returns fewer than requested (e.g. it caps the page size),
            # stepping by num_per_request would skip results.
            start += len(ids)
def main():
    """
    Fetches all dataset IDs using the default GetSolrIDs settings and
    prints them in sorted order, one per line.
    """
    dataset_ids = GetSolrIDs().get_ids()
    dataset_ids.sort()
    for dataset_id in dataset_ids:
        print(dataset_id)
# Run the listing only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@alaniwi
Copy link
Author

alaniwi commented Feb 11, 2019

Needs the `requests` package (e.g. `pip install requests`).

@alaniwi
Copy link
Author

alaniwi commented Feb 12, 2019

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment