Skip to content

Instantly share code, notes, and snippets.

@7yl4r
Created July 20, 2022 19:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 7yl4r/724ff6c956ae127675dd8802d84bb00f to your computer and use it in GitHub Desktop.
pyOBIS pagination implementations
def search(
    scientificname=None, taxonid=None, nodeid=None, datasetid=None,
    startdate=None, enddate=None, startdepth=None, enddepth=None,
    geometry=None, year=None, flags=None, fields=None, size=5000,
    offset=0, mof=False, hasextensions=None, **kwargs
):
    """Search OBIS occurrence records, paginating past the per-request cap.

    Issues a first request for up to 5000 records, then repeatedly requests
    further pages with the ``after`` cursor (the ``id`` of the last record
    received) until ``size`` records — or, when ``size`` is left at its
    default of 5000, all ``total`` matching records — have been accumulated.

    Returns the first response dict with ``results`` extended by every
    subsequent page.

    NOTE(review): relies on module-level ``obis_GET`` and ``url`` — not
    visible in this snippet.
    """
    PAGE = 5000  # hard per-request cap imposed by the OBIS API
    args = {
        # was 'obisid': obisid — NameError, no such parameter; the nodeid
        # parameter was being silently dropped instead.
        'taxonid': taxonid, 'nodeid': nodeid, 'datasetid': datasetid,
        'scientificname': scientificname, 'startdate': startdate,
        'enddate': enddate, 'startdepth': startdepth, 'enddepth': enddepth,
        'geometry': geometry, 'year': year, 'fields': fields,
        'flags': flags, 'offset': offset, 'mof': mof,
        # was 0: the first request fetched no records at all
        'size': min(size, PAGE),
        'hasextensions': hasextensions,
    }
    out = obis_GET(url, args, 'application/json; charset=utf-8', **kwargs)
    # Default size means "fetch everything the query matches".
    limit = out["total"] if size == 5000 else size
    res = out  # cursor source for the first extra page (was: undefined `res`)
    for _ in range(PAGE, limit + 1, PAGE):
        # A short page means the API is exhausted — stop paginating.
        if len(res["results"]) < PAGE:
            break
        args['after'] = res["results"][-1]['id']
        args['size'] = PAGE
        res = obis_GET(url, args, 'application/json; charset=utf-8', **kwargs)
        out["results"] += res["results"]
    return out  # was: implicit None — the accumulated data was discarded
def search(
    scientificname=None, taxonid=None, nodeid=None, datasetid=None,
    startdate=None, enddate=None, startdepth=None, enddepth=None,
    geometry=None, year=None, flags=None, fields=None, size=5000,
    offset=0, mof=False, hasextensions=None, **kwargs
):
    """Search OBIS occurrence records, paginating with the ``after`` cursor.

    Requests pages of at most 5000 records and appends them to the first
    response's ``results`` until ``size`` records are held or the API has
    no more records to return.

    Returns the first response dict with ``results`` extended in place.
    NOTE: callers wanting just the records should read ``out['results']``.

    NOTE(review): relies on module-level ``obis_GET`` and ``url`` — not
    visible in this snippet.
    """
    args = {
        # was 'obisid': obisid — NameError, no such parameter; the nodeid
        # parameter was being silently dropped instead.
        'taxonid': taxonid, 'nodeid': nodeid, 'datasetid': datasetid,
        'scientificname': scientificname, 'startdate': startdate,
        'enddate': enddate, 'startdepth': startdepth, 'enddepth': enddepth,
        'geometry': geometry, 'year': year, 'fields': fields,
        'flags': flags, 'offset': offset, 'mof': mof, 'size': size,
        'hasextensions': hasextensions,
    }
    MAX_PER_REQUEST = 5000  # hard per-request cap imposed by the OBIS API
    args['size'] = min(size, MAX_PER_REQUEST)
    out = obis_GET(url, args, 'application/json; charset=utf-8', **kwargs)
    # Perform additional requests until we hold `size` records.
    while len(out['results']) < size:
        if not out['results']:
            break  # empty first page: nothing matched, nothing to cursor from
        args['after'] = out['results'][-1]['id']
        res = obis_GET(url, args, 'application/json; charset=utf-8', **kwargs)
        if not res['results']:
            break  # API exhausted before `size` — was an infinite loop here
        out['results'] += res['results']
    return out
def search(
    scientificname=None, taxonid=None, nodeid=None, datasetid=None,
    startdate=None, enddate=None, startdepth=None, enddepth=None,
    geometry=None, year=None, flags=None, fields=None, size=5000,
    offset=0, mof=False, hasextensions=None, **kwargs
):
    """Fetch up to ``size`` OBIS occurrence records into a pandas DataFrame.

    Builds the occurrence-endpoint URL directly and pages through results
    5000 at a time using the ``after`` cursor (the ``id`` of the last record
    fetched so far). Stops early when the API returns a short page.

    Returns a ``pandas.DataFrame`` of the concatenated ``results`` pages
    (empty if the query matches nothing).

    NOTE(review): only ``taxonid``, ``startdepth``, ``enddepth``,
    ``geometry`` and ``fields`` are currently forwarded to the API, matching
    the original query string; the remaining parameters are accepted for
    signature compatibility but unused.
    """
    import json
    from urllib.request import urlopen
    import pandas as pd

    # assumed OBIS v3 endpoint — was an undefined global `base`; confirm
    base = 'https://api.obis.org/v3/'
    PAGE = 5000  # hard per-request cap imposed by the OBIS API
    data = pd.DataFrame()  # was never initialized before pd.concat
    after = None  # pagination cursor; None on the first request
    remaining = size
    while remaining > 0:
        # was .format(aphiaID, ..., f) with undefined globals; use the
        # actual parameters instead.
        occurrence_id = (
            'occurrence?taxonid={}&startdepth={}&enddepth={}'
            '&geometry={}&fields={}&size={}'.format(
                taxonid, startdepth, enddepth, geometry, fields,
                min(remaining, PAGE),
            )
        )
        if after is not None:
            occurrence_id += '&after={}'.format(after)
        response = urlopen('{}{}'.format(base, occurrence_id))
        data_json = json.loads(response.read())
        results = data_json['results']
        data = pd.concat([data, pd.DataFrame(results)])
        print('Data shape:', len(results), data.shape)
        # Short page means the API is exhausted — no unconditional extra
        # fetch afterwards (the original re-requested even after breaking).
        if len(results) < PAGE:
            break
        after = data.iloc[-1]['id']
        remaining -= len(results)
    return data  # was: implicit None — the accumulated data was discarded
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment