Created
July 20, 2022 19:04
-
-
Save 7yl4r/724ff6c956ae127675dd8802d84bb00f to your computer and use it in GitHub Desktop.
pyOBIS pagination implementations — three draft approaches to paging through OBIS API occurrence results 5000 records at a time.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def search(
    scientificname=None, taxonid=None, nodeid=None, datasetid=None,
    startdate=None, enddate=None, startdepth=None, enddepth=None,
    geometry=None, year=None, flags=None, fields=None, size=5000,
    offset=0, mof=False, hasextensions=None, **kwargs
):
    """Search OBIS occurrences, paging through results 5000 at a time.

    A probe request with ``size=0`` fetches the total record count, then
    successive requests use the OBIS ``after``-id cursor to collect pages
    of up to 5000 records until ``limit`` records are gathered or the
    server runs out of results.

    NOTE(review): depends on module-level ``url`` and ``obis_GET`` —
    confirm both are in scope where this function is defined.

    Returns the first response dict with all paged ``results`` merged in.
    """
    args = {
        # BUGFIX: original referenced undefined name ``obisid``; the only
        # matching parameter is ``nodeid`` — TODO confirm the API key name.
        'taxonid': taxonid, 'nodeid': nodeid, 'datasetid': datasetid,
        'scientificname': scientificname, 'startdate': startdate,
        'enddate': enddate, 'startdepth': startdepth, 'enddepth': enddepth,
        'geometry': geometry, 'year': year, 'fields': fields,
        'flags': flags, 'offset': offset, 'mof': mof,
        'size': 0,  # probe request only; real page size is set in the loop
        'hasextensions': hasextensions,
    }
    out = obis_GET(url, args, 'application/json; charset=utf-8', **kwargs)
    # size == 5000 (the default) means "fetch everything the server has".
    limit = out["total"] if size == 5000 else size
    res = None
    # BUGFIX: original iterated range(5000, limit + 1, 5000), which skips
    # the final partial page (e.g. limit=12000 fetched only 10000 records).
    for _ in range(0, limit, 5000):
        if res is not None:
            # BUGFIX: original read ``res`` before any assignment on the
            # first pass, and probed res["results"][4999] under a bare
            # ``except:`` that also swallowed unrelated errors.
            if len(res["results"]) < 5000:
                break  # previous page was short: no more records
            args['after'] = res["results"][-1]['id']
        args['size'] = 5000
        res = obis_GET(url, args, 'application/json; charset=utf-8', **kwargs)
        out["results"] += res["results"]
    # BUGFIX: original built ``out`` but never returned it.
    return out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def search(
    scientificname=None, taxonid=None, nodeid=None, datasetid=None,
    startdate=None, enddate=None, startdepth=None, enddepth=None,
    geometry=None, year=None, flags=None, fields=None, size=5000,
    offset=0, mof=False, hasextensions=None, **kwargs
):
    """Search OBIS occurrences, paging with the ``after``-id cursor.

    Each request is capped at 5000 records; additional requests are issued
    until ``size`` records have been accumulated or the server returns no
    further results.

    NOTE(review): depends on module-level ``url`` and ``obis_GET`` —
    confirm both are in scope where this function is defined.

    Returns the first response dict with all paged ``results`` merged in.
    """
    MAX_PER_REQUEST = 5000
    args = {
        # BUGFIX: original referenced undefined name ``obisid``; the only
        # matching parameter is ``nodeid`` — TODO confirm the API key name.
        'taxonid': taxonid, 'nodeid': nodeid, 'datasetid': datasetid,
        'scientificname': scientificname, 'startdate': startdate,
        'enddate': enddate, 'startdepth': startdepth, 'enddepth': enddepth,
        'geometry': geometry, 'year': year, 'fields': fields,
        'flags': flags, 'offset': offset, 'mof': mof,
        'size': min(size, MAX_PER_REQUEST),
        'hasextensions': hasextensions,
    }
    out = obis_GET(url, args, 'application/json; charset=utf-8', **kwargs)
    # Perform additional requests while n_records_fetched < n_records_desired.
    # BUGFIX: guard against an empty first page, which would make
    # out['results'][-1] raise IndexError.
    while out['results'] and len(out['results']) < size:
        args['after'] = out['results'][-1]['id']
        res = obis_GET(url, args, 'application/json; charset=utf-8', **kwargs)
        if not res['results']:
            # BUGFIX: original looped forever when the server held fewer
            # than ``size`` records — every extra request returned an empty
            # page and len(out['results']) never grew.
            break
        out['results'] += res['results']
    return out  # NOTE: should we return out['results'] here instead?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def search(
    scientificname=None, taxonid=None, nodeid=None, datasetid=None,
    startdate=None, enddate=None, startdepth=None, enddepth=None,
    geometry=None, year=None, flags=None, fields=None, size=5000,
    offset=0, mof=False, hasextensions=None, **kwargs
):
    """Page through OBIS /occurrence results into one pandas DataFrame.

    Builds the query URL by hand, fetching up to 5000 records per call and
    using the last record's ``id`` as the ``after`` cursor for each
    subsequent call.

    NOTE(review): depends on module-level names ``aphiaID``, ``f``,
    ``base``, ``urlopen``, ``json`` and ``pd``; none of the function's own
    filter parameters except startdepth/enddepth/geometry are used —
    confirm this is intentional (this draft ignores the rest).
    """
    # BUGFIX: original used ``data`` before any assignment on the first
    # loop iteration (and again after a loop that may never run).
    data = pd.DataFrame()
    for i in range(5000, size, 5000):
        if data.empty:
            # First call: no ``after`` cursor yet.
            occurrence_id = 'occurrence?taxonid={}&startdepth={}&enddepth={}&geometry={}&fields={}&size=5000'.format(aphiaID, startdepth, enddepth, geometry, f)
        else:
            # Subsequent calls resume after the last record fetched.
            uuid = data.iloc[-1]['id']
            occurrence_id = 'occurrence?taxonid={}&startdepth={}&enddepth={}&geometry={}&fields={}&after={}&size=5000'.format(aphiaID, startdepth, enddepth, geometry, f, uuid)
        url = '{}{}'.format(base, occurrence_id)
        # BUGFIX: close the HTTP response instead of leaking the socket.
        with urlopen(url) as response:
            data_json = json.loads(response.read())
        data = pd.concat([data, pd.DataFrame(data_json['results'])])
        print('Data shape:', len(data_json['results']), data.shape)
        if len(data_json['results']) < 5000:
            break  # short page: server has no more records
    else:
        # Loop finished without exhausting the server: fetch the final
        # partial page from the last fetched id up to the expected size.
        # BUGFIX: original ran this tail fetch unconditionally, which
        # raised IndexError when the loop never ran (size <= 5000) and
        # wasted a request when results were already exhausted.
        if not data.empty:
            uuid = data.iloc[-1]['id']
            occurrence_id = 'occurrence?taxonid={}&startdepth={}&enddepth={}&geometry={}&fields={}&after={}&size=5000'.format(aphiaID, startdepth, enddepth, geometry, f, uuid)
            url = '{}{}'.format(base, occurrence_id)
            with urlopen(url) as response:
                data_json = json.loads(response.read())
            data = pd.concat([data, pd.DataFrame(data_json['results'])])
    # BUGFIX: original never returned the accumulated DataFrame.
    return data
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment