get_gbif_datasets.py
@peterdesmet, last active May 11, 2016
Python script to get specific metadata from GBIF-registered datasets.
"""Fetch selected metadata fields for all GBIF-registered datasets as CSV."""
import csv
import sys

import requests


def get_datasets(offset, limit):
    """Request one page of datasets from the GBIF registry API."""
    # Use stderr for progress, so stdout stays clean for the CSV results
    sys.stderr.write('Requesting batch {}\n'.format(offset))
    request = requests.get('http://api.gbif.org/v1/dataset/',
                           params={'limit': limit, 'offset': offset})
    return request.json()['results']


def parse_metadata(dataset, fields):
    """Extract the requested fields, using '' for fields a dataset lacks."""
    metadata = []
    for field in fields:
        try:
            metadata.append(dataset[field])
        except KeyError:
            metadata.append('')
    return metadata


def main(limit=20):
    still_more_datasets = True
    offset = 0
    csvwriter = csv.writer(sys.stdout, lineterminator='\n')
    # Fields to keep: all command-line arguments after the script name itself
    fields_to_keep = sys.argv[1:]
    # Write the header row
    csvwriter.writerow(fields_to_keep)
    # Page through the registry until an empty batch is returned
    while still_more_datasets:
        next_batch = get_datasets(offset, limit)
        if len(next_batch) == 0:
            # Nothing returned, stop requesting
            still_more_datasets = False
        else:
            for dataset in next_batch:
                csvwriter.writerow(parse_metadata(dataset, fields_to_keep))
            offset += limit


if __name__ == '__main__':  # If run from the command line
    sys.exit(main())
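For context on the paging loop: each call to the GBIF dataset endpoint returns a paged JSON envelope, and the `results` array comes back empty once the last page has been passed, which is what ends the `while` loop. A minimal sketch (assuming `requests` is installed) to inspect one page and see which top-level field names are available to pass as script arguments:

import requests

# Fetch a single dataset record to list the metadata fields it exposes
page = requests.get('http://api.gbif.org/v1/dataset/', params={'limit': 1}).json()
print(sorted(page['results'][0].keys()))  # field names, e.g. 'key', 'title', 'type'
print(page['endOfRecords'])               # the API also flags the final page explicitly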
@peterdesmet commented on May 11, 2016:
Usage: python get_gbif_datasets.py key title type rights > datasets.csv
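A quick way to sanity-check the export, sketched under the assumption that the command above produced datasets.csv with at least the key and title columns:

import csv

# Read the exported CSV back and show the first dataset
with open('datasets.csv', newline='') as f:
    for row in csv.DictReader(f):
        print(row['key'], row['title'])
        break  # first row only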
