Skip to content

Instantly share code, notes, and snippets.

@hughlilly
Last active August 29, 2018 00:35
Show Gist options
  • Save hughlilly/f86168b4cc60c9287b6208bc4be7af5f to your computer and use it in GitHub Desktop.
Get Digital NZ data and write to out.csv
"""Fetch Auckland-related image records from the DigitalNZ API and append
one CSV row per record to ``out.csv``.

Requires a local, uncommitted ``settings.py`` defining ``DNZ_API_KEY``.
The script's only outputs are its side effects: HTTP requests, console
progress messages, and rows appended to ``out.csv``.
"""
from pprint import pprint as pp  # kept for interactive inspection of responses
import csv

import requests

import settings

# Get API key from local file settings.py (kept out of version control).
key = settings.DNZ_API_KEY

# Content-partner names. The macron in the AAG name is written as a Unicode
# escape (U+0101, a-with-macron) so the source file itself stays ASCII-safe.
AAG = 'Auckland Art Gallery Toi o T\u0101maki'
# NOTE(review): original spells "Tamaki" without the macron here; the API
# match may depend on that exact spelling, so it is preserved -- confirm.
AM = 'Auckland War Memorial Museum Tamaki Paenga Hira'
AL = 'Auckland Libraries'

# Keyword(s) to search on.
k = 'Auckland'

# Pagination: `pages` is an exclusive upper bound -- it must start at 1,
# not 0, so pages = 4 fetches pages 1..3 (three pages' worth of data).
page = 1
pages = 4
per_page = 100  # records per page (API maximum is 100)

BASE_URL = 'http://api.digitalnz.org/v3/records.json'
FIELDS = ('id,title,content_partner,description,display_date,'
          'updated_at,landing_url,large_thumbnail_url')

while page < pages:
    # Let requests build and percent-encode the query string. The original
    # hand-concatenated URL left spaces and the macron unencoded and used
    # the malformed parameter names "[api_key]" / "[text]"; the documented
    # names are "api_key" and "text". A list value yields the repeated
    # "or[content_partner][]=..." array syntax the API expects.
    params = {
        'api_key': key,
        'or[content_partner][]': [AM, AAG, AL],
        'and[category][]': 'Images',
        'text': k,
        'per_page': per_page,
        'page': page,
        'fields': FIELDS,
    }
    resp = requests.get(BASE_URL, params=params)
    print('Request URL:\n\n' + resp.url + '\n')
    print('=============================\n')
    resp_json = resp.json()
    print('Page', page)

    # Append this page's rows. encoding='utf-8' + newline='' is the csv
    # module's contract in Python 3 and replaces the manual
    # .encode('utf-8') workaround from the Python 2 version.
    with open('out.csv', 'a', encoding='utf-8', newline='') as f:
        w = csv.writer(f, delimiter=',', quoting=csv.QUOTE_MINIMAL, quotechar='"')
        for value in resp_json['search']['results']:
            w.writerow([
                value['id'],
                value['title'],
                value['content_partner'][0],
                value['description'],
                value['display_date'],
                value['updated_at'],
                value['landing_url'],
                value['large_thumbnail_url'],
            ])
    page += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment