Skip to content

Instantly share code, notes, and snippets.



Last active Sep 19, 2019
What would you like to do?
Turn JSON from Omeka API items resource into CSV file
import httplib2
import urllib
import mimetypes
class OmekaClient:
    """Minimal client for an Omeka REST API, built on ``httplib2``.

    Every verb method returns the ``(response, content)`` pair produced by
    ``httplib2.Http.request``.
    """

    def __init__(self, endpoint, key=None):
        """Store the API base URL and the optional API key.

        endpoint -- base URL; resource paths are appended to it
        key -- sent as the ``key`` query parameter on every request when
               it is not None
        """
        self._endpoint = endpoint
        self._key = key
        self._http = httplib2.Http()

    def get(self, resource, id=None, query=None):
        """ GET a resource collection, or a single resource when id is given """
        return self._request("GET", resource, id=id, query=query)

    def post(self, resource, data, query=None, headers=None):
        """ POST data (the request body) to a resource collection """
        return self._request("POST", resource, data=data, query=query, headers=headers)

    def put(self, resource, id, data, query=None):
        """ PUT data (the request body) to the resource with the given id """
        return self._request("PUT", resource, id, data=data, query=query)

    def delete(self, resource, id, query=None):
        """ DELETE the resource with the given id """
        return self._request("DELETE", resource, id, query=query)

    def post_file(self, data, filename, contents):
        """ data is JSON metadata, filename is a string, contents is file contents """
        body, headers = self._encode_multipart(data, filename, contents)
        return self.post("files", body, {}, headers)

    def get_content_type(self, filename):
        """ use mimetypes to detect type of file to be uploaded """
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

    @staticmethod
    def _to_bytes(value):
        """ return value unchanged if it is already bytes, else UTF-8 encode it """
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    def _encode_multipart(self, data, filename, contents):
        """Build the multipart/form-data payload for a file upload.

        Returns ``(body, headers)``: body is a byte string holding a
        ``data`` part and a ``file`` part, headers carries the matching
        Content-Type and content-length.
        """
        boundary = '----------E19zNvXGzXaLvS5C'
        parts = [
            '--' + boundary,
            'Content-Disposition: form-data; name="data"',
            '',  # blank line separates part headers from part content
            data,
            '--' + boundary,
            'Content-Disposition: form-data; name="file"; filename="%s"' % filename,
            'Content-Type: %s' % self.get_content_type(filename),
            '',
            contents,
            # RFC 2046 close delimiter: the boundary followed by "--".
            '--' + boundary + '--',
            '',
        ]
        # Join as bytes so binary file contents are never implicitly
        # decoded; text parts are encoded as UTF-8 first.
        body = b'\r\n'.join(self._to_bytes(part) for part in parts)
        headers = {
            'Content-Type': 'multipart/form-data; boundary=' + boundary,
            'content-length': str(len(body)),
        }
        return body, headers

    def _build_url(self, resource, id=None, query=None):
        """Return the request URL for resource (and id) plus the query string.

        The caller's query mapping is copied, never modified, so the API
        key cannot leak into a dict shared with other calls or clients.
        """
        try:
            from urllib import urlencode  # Python 2
        except ImportError:
            from urllib.parse import urlencode  # Python 3
        params = dict(query) if query else {}
        if self._key is not None:
            params["key"] = self._key
        url = self._endpoint + "/" + resource
        if id is not None:
            url += "/" + str(id)
        return url + "?" + urlencode(params)

    def _request(self, method, resource, id=None, data=None, query=None, headers=None):
        """ send one HTTP request and return httplib2's (response, content) pair """
        url = self._build_url(resource, id, query)
        resp, content = self._http.request(url, method, body=data, headers=headers)
        return resp, content
# requires omekaclient.py (which provides OmekaClient) on the import path
from omekaclient import OmekaClient
import json
import csv
"""
Extract top-level metadata and element_texts from items returned by
Omeka 2.x API request, and then write to a CSV file. Only makes columns
for fields that actually have content in at least one item.
TODO: pagination, resources besides items
"""
# NOTE: Python 2 script -- it relies on the ``unicode`` builtin and on the
# byte-oriented Python 2 ``csv`` module.
endpoint = 'http://youromeka/api'
outfile = 'output.csv'


def flatten_item(item):
    """Flatten one decoded Omeka item in place into text-valued columns.

    Tags become one comma-separated string, ``files`` becomes its count,
    ``item_type``/``collection``/``owner`` become their ids, and every
    element text becomes a column named after its element. Remaining
    dicts and lists are dropped and every other non-null scalar is
    converted to text. Returns the same dict.
    """
    if item['tags']:
        item['tags'] = ', '.join(tag['name'] for tag in item['tags'])
    if isinstance(item['files'], dict):
        item['files'] = item['files']['count']
    for key in ('item_type', 'collection', 'owner'):
        if isinstance(item[key], dict):
            item[key] = unicode(item[key]['id'])
    # NOTE: when an element occurs more than once, the last text wins.
    for element_text in item['element_texts']:
        item[element_text['element']['name']] = element_text['text']
    # Iterate over a snapshot because entries are deleted along the way.
    for key, value in list(item.items()):
        if isinstance(value, (dict, list)):
            del item[key]
        elif value is not None and not isinstance(value, unicode):
            # Covers bool, int, long and float, so no scalar is silently
            # dropped when rows are written.
            item[key] = unicode(value)
    return item


def _utf8(text):
    """Encode text as UTF-8 bytes, which is what the Python 2 csv writer needs."""
    return text.encode('utf-8', 'replace')


# make API request; quit if there's an error
response, content = OmekaClient(endpoint).get('items')
if response.status != 200:
    print("%s %s" % (response.status, response.reason))
    raise SystemExit(1)

# turn response into dictionary, retrieve metadata
fields = []
data = json.loads(content)
for D in data:
    flatten_item(D)
    for k in D:
        if k not in fields:
            fields.append(k)

# write to CSV output file using DictWriter instance
# Binary mode: the Python 2 csv module emits its own line terminators.
with open(outfile, 'wb') as o:
    c = csv.DictWriter(o, [_utf8(f) for f in sorted(fields)],
                       restval='None', extrasaction='ignore')
    # Columns are discovered dynamically, so the header row is what
    # identifies them.
    c.writeheader()
    for D in data:
        # Keys are encoded like the fieldnames so that non-ASCII column
        # names still match.
        c.writerow({_utf8(k): _utf8(v) for k, v in D.items()
                    if isinstance(v, unicode)})

This comment has been minimized.

Copy link

@mnauge mnauge commented Jul 25, 2017

Hi, thanks a lot for your program! It works perfectly for uploading text files :-)
But I get an error when I try to upload a JPEG file :-(
open(filename, "r").read()

UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment