Skip to content

Instantly share code, notes, and snippets.

@wcaleb wcaleb/omekaclient.py
Last active Sep 19, 2019

Embed
What would you like to do?
Turn JSON from Omeka API items resource into CSV file
# https://github.com/jimsafley/omeka-client-py
import mimetypes
import urllib

import httplib2

try:  # Python 3
    from urllib.parse import urlencode
except ImportError:  # Python 2
    from urllib import urlencode
class OmekaClient(object):
    """ Small HTTP client for an Omeka API endpoint.

    Every request method returns the ``(response, content)`` pair produced
    by ``httplib2.Http.request``.
    """

    def __init__(self, endpoint, key=None):
        """ endpoint is the API base URL, key is an optional API key """
        self._endpoint = endpoint
        self._key = key
        self._http = httplib2.Http()

    def get(self, resource, id=None, query=None):
        """ GET a resource collection, or a single record when id is given """
        return self._request("GET", resource, id=id, query=query)

    def post(self, resource, data, query=None, headers=None):
        """ POST data (the request body) to a resource """
        return self._request("POST", resource, data=data, query=query, headers=headers)

    def put(self, resource, id, data, query=None):
        """ PUT data (the request body) to the record with the given id """
        return self._request("PUT", resource, id, data=data, query=query)

    def delete(self, resource, id, query=None):
        """ DELETE the record with the given id """
        return self._request("DELETE", resource, id, query=query)

    def post_file(self, data, filename, contents):
        """ data is JSON metadata, filename is a string, contents is file contents

        The multipart body is assembled as bytes, so ``contents`` may be
        binary (e.g. read with ``open(path, "rb")``) and ``content-length``
        is the real byte count. Text parts are encoded as UTF-8.
        """
        boundary = '----------E19zNvXGzXaLvS5C'
        headers = {'Content-Type': 'multipart/form-data; boundary=' + boundary}
        delimiter = '--' + boundary
        parts = [
            delimiter,
            'Content-Disposition: form-data; name="data"',
            '',
            data,
            delimiter,
            'Content-Disposition: form-data; name="file"; filename="%s"' % filename,
            'Content-Type: %s' % self.get_content_type(filename),
            '',
            contents,
            # RFC 2046: the last delimiter carries a trailing "--" and the
            # body ends with CRLF (produced by the empty final part).
            delimiter + '--',
            '',
        ]
        body = b'\r\n'.join(self._to_bytes(part) for part in parts)
        headers['content-length'] = str(len(body))
        return self.post("files", body, headers=headers)

    def get_content_type(self, filename):
        """ use mimetypes to detect type of file to be uploaded """
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

    @staticmethod
    def _to_bytes(value):
        """ return value as a byte string, encoding text as UTF-8 """
        if isinstance(value, bytes):
            return value
        if isinstance(value, bytearray):
            return bytes(value)
        return value.encode('utf-8')

    def _request(self, method, resource, id=None, data=None, query=None, headers=None):
        """ build the URL for resource/id, add query and API key, send the request """
        url = self._endpoint.rstrip("/") + "/" + resource
        if id is not None:
            url += "/" + str(id)
        # Work on a copy so the caller's dict is never modified when the
        # API key is added.
        params = dict(query) if query else {}
        if self._key is not None:
            params["key"] = self._key
        if params:
            url += "?" + urlencode(params)
        resp, content = self._http.request(url, method, body=data, headers=headers)
        return resp, content
# requires https://github.com/jimsafley/omeka-client-py
from omekaclient import OmekaClient
import json
import csv
'''
Extract top-level metadata and element_texts from items returned by
Omeka 2.x API request, and then write to a CSV file. Only makes columns
for fields that actually have content in at least one item.
TODO: pagination, resources besides items
'''
endpoint = 'http://youromeka/api'
outfile = 'output.csv'

# True on Python 2, where ``str`` is the byte-string type.
PY2 = str is bytes
# The text type: ``unicode`` on Python 2, ``str`` on Python 3.
text_type = type(u'')


def flatten_item(item):
    """Return a flat copy of one decoded API item; the input is not modified.

    - a non-empty ``tags`` list becomes a comma-separated string of tag names
    - a ``files`` dict is replaced by its ``count``
    - ``item_type``, ``collection`` and ``owner`` dicts are replaced by
      their ``id`` as text
    - every entry of ``element_texts`` becomes a key named after the element
    - remaining dict/list values are dropped; bool and int values become text
    """
    flat = dict(item)

    tags = flat.get('tags')
    if tags:
        flat['tags'] = ', '.join(tag['name'] for tag in tags)

    if isinstance(flat.get('files'), dict):
        flat['files'] = flat['files']['count']

    for key in ('item_type', 'collection', 'owner'):
        if isinstance(flat.get(key), dict):
            flat[key] = text_type(flat[key]['id'])

    # NOTE(review): when an element occurs more than once in an item, only
    # its last text is kept -- confirm whether repeated values should be joined.
    for element_text in flat.get('element_texts') or []:
        flat[element_text['element']['name']] = element_text['text']

    result = {}
    for key, value in flat.items():
        if isinstance(value, (dict, list)):
            continue
        if isinstance(value, int):  # also matches bool, a subclass of int
            value = text_type(value)
        result[key] = value
    return result


def csv_text(value):
    """Return value in the form the csv module expects on this interpreter.

    Python 2's csv module works on byte strings, so text is UTF-8 encoded
    there; on Python 3 the value is returned unchanged.
    """
    if PY2 and isinstance(value, text_type):
        return value.encode('utf-8', 'replace')
    return value


def write_csv(path, rows, fields):
    """Write rows (flat dicts) to path with one sorted column per field.

    Only text values are written; any other cell gets the literal 'None'.
    """
    if PY2:
        handle = open(path, 'wb')
    else:
        handle = open(path, 'w', encoding='utf-8', errors='replace', newline='')
    # The with-block closes the file even if writing a row fails.
    with handle:
        writer = csv.DictWriter(
            handle,
            [csv_text(name) for name in sorted(fields)],
            restval='None',
            extrasaction='ignore',
        )
        writer.writeheader()
        for row in rows:
            # Keys get the same conversion as the header names so that
            # non-ASCII field names still match their column.
            writer.writerow({
                csv_text(key): csv_text(value)
                for key, value in row.items()
                if isinstance(value, text_type)
            })


def main():
    """Fetch the items resource and write it to the CSV output file."""
    # make API request; quit if there's an error
    response, content = OmekaClient(endpoint).get('items')
    if response.status != 200:
        # Message goes to stderr and the process exits with a failure status.
        raise SystemExit('%s %s' % (response.status, response.reason))

    # turn response into dictionaries, retrieve metadata
    items = [flatten_item(item) for item in json.loads(content)]
    fields = set()
    for item in items:
        fields.update(item)

    # write to CSV output file using DictWriter instance
    write_csv(outfile, items, fields)


if __name__ == '__main__':
    main()
@mnauge

This comment has been minimized.

Copy link

commented Jul 25, 2017

Hi, thanks a lot for your program! It works perfectly for uploading text files :-)
But I get an error when I try to upload a JPEG file :-(
"""
open(filename, "r").read()

UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d
"""

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.