Skip to content

Instantly share code, notes, and snippets.

@wcaleb

wcaleb/omekaclient.py

Last active Sep 19, 2019
Embed
What would you like to do?
Turn JSON from Omeka API items resource into CSV file
# https://github.com/jimsafley/omeka-client-py
import httplib2
import urllib
import mimetypes
class OmekaClient:
    """Thin HTTP client for an Omeka REST API endpoint.

    Every public method returns the ``(response, content)`` pair produced by
    the underlying ``httplib2.Http.request`` call, unmodified.
    """

    def __init__(self, endpoint, key=None):
        """Remember the API base URL and the optional API key.

        endpoint -- base URL of the API; resource names are appended to it.
        key      -- API key; when not None it is sent as the ``key`` query
                    parameter on every request.
        """
        self._endpoint = endpoint
        self._key = key
        self._http = httplib2.Http()

    def get(self, resource, id=None, query=None):
        """Issue a GET for ``resource`` (optionally a single ``id``)."""
        return self._request("GET", resource, id=id, query=query)

    def post(self, resource, data, query=None, headers=None):
        """Issue a POST to ``resource`` with ``data`` as the request body."""
        return self._request("POST", resource, data=data, query=query, headers=headers)

    def put(self, resource, id, data, query=None):
        """Issue a PUT to ``resource``/``id`` with ``data`` as the request body."""
        return self._request("PUT", resource, id, data=data, query=query)

    def delete(self, resource, id, query=None):
        """Issue a DELETE for ``resource``/``id``."""
        return self._request("DELETE", resource, id, query=query)

    @staticmethod
    def _to_bytes(value):
        """Return ``value`` as a byte string, UTF-8 encoding text if needed."""
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    def post_file(self, data, filename, contents):
        """Upload a file to the ``files`` resource as multipart/form-data.

        data     -- JSON metadata (string) sent as the ``data`` form field.
        filename -- file name reported to the server; also used to guess
                    the part's Content-Type.
        contents -- file contents. Read the file in binary mode ("rb") so
                    that non-text files (e.g. JPEG) are passed through
                    untouched.
        """
        BOUNDARY = '----------E19zNvXGzXaLvS5C'
        headers = {'Content-Type': 'multipart/form-data; boundary=' + BOUNDARY}
        parts = [
            '--' + BOUNDARY,
            'Content-Disposition: form-data; name="data"',
            '',
            data,
            '--' + BOUNDARY,
            'Content-Disposition: form-data; name="file"; filename="%s"' % filename,
            'Content-Type: %s' % self.get_content_type(filename),
            '',
            contents,
            # The final delimiter must carry a trailing "--" (RFC 2046) and
            # be followed by CRLF, otherwise the body is unterminated.
            '--' + BOUNDARY + '--',
            '',
        ]
        # Assemble the body as bytes so that binary contents are never
        # implicitly decoded and content-length counts bytes, not characters.
        body = b'\r\n'.join(self._to_bytes(part) for part in parts)
        headers['content-length'] = str(len(body))
        return self.post("files", body, headers=headers)

    def get_content_type(self, filename):
        """ use mimetypes to detect type of file to be uploaded """
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

    def _request(self, method, resource, id=None, data=None, query=None, headers=None):
        """Build the URL for ``resource`` and perform the HTTP request.

        ``query`` is copied before the API key is added, so neither the
        caller's mapping nor any shared default is ever modified. The query
        string is only appended when there is at least one parameter.
        """
        # urlencode lives in different modules on Python 2 and Python 3.
        try:
            from urllib import urlencode
        except ImportError:
            from urllib.parse import urlencode
        url = self._endpoint + "/" + resource
        if id is not None:
            url += "/" + str(id)
        params = dict(query) if query else {}
        if self._key is not None:
            params["key"] = self._key
        if params:
            url += "?" + urlencode(params)
        resp, content = self._http.request(url, method, body=data, headers=headers)
        return resp, content
# requires https://github.com/jimsafley/omeka-client-py
from omekaclient import OmekaClient
import json
import csv
'''
Extract top-level metadata and element_texts from items returned by an
Omeka 2.x API request, and then write them to a CSV file (Python 2).
Columns are created for every scalar field found on at least one item.
NOTE: when an item has several element_texts with the same element name,
only the last one is kept.
TODO: pagination, resources besides items
'''
endpoint = 'http://youromeka/api'
outfile = 'output.csv'

# make API request; quit with a non-zero exit status if there's an error
# (SystemExit with a string prints it to stderr and exits with status 1)
response, content = OmekaClient(endpoint).get('items')
if response.status != 200:
    raise SystemExit('%s %s' % (response.status, response.reason))

# turn response into a list of dictionaries, flatten each item's metadata
fields = set()
data = json.loads(content)
for D in data:
    # tags: list of tag objects -> comma-separated tag names
    if D['tags']:
        D['tags'] = ', '.join(d['name'] for d in D['tags'])
    # files: keep only the file count
    if isinstance(D['files'], dict):
        D['files'] = D['files']['count']
    # related resources: keep only their id
    for key in ['item_type', 'collection', 'owner']:
        if isinstance(D[key], dict):
            D[key] = unicode(D[key]['id'])
    # promote each element text to a top-level "element name" -> text entry
    for d in D['element_texts']:
        D[d['element']['name']] = d['text']
    # drop whatever is still nested; stringify booleans and integers.
    # Iterate over a snapshot because entries are deleted along the way.
    for k, v in list(D.items()):
        if isinstance(v, (dict, list)):
            del D[k]
        elif isinstance(v, (bool, int)):
            D[k] = unicode(v)
    fields.update(D)

# write to CSV output file using DictWriter instance
fieldnames = [f.encode('utf-8', 'replace') for f in sorted(fields)]
# The Python 2 csv module requires the file to be opened in binary mode;
# the with-block guarantees the file is closed even if a row fails.
with open(outfile, 'wb') as o:
    c = csv.DictWriter(o, fieldnames, restval='None', extrasaction='ignore')
    c.writeheader()
    for D in data:
        # Encode keys the same way as the header so that non-ASCII element
        # names still match their column instead of being silently dropped.
        c.writerow({k.encode('utf-8', 'replace'): v.encode('utf-8', 'replace')
                    for k, v in D.items() if isinstance(v, unicode)})
@mnauge

This comment has been minimized.

Copy link

@mnauge mnauge commented Jul 25, 2017

Hi, thanks a lot for your program! It works perfectly for uploading text files :-)
But I get an error when I try to upload a JPEG file :-(
"""
open(filename, "r").read()

UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d
"""

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment