Last active
September 19, 2019 18:30
-
-
Save wcaleb/1f8f7f779df91fd43c37 to your computer and use it in GitHub Desktop.
Turn JSON from Omeka API items resource into CSV file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://github.com/jimsafley/omeka-client-py | |
import httplib2 | |
import urllib | |
import mimetypes | |
class OmekaClient:
    """Thin HTTP client for an Omeka API endpoint.

    Every request method returns the ``(response, content)`` pair produced
    by ``httplib2.Http.request``.

    Args:
        endpoint: Base URL of the API, without a trailing slash
            (e.g. ``'http://youromeka/api'``).
        key: Optional API key.  When set, it is sent as the ``key`` query
            parameter on every request.
    """

    def __init__(self, endpoint, key=None):
        self._endpoint = endpoint
        self._key = key
        self._http = httplib2.Http()

    def get(self, resource, id=None, query=None):
        """Issue a GET for ``resource`` (optionally a single ``id``)."""
        return self._request("GET", resource, id=id, query=query)

    def post(self, resource, data, query=None, headers=None):
        """Issue a POST to ``resource`` with ``data`` as the request body."""
        return self._request("POST", resource, data=data, query=query, headers=headers)

    def put(self, resource, id, data, query=None):
        """Issue a PUT to ``resource``/``id`` with ``data`` as the request body."""
        return self._request("PUT", resource, id, data=data, query=query)

    def delete(self, resource, id, query=None):
        """Issue a DELETE for ``resource``/``id``."""
        return self._request("DELETE", resource, id, query=query)

    def post_file(self, data, filename, contents):
        """Upload a file to the ``files`` resource as multipart/form-data.

        Args:
            data: JSON metadata, already serialized to a string.
            filename: Name reported for the uploaded file; also used to
                guess its content type.
            contents: The file contents.

        Returns:
            The ``(response, content)`` pair from the POST request.
        """
        boundary = '----------E19zNvXGzXaLvS5C'
        crlf = '\r\n'
        headers = {'Content-Type': 'multipart/form-data; boundary=' + boundary}
        # NOTE(review): RFC 2046 ends a multipart body with '--BOUNDARY--';
        # the final delimiter below has no trailing '--'.  Left unchanged so
        # the bytes on the wire stay the same -- confirm against the server.
        parts = [
            '--' + boundary,
            'Content-Disposition: form-data; name="data"',
            '',
            data,
            '--' + boundary,
            'Content-Disposition: form-data; name="file"; filename="%s"' % filename,
            'Content-Type: %s' % self.get_content_type(filename),
            '',
            contents,
            '--' + boundary,
        ]
        body = crlf.join(parts)
        headers['content-length'] = str(len(body))
        return self.post("files", body, headers=headers)

    def get_content_type(self, filename):
        """Guess the MIME type of ``filename``; default to a generic binary type."""
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

    def _build_url(self, resource, id=None, query=None):
        """Return the request URL for ``resource``, including the query string.

        ``query`` is never modified: the API key is added to a copy, so a
        caller's dict cannot pick up (and later leak) this client's key.
        """
        # Imported here so the class works on both Python 2 and Python 3.
        try:
            from urllib import urlencode  # Python 2
        except ImportError:
            from urllib.parse import urlencode  # Python 3

        url = self._endpoint + "/" + resource
        if id is not None:
            url += "/" + str(id)
        params = {} if query is None else query
        if self._key is not None:
            params = dict(params)
            params["key"] = self._key
        # The '?' is appended even for an empty query, as before.
        return url + "?" + urlencode(params)

    def _request(self, method, resource, id=None, data=None, query=None, headers=None):
        """Send one HTTP request and return ``(response, content)``."""
        url = self._build_url(resource, id, query)
        resp, content = self._http.request(url, method, body=data, headers=headers)
        return resp, content
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# requires https://github.com/jimsafley/omeka-client-py | |
from omekaclient import OmekaClient | |
import json | |
import csv | |
'''
Extract top-level metadata and element_texts from items returned by
Omeka 2.x API request, and then write to a CSV file. Only makes columns
for fields that actually have content in at least one item.
TODO: pagination, resources besides items
'''

endpoint = 'http://youromeka/api'
outfile = 'output.csv'

# make API request; quit (with a failing exit status) if there's an error
response, content = OmekaClient(endpoint).get('items')
if response.status != 200:
    print("%s %s" % (response.status, response.reason))
    raise SystemExit(1)

# turn response into a list of item dictionaries and flatten each one
fields = set()
data = json.loads(content)
for item in data:
    # tags: list of tag objects -> comma-separated tag names
    tags = item.get('tags')
    if tags:
        item['tags'] = ', '.join(tag['name'] for tag in tags)

    # files: summary object -> number of files
    files = item.get('files')
    if isinstance(files, dict):
        item['files'] = files['count']

    # linked resources: keep only their id
    for key in ('item_type', 'collection', 'owner'):
        linked = item.get(key)
        if isinstance(linked, dict):
            item[key] = unicode(linked['id'])

    # element texts become columns named after their element.
    # NOTE(review): a later text for the same element name overwrites an
    # earlier one -- confirm whether repeated elements should be joined.
    for element_text in item.get('element_texts') or []:
        item[element_text['element']['name']] = element_text['text']

    # drop what cannot go in a CSV cell; stringify booleans and integers.
    # Iterate over a snapshot because entries are deleted along the way.
    for k, v in list(item.items()):
        if isinstance(v, (dict, list)):
            del item[k]
        elif isinstance(v, (bool, int)):
            item[k] = unicode(v)

    fields.update(item)

# write to CSV output file using DictWriter instance
header = [f.encode('utf-8', 'replace') for f in sorted(fields)]
# 'wb': the Python 2 csv module needs a binary-mode file, otherwise extra
# carriage returns appear on platforms that translate line endings.
with open(outfile, 'wb') as o:
    c = csv.DictWriter(o, header, restval='None', extrasaction='ignore')
    c.writeheader()
    for item in data:
        # Encode keys as well as values so rows match the encoded header
        # even when a field name contains non-ASCII characters.
        c.writerow({
            k.encode('utf-8', 'replace'): v.encode('utf-8', 'replace')
            for k, v in item.items()
            if isinstance(v, unicode)
        })
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, thanks a lot for your program! It works perfectly for uploading text files :-)
But I get an error when I try to upload a JPEG file :-(
"""
open(filename, "r").read()
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d
"""