Skip to content

Instantly share code, notes, and snippets.

@gadamc
Last active December 11, 2015 20:48
Show Gist options
  • Save gadamc/4657695 to your computer and use it in GitHub Desktop.
Save gadamc/4657695 to your computer and use it in GitHub Desktop.
#you'll need to easy_install jsonschema and couchdbkit
#
# -- download the schema description by
# running this from your terminal
#
# wget https://raw.github.com/gadamc/persephone/schema/_attachments/schema/data.schema.ietf_draft_v3.aarm_spec_v1.01.json -O schema.json
#
import json, cPickle, jsonschema, couchdbkit
doclist = []
#load the schema description; the 'with' block closes the file once it is parsed
with open('schema.json') as schema_file:
    schemadoc = json.load(schema_file)
#for loop over each line in latex file # as james has shown
#parse line in latex file to extract data
#'line' is a list of values from the table.
#now store the values in the 'line' list in a python dictionary.
#that matches our schema.json
#(everything from here down to doclist.append(doc) is meant to run once per
# table row, i.e. inside the loop described above, where 'line' is defined)
doc = {}
doc['type'] = 'measurement'
#...
doc_sample = {}
doc_sample['m_name'] = line[1]
#...
doc['sample'] = doc_sample
#... measurement, data source,
#use a python schema validator tool here to ensure docs are in correct format.
#note, this is version 1.01 which requires doc['measurement']['m_date'] to be
#a list of one or two date strings in the format of YYYY-MM-DD
#and doc['measurement']['m_result'][X]['error'] to be a list of one or two numbers to specify
#a symmetric or asymmetric error bar
#
#also, any extra user fields can be added to 'data_source', 'sample' and 'measurement'
#as an object containing only simple values stored within 'u_extra'. For example:
#doc['sample']['u_extra'] = { 'a':'string', 'b':5 }
#
#this is not allowed
# doc['sample']['u_extra'] = { 'a':'string', 'b': [5, 4, 3] }
#
#the module itself is not callable; validation is done by jsonschema.validate
jsonschema.validate(doc, schemadoc) #raises an Exception (jsonschema.ValidationError) if doc format fails.
doclist.append(doc) #store data in list for later
#three ways to store to disk
#each file is opened in a 'with' block so it is flushed and closed before
#anything tries to read it back
#1. plain JSON text
with open('/path/to/filename.json', 'w') as json_file:
    json.dump(doclist, json_file, indent=1) #you don't need indent, but it prints pretty json
#2. pickle -- opened in binary mode so the bytes are identical on every platform
with open('/path/to/filename.pickle', 'wb') as pickle_file:
    cPickle.dump(doclist, pickle_file)
#3. a CouchDB database (created on the server if it does not exist yet)
db = couchdbkit.Server('http://localhost:5984').get_or_create_db('exodata')
db.bulk_save(doclist)
# To read the data out
#each of the three alternatives below rebuilds 'doclist'; use whichever
#matches the way the data was stored
with open('/path/to/filename.json') as json_file:
    doclist = json.load(json_file)
#only unpickle files you wrote yourself -- loading a pickle can run arbitrary code
with open('/path/to/filename.pickle', 'rb') as pickle_file:
    doclist = cPickle.load(pickle_file)
db = couchdbkit.Server('http://localhost:5984').get_or_create_db('exodata')
vr = db.all_docs(include_docs=True)
#include_docs=True was requested above, so each row's 'doc' entry is read here
doclist = [row['doc'] for row in vr]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment