Skip to content

Instantly share code, notes, and snippets.

@lukecampbell
Created February 23, 2012 19:43
Show Gist options
  • Save lukecampbell/1894654 to your computer and use it in GitHub Desktop.
Save lukecampbell/1894654 to your computer and use it in GitHub Desktop.
Mangling science data
# Sample stream granule document written as a Python dict literal.
# "identifiables" maps an identifier to the description of one element; the
# top-level "data_stream_id" names the "ctd_data" entry of that mapping.
# Elements point at each other by identifier through "*_id" keys (for
# example "bounds_id"), and "values_path" holds paths such as
# "/east/pressure".
# NOTE(review): the "_id"/"_rev" pair looks like a document-store export -
# confirm where this sample came from before editing values by hand.
# NOTE(review): both spellings "nil_value_ids" and "nil_values_ids" occur as
# keys in this literal.
granule = {
"_id": "1B7D0B452E77DA9AF02C6DA2CD099CBE60AD1D4F",
"_rev": "1-bab10c0be95cd2f771b0413918d668b6",
"identifiables": {
"pressure_bounds": {
"definition": "",
"reference_frame": "",
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"updatable": True,
"type_": "QuantityRangeElement",
"value_pair": [
0.12627243954352793,
40.402941145170885
],
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"pressure_data": {
"definition": "",
"reference_frame": "",
"nil_values_ids": [
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"axis": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"label": "",
"type_": "CoordinateAxis",
"bounds_id": "pressure_bounds",
"updatable": True,
"optional": False,
"id": "",
"values_path": "/east/pressure"
},
"record_count": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"value": 20,
"updatable": True,
"type_": "CountElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"longitude_bounds": {
"definition": "",
"reference_frame": "",
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"updatable": True,
"type_": "QuantityRangeElement",
"value_pair": [
8.782779521540416,
341.48867353583273
],
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"stream_encoding": {
"sha1": "928C0F21E0798A3DED431722F36ADD59D5CDABF4",
"compression": None,
"encoding_type": "hdf5",
"type_": "Encoding",
"record_dimension": "",
"label": "",
"id": "",
"description": ""
},
"temp_bounds": {
"definition": "",
"reference_frame": "",
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"updatable": True,
"type_": "QuantityRangeElement",
"value_pair": [
3.020883696759623,
17.08421557904047
],
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"time_bounds": {
"definition": "",
"reference_frame": "",
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"updatable": True,
"type_": "QuantityRangeElement",
"value_pair": [
1,
20
],
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"longitude": {
"definition": "",
"reference_frame": "",
"nil_values_ids": [
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"axis": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"label": "",
"type_": "CoordinateAxis",
"bounds_id": "longitude_bounds",
"updatable": True,
"optional": False,
"id": "",
"values_path": "/east/longitude"
},
"latitude": {
"definition": "",
"reference_frame": "",
"nil_values_ids": [
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"axis": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"label": "",
"type_": "CoordinateAxis",
"bounds_id": "latitude_bounds",
"updatable": True,
"optional": False,
"id": "",
"values_path": "/east/latitude"
},
"time": {
"definition": "",
"reference_frame": "",
"nil_values_ids": [
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"axis": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"label": "",
"type_": "CoordinateAxis",
"bounds_id": "time_bounds",
"updatable": True,
"optional": False,
"id": "",
"values_path": "/east/time"
},
"temp_data": {
"definition": "",
"reference_frame": "",
"nil_values_ids": [
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"label": "",
"type_": "RangeSet",
# NOTE(review): this "bounds_id" is a list, while every other "bounds_id" in
# this literal is a plain string.
"bounds_id": [
"temp_bounds"
],
"updatable": True,
"optional": False,
"id": "",
"values_path": "/east/temperature"
},
"cndr_bounds": {
"definition": "",
"reference_frame": "",
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"updatable": True,
"type_": "QuantityRangeElement",
"value_pair": [
10.000763005717497,
66.50700633030706
],
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"cndr_data": {
"definition": "",
"reference_frame": "",
"nil_values_ids": [
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"label": "",
"type_": "RangeSet",
"bounds_id": "cndr_bounds",
"updatable": True,
"optional": False,
"id": "",
"values_path": "/east/cndr"
},
"ctd_data": {
"description": "",
"element_type_id": "",
"element_count_id": "",
"type_": "DataStream",
"label": "",
"encoding_id": "",
"values": "",
"id": "954645148399444d8ec5b702374cf611"
},
"latitude_bounds": {
"definition": "",
"reference_frame": "",
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "",
"reference": ""
},
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"updatable": True,
"type_": "QuantityRangeElement",
"value_pair": [
-84.78246112453071,
85.60904083751265
],
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
}
},
"stream_resource_id": "954645148399444d8ec5b702374cf611",
"type_": "StreamGranuleContainer",
"data_stream_id": "ctd_data"
}
# Sample stream definition document written as a Python dict literal.
# "identifiables" maps an identifier to the description of one element, and
# the top-level "data_stream_id" names the "ctd_data" entry.  Elements refer
# to one another by identifier through keys ending in "_id" (a single
# identifier) or "_ids" (a list of identifiers); an empty string means no
# reference.  This is the document the __main__ section passes to traverse().
definition = {
"_id": "091feb6a873a486b99a2fecae604dd7c",
"_rev": "1-595b66fed068c82579fb55048c446fc3",
"identifiables": {
"pressure_data": {
"definition": "http://sweet.jpl.nasa.gov/2.0/physThermo.owl#Pressure",
"reference_frame": "Atmospheric pressure ?",
"nil_values_ids": [
"nan_value"
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "dbar",
"reference": ""
},
"description": "",
"quality_id": "",
"axis": "Pressure",
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "vertex",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
[
0,
10000
]
]
},
"label": "",
"type_": "CoordinateAxis",
"bounds_id": "",
"updatable": True,
"optional": False,
"id": "",
"values_path": "/east/pressure_data"
},
"record_count": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"value": 0,
"updatable": True,
"type_": "CountElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"nan_value": {
"reason": "No value recorded",
"type_": "NilValue",
"value": -999.99
},
"temperature": {
"definition": "http://sweet.jpl.nasa.gov/2.0/physThermo.owl#Temperature",
"description": "",
"domain_id": "time_domain",
"label": "",
"type_": "Coverage",
"range_id": "temp_data",
"updatable": False,
"optional": True,
"id": ""
},
"stream_encoding": {
"sha1": None,
"compression": None,
"encoding_type": "hdf5",
"type_": "Encoding",
"record_dimension": "",
"label": "",
"id": "",
"description": ""
},
"point_timeseries": {
"definition": "",
"description": "",
"number_of_verticies": 1,
"updatable": True,
"type_": "Mesh",
"mesh_type": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "Point Time Series",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"number_of_elements": 1,
"label": "",
"optional": False,
"id": "",
"index_offset": 0,
"values_path": "topology/mesh"
},
"longitude": {
"definition": "http://sweet.jpl.nasa.gov/2.0/spaceCoordinates.owl#Longitude",
"reference_frame": "",
"nil_values_ids": [
"nan_value"
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "deg",
"reference": ""
},
"description": "",
"quality_id": "",
"axis": "Longitude",
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "vertex",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
[
0,
360
]
]
},
"label": "",
"type_": "CoordinateAxis",
"bounds_id": "",
"updatable": True,
"optional": False,
"id": "",
"values_path": "coordinates/longitude"
},
"data_record": {
"definition": "Definition of a data record for a CTD",
"description": "",
"updatable": False,
"type_": "DataRecord",
"field_ids": [
"temperature",
"conductivity",
"pressure"
],
"label": "",
"optional": False,
"id": "",
"domain_ids": [
"time_domain"
]
},
"coordinate_vector": {
"definition": "http://sweet.jpl.nasa.gov/2.0/space.owl#Location",
"reference_frame": "http://www.opengis.net/def/crs/EPSG/0/4326",
"description": "",
"coordinate_ids": [
"longitude",
"latitude",
"pressure_data",
"time"
],
"updatable": True,
"type_": "Vector",
"local_frame": "",
"label": "",
"optional": False,
"id": ""
},
"pressure": {
"definition": "http://sweet.jpl.nasa.gov/2.0/physThermo.owl#Pressure",
"description": "",
"domain_id": "time_domain",
"label": "",
"type_": "Coverage",
"range_id": "pressure_data",
"updatable": False,
"optional": True,
"id": ""
},
"element_type": {
"definition": "Ref to SeaBird data?",
"description": "",
"updatable": False,
"type_": "ElementType",
"data_record_id": "data_record",
"label": "",
"optional": False,
"id": ""
},
"time": {
"definition": "http://www.opengis.net/def/property/OGC/0/SamplingTime",
"reference_frame": "http://www.opengis.net/def/trs/OGC/0/GPS",
"nil_values_ids": [
"nan_value"
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "s",
"reference": ""
},
"description": "",
"quality_id": "",
"axis": "time",
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "vertex",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
]
},
"label": "",
"type_": "CoordinateAxis",
"bounds_id": "",
"updatable": True,
"optional": False,
"id": "",
"values_path": "coordinates/time"
},
"temp_data": {
"definition": "http://sweet.jpl.nasa.gov/2.0/physThermo.owl#Temperature",
"reference_frame": "",
"nil_values_ids": [
"nan_value"
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "Cel",
"reference": ""
},
"description": "",
"quality_id": "",
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "vertex",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
[
-10,
50
]
]
},
"label": "",
"type_": "RangeSet",
"bounds_id": "",
"updatable": True,
"optional": False,
"id": "",
"values_path": "fields/temp_data"
},
"time_domain": {
"definition": "Spec for ctd data time domain",
"coordinate_vector_id": "coordinate_vector",
"description": "",
"label": "",
"type_": "Domain",
"mesh_id": "point_timeseries",
# NOTE(review): the next two values are the string "False", which is truthy,
# whereas every other "updatable"/"optional" in this literal is a boolean.
"updatable": "False",
"optional": "False",
"id": ""
},
"latitude": {
"definition": "http://sweet.jpl.nasa.gov/2.0/spaceCoordinates.owl#Latitude",
"reference_frame": "",
"nil_values_ids": [
"nan_value"
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "deg",
"reference": ""
},
"description": "",
"quality_id": "",
"axis": "Latitude",
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "vertex",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
[
-90,
90
]
]
},
"label": "",
"type_": "CoordinateAxis",
"bounds_id": "",
"updatable": True,
"optional": False,
"id": "",
"values_path": "coordinates/latitude"
},
"cndr_data": {
"definition": "http://sweet.jpl.nasa.gov/2.0/physThermo.owl#Conductivity",
"reference_frame": "",
"nil_values_ids": [
"nan_value"
],
"unit_of_measure": {
"type_": "UnitReferenceProperty",
"code": "mS/cm",
"reference": ""
},
"description": "",
"quality_id": "",
"mesh_location": {
"definition": "",
"reference_frame": "",
"description": "",
"quality_id": "",
"axisID": "",
"constraint": {
"pattern": "",
"type_": "AllowedTokens",
"values": [
]
},
"code_space": "",
"value": "vertex",
"updatable": True,
"type_": "CategoryElement",
"nil_value_ids": [
],
"label": "",
"optional": False,
"id": ""
},
"constraint": {
"significant_figures": -1,
"type_": "AllowedValues",
"intervals": [
],
"values": [
[
0,
85
]
]
},
"label": "",
"type_": "RangeSet",
"bounds_id": "",
"updatable": True,
"optional": False,
"id": "",
"values_path": "fields/cndr_data"
},
"ctd_data": {
"description": "Conductivity temperature and depth observations from a Seabird CTD",
"element_type_id": "element_type",
"element_count_id": "record_count",
"type_": "DataStream",
"label": "Seabird CTD Data",
"encoding_id": "stream_encoding",
"values": None,
"id": None
},
"conductivity": {
"definition": "http://sweet.jpl.nasa.gov/2.0/physThermo.owl#Conductivity",
"description": "",
"domain_id": "time_domain",
"label": "",
"type_": "Coverage",
"range_id": "cndr_data",
"updatable": False,
"optional": True,
"id": ""
}
},
"stream_resource_id": "55cf756bb3cc4245bf90f727cc27e984",
"type_": "StreamDefinitionContainer",
"data_stream_id": "ctd_data"
}
def get_hdf_data(path):
    """Return the canned sample series stored under an HDF-style path.

    The values are hard-coded in this function; no file is read.

    :param path: dataset path, e.g. '/east/pressure'
    :returns: a list of sample values for that path
    :raises KeyError: if ``path`` is not one of the canned datasets
    """
    # The table is rebuilt on every call, so each caller gets its own list
    # objects and may mutate them without affecting later calls.
    canned = dict((
        ('/east/conductivity', [1, 2, 3, 4, 5]),
        ('/west/conductivity', [1, 2, 3, 4, 5]),
        ('/east/temperature', [48, 48, 48, 48, 48]),
        ('/west/temperature', [46, 46, 46, 46, 46]),
        ('/east/pressure', [0, 20, 40, 60]),
        ('/west/pressure', [0, 20, 40, 60]),
    ))
    samples = canned[path]
    return samples
def records(n, sub):
    """Yield successive chunks of at most ``sub`` items taken from ``n``.

    Items are removed from the front of ``n`` as the generator advances, so
    a fully consumed generator leaves ``n`` empty.  The last chunk is shorter
    than ``sub`` when the length of ``n`` is not a multiple of ``sub``.

    :param n: list to consume; it is modified in place
    :param sub: maximum number of items per chunk, must be at least 1
    :raises ValueError: if ``sub`` is smaller than 1 (raised when the first
        chunk is requested, because this is a generator)
    """
    if sub < 1:
        # A non-positive size never consumes anything, so the loop below
        # would otherwise never terminate.
        raise ValueError('sub must be at least 1, got %r' % (sub,))
    while n:
        # Slice and delete once per chunk instead of popping item by item
        # from the front, which shifts the whole list on every pop.
        chunk = n[:sub]
        del n[:sub]
        yield chunk
class tree(dict):
    """Dictionary whose items can also be read and written as attributes.

    Nested dicts passed to the constructor are converted to ``tree``
    recursively; values of any other type (lists included) are stored
    unchanged.

    Attribute reads fall back to the items only when normal attribute lookup
    fails, so keys that collide with dict methods (for example ``values`` or
    ``items``) must be read with ``t['values']``.
    """

    def __init__(self, **kwargs):
        """Populate the tree from keyword arguments, one item per keyword."""
        super(tree, self).__init__()
        for k, v in kwargs.items():
            if isinstance(v, dict):
                self[k] = tree(**v)
            else:
                self[k] = v

    def __getattr__(self, key):
        """Return the item stored under ``key``, or None when it is absent.

        :raises AttributeError: for missing double-underscore names, so that
            protocol probes made with hasattr()/getattr() are answered
            truthfully instead of receiving None.
        """
        # This hook only runs after regular attribute lookup has failed, so
        # it must not call hasattr(self, key): that would re-enter this
        # method and recurse without bound.
        if key.startswith('__') and key.endswith('__'):
            raise AttributeError(key)
        return self.get(key)

    def __setattr__(self, key, val):
        """Store ``val`` as the item ``key`` instead of as an attribute."""
        self[key] = val

    def __dir__(self):
        """List the class attributes followed by the current item keys."""
        return dir(type(self)) + list(self.keys())
def key(definition, key):
    """Fetch one entry of ``definition['identifiables']`` by its identifier.

    :param definition: mapping that has an 'identifiables' mapping
    :param key: identifier to look up
    :raises KeyError: if 'identifiables' or the identifier is missing
    """
    identifiables = definition['identifiables']
    return identifiables[key]
# Module-level placeholder; nothing in the visible code reads or rebinds it.
root_node = None
def traverse(definition, node_id):
    """Build a tree for ``node_id`` with its referenced identifiables nested.

    For every key of the node that ends in ``_id`` or ``_ids``, each
    non-empty identifier it holds is resolved recursively and the resulting
    subtree is stored in the returned tree under that identifier.  Only the
    node's own keys are inspected, not those of dicts nested inside it.

    A reference may be a single identifier or a list/tuple of identifiers
    under either suffix.  Circular references are not detected.

    :param definition: mapping that has an 'identifiables' mapping
    :param node_id: identifier of the node to expand
    :returns: a ``tree`` copy of the node plus one entry per reference
    :raises KeyError: if an identifier is not present in the definition
    """
    node = key(definition, node_id)
    root = tree(**node)
    for name, ref in node.items():
        if not name.endswith(('_id', '_ids')) or not ref:
            continue
        # Normalise to a sequence so that a list stored under a "*_id" key
        # is expanded item by item instead of being used as a dict key.
        refs = ref if isinstance(ref, (list, tuple)) else [ref]
        for child_id in refs:
            if child_id:
                root[child_id] = traverse(definition, child_id)
    return root
def to_tree(definition):
    """Expand the data stream of ``definition`` into one nested tree.

    The data stream named by ``definition['data_stream_id']`` is copied and
    given ``encoding``, ``element_type`` and ``element_count`` entries
    resolved from its ``*_id`` keys.  The element type gets a
    ``data_record`` entry, whose ``fields`` entry maps each id in
    ``field_ids`` to a copy of that field carrying resolved ``domain`` and
    ``range`` entries.

    :param definition: mapping with 'identifiables' and 'data_stream_id'
    :returns: the expanded data stream as a ``tree``
    :raises KeyError: if a referenced identifier or a required key is missing
    """
    identifiables = tree(**definition['identifiables'])

    def resolve(identifier):
        # Copy the entry so that additions below never touch `identifiables`.
        return tree(**identifiables[identifier])

    # Items are read with [] rather than as attributes so that keys which
    # collide with dict method names can never be shadowed.
    data_stream = resolve(definition['data_stream_id'])
    data_stream['encoding'] = resolve(data_stream['encoding_id'])
    data_stream['element_type'] = resolve(data_stream['element_type_id'])
    data_stream['element_count'] = resolve(data_stream['element_count_id'])

    element_type = data_stream['element_type']
    data_record = resolve(element_type['data_record_id'])
    element_type['data_record'] = data_record

    fields = tree()
    for field_id in data_record['field_ids']:
        field = resolve(field_id)
        field['domain'] = resolve(field['domain_id'])
        field['range'] = resolve(field['range_id'])
        fields[field_id] = field
    data_record['fields'] = fields
    return data_stream
if __name__ == '__main__':
    # Script entry point: expand the sample definition starting from its
    # data stream and pretty-print the resulting nested structure.
    from pprint import pprint
    stream_id = definition['data_stream_id']
    root = traverse(definition, stream_id)
    pprint(root)
@dstuebe
Copy link

dstuebe commented Feb 24, 2012

Why aren't you using dot notation here,
e.g. granule.identifiables?

@lukecampbell
Copy link
Author

Because granule is a plain dictionary, not a dotdict,
in this script.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment