Skip to content

Instantly share code, notes, and snippets.

@joefutrelle
Last active May 1, 2018 17:54
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joefutrelle/4968837c10091edb97296e8cfc3ed0ff to your computer and use it in GitHub Desktop.
Save joefutrelle/4968837c10091edb97296e8cfc3ed0ff to your computer and use it in GitHub Desktop.
Create CF-compliant NetCDF files from MVCO nutrient data
import os
from scipy.io import loadmat
import pandas as pd
from pocean.dsg.timeseriesProfile.om import OrthogonalMultidimensionalTimeseriesProfile as OMTP
# Convert the MVCO nutrient replicate table stored in a MATLAB .mat file into
# one NetCDF file per sampling event, written with pocean's
# OrthogonalMultidimensionalTimeseriesProfile (OMTP) writer.

MAT_FILE = '/vagrant/nut_data_reps.mat'
OUT_DIR = './output'

# squeeze_me=True strips singleton dimensions from the loaded MATLAB arrays.
mat = loadmat(MAT_FILE, squeeze_me=True)

# Map the column headers found in the .mat file to the short variable names
# used in the NetCDF output. x / y / z / t are the axis names handed to OMTP.
# NOTE: 'scla_c' is presumably a transposition of 'slca_c'. It is kept as-is
# because the ERDDAP dataset definition further down this file reads the
# variable under the sourceName 'scla_c'.
col_map = {
    'Event_Number': 'event_number',
    'Event_Number_Niskin': 'event_number_niskin',
    'Latitude': 'y',
    'Longitude': 'x',
    'Depth': 'z',
    'Nut_a_uM NO2- + NO3-': 'ntra_a',
    'Nut_b_uM NO2- + NO3-': 'ntra_b',
    'Nut_c_uM NO2- + NO3-': 'ntra_c',
    'Nut_a_uM NH4+': 'amon_a',
    'Nut_b_uM NH4+': 'amon_b',
    'Nut_c_uM NH4+': 'amon_c',
    'Nut_a_uM SiO2-': 'slca_a',
    'Nut_b_uM SiO2-': 'slca_b',
    'Nut_c_uM SiO2-': 'scla_c',
    'Nut_a_uM PO43-': 'phos_a',
    'Nut_b_uM PO43-': 'phos_b',
    'Nut_c_uM PO43-': 'phos_c',
}

# Build the dataframe: 'header_nut' holds the column names and column i of
# 'MVCO_nut_reps' holds the values for header i.
cols = mat['header_nut']
table = mat['MVCO_nut_reps']
series_by_col = {}
for i, col in enumerate(cols):
    series_by_col[col] = pd.Series(list(table[:, i]))
df = pd.DataFrame(series_by_col, columns=cols)

# Compute one timestamp per row: the first 10 characters of Start_Date supply
# the date and everything from character 11 of Start_Time_UTC supplies the
# time of day.
# NOTE(review): presumably the date part of Start_Time_UTC is the "incorrect"
# piece being discarded here - confirm against the source data.
dt = pd.Series([
    pd.to_datetime('{}T{}Z'.format(date_str[:10], time_str[11:]))
    for date_str, time_str in zip(df['Start_Date'], df['Start_Time_UTC'])
])

# Replace the two raw string columns with the combined timestamp column.
df['t'] = dt
del df['Start_Date']
del df['Start_Time_UTC']

df = df.rename(columns=col_map)

# Negate the depth column so that z is 0 - depth.
df['z'] = 0 - df['z']

# Create the output directory up front so the first write cannot fail on a
# fresh checkout.
if not os.path.isdir(OUT_DIR):
    os.makedirs(OUT_DIR)

# Write one NetCDF file per event, named after the event number.
for event_number, sdf in df.groupby('event_number'):
    # Every row gets station 0. assign() returns a new frame, so the
    # groupby sub-frame is not modified in place (avoids pandas'
    # SettingWithCopy warning).
    sdf = sdf.assign(station=0)
    outpath = os.path.join(OUT_DIR, '{}.nc'.format(event_number))
    print('writing {}...'.format(outpath))
    nc = OMTP.from_dataframe(sdf, outpath)
    # NOTE(review): from_dataframe is expected to hand back an open dataset
    # handle; close it so file handles do not accumulate across events.
    # The None guard keeps this safe if a pocean release returns nothing.
    if nc is not None:
        nc.close()
<!-- ERDDAP dataset "lter-nutrient": serves the per-event NetCDF files found
     under fileDir as a single tabular TimeSeriesProfile dataset. The
     commented-out sourceAttributes blocks record the attributes present in
     the source files and are informational only. -->
<dataset type="EDDTableFromNcCFFiles" datasetID="lter-nutrient" active="true">
    <!-- Full reload once a week (10080 minutes); incremental update checks
         are spaced at least 10 seconds (10000 ms) apart. -->
    <reloadEveryNMinutes>10080</reloadEveryNMinutes>
    <updateEveryNMillis>10000</updateEveryNMillis>
    <!-- Every .nc file under fileDir, including subdirectories, is part of
         the dataset. -->
    <fileDir>/home/vagrant/lter-poc/output/</fileDir>
    <fileNameRegex>.*\.nc</fileNameRegex>
    <recursive>true</recursive>
    <pathRegex>.*</pathRegex>
    <metadataFrom>last</metadataFrom>
    <preExtractRegex></preExtractRegex>
    <postExtractRegex></postExtractRegex>
    <extractRegex></extractRegex>
    <columnNameForExtract></columnNameForExtract>
    <sortFilesBySourceNames></sortFilesBySourceNames>
    <fileTableInMemory>false</fileTableInMemory>
    <accessibleViaFiles>false</accessibleViaFiles>
    <!-- sourceAttributes>
<att name="_NCProperties">version=1|netcdflibversion=4.5.0|hdf5libversion=1.10.1</att>
<att name="cdm_data_type">TimeSeriesProfile</att>
<att name="cdm_timeseries_variables">t, crs</att>
<att name="Conventions">CF-1.6</att>
<att name="date_created">2017-12-15T18:03:00Z</att>
<att name="featureType">timeSeriesProfile</att>
<att name="subsetVariables">t, crs</att>
</sourceAttributes -->
    <addAttributes>
        <att name="Conventions">CF-1.6, COARDS, ACDD-1.3</att>
        <!-- TODO: replace the placeholder with a real information URL. -->
        <att name="infoUrl">???</att>
        <att name="institution">WHOI</att>
        <att name="keywords">ammonium, amon, amon_a, amon_b, amon_c, crs, data, event, event_number, event_number_niskin, identifier, local, niskin, nitrate, nitrite, ntra, ntra_a, ntra_b, ntra_c, number, nutrients, phos_a, phos_b, phos_c, phosphate, scla, scla_c, silicate, slca, slca_a, slca_b, source, station, statistics, time</att>
        <att name="license">[standard]</att>
        <att name="sourceUrl">(local files)</att>
        <att name="standard_name_vocabulary">CF Standard Name Table v29</att>
        <att name="summary">MVCO nutrient data (2003-present)</att>
        <att name="title">MVCO nutrient data (2003-present)</att>
        <att name="cdm_data_type">TimeSeriesProfile</att>
        <att name="featureType">timeSeriesProfile</att>
        <!-- For TimeSeriesProfile, ERDDAP requires cdm_timeseries_variables
             to include the cf_role=timeseries_id variable (station) and
             cdm_profile_variables to include the cf_role=profile_id variable
             (event_number). Each list may only name variables that are
             constant for a given station / profile.
             NOTE(review): time is listed as a profile variable on the
             assumption that all rows of one event share a timestamp; also
             add latitude and longitude to whichever list they are constant
             over. Confirm against the data. -->
        <att name="cdm_timeseries_variables">station</att>
        <att name="cdm_profile_variables">event_number, time</att>
        <!-- No altitude or depth destination variable exists, so z stands
             in as the vertical axis. -->
        <att name="cdm_altitude_proxy">z</att>
        <att name="subsetVariables">time, latitude, longitude, crs</att>
    </addAttributes>

    <!-- Identifier and coordinate variables -->
    <dataVariable>
        <sourceName>station</sourceName>
        <destinationName>station</destinationName>
        <dataType>long</dataType>
        <!-- sourceAttributes>
<att name="cf_role">timeseries_id</att>
<att name="long_name">station identifier</att>
</sourceAttributes -->
        <addAttributes>
            <att name="ioos_category">Identifier</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>y</sourceName>
        <destinationName>latitude</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="axis">Y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="ioos_category">Location</att>
            <att name="long_name">Latitude</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>x</sourceName>
        <destinationName>longitude</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="axis">X</att>
</sourceAttributes -->
        <addAttributes>
            <att name="ioos_category">Location</att>
            <att name="long_name">Longitude</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>crs</sourceName>
        <destinationName>crs</destinationName>
        <dataType>int</dataType>
        <!-- sourceAttributes>
</sourceAttributes -->
        <addAttributes>
            <att name="ioos_category">Unknown</att>
            <att name="long_name">CRS</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>t</sourceName>
        <destinationName>time</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="axis">T</att>
<att name="standard_name">time</att>
<att name="units">seconds since 1990-01-01 00:00:00Z</att>
</sourceAttributes -->
        <addAttributes>
            <att name="ioos_category">Time</att>
            <att name="long_name">Time</att>
        </addAttributes>
    </dataVariable>
    <!-- z is written by the conversion script in this file as 0 - Depth. -->
    <dataVariable>
        <sourceName>z</sourceName>
        <destinationName>z</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="axis">Z</att>
</sourceAttributes -->
        <addAttributes>
            <att name="ioos_category">Location</att>
            <att name="long_name">Z (negative of sample depth)</att>
        </addAttributes>
    </dataVariable>

    <!-- Event identifiers. These are String variables, so they are
         categorised as identifiers and carry no colour-bar range. -->
    <dataVariable>
        <sourceName>event_number</sourceName>
        <destinationName>event_number</destinationName>
        <dataType>String</dataType>
        <!-- sourceAttributes>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="cf_role">profile_id</att>
            <att name="coordinates">null</att>
            <att name="ioos_category">Identifier</att>
            <att name="long_name">Event Number</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>event_number_niskin</sourceName>
        <destinationName>event_number_niskin</destinationName>
        <dataType>String</dataType>
        <!-- sourceAttributes>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Identifier</att>
            <att name="long_name">Event Number Niskin</att>
        </addAttributes>
    </dataVariable>

    <!-- Nutrient concentrations. The source column headers used by the
         conversion script in this file give the analyte and the unit (uM);
         the a / b / c suffixes are the three columns per analyte. -->
    <dataVariable>
        <sourceName>ntra_a</sourceName>
        <destinationName>ntra_a</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Nitrite + Nitrate, replicate A</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>ntra_b</sourceName>
        <destinationName>ntra_b</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Nitrite + Nitrate, replicate B</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>ntra_c</sourceName>
        <destinationName>ntra_c</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Nitrite + Nitrate, replicate C</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>amon_a</sourceName>
        <destinationName>amon_a</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Ammonium, replicate A</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>amon_b</sourceName>
        <destinationName>amon_b</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Ammonium, replicate B</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>amon_c</sourceName>
        <destinationName>amon_c</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Ammonium, replicate C</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>slca_a</sourceName>
        <destinationName>slca_a</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Silicate, replicate A</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>slca_b</sourceName>
        <destinationName>slca_b</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Silicate, replicate B</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <!-- 'scla_c' looks like a transposition of 'slca_c'. sourceName has to
         match the variable name in the NetCDF files, which the conversion
         script in this file writes as 'scla_c'; destinationName is left
         unchanged so existing requests keep working. -->
    <dataVariable>
        <sourceName>scla_c</sourceName>
        <destinationName>scla_c</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Silicate, replicate C</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>phos_a</sourceName>
        <destinationName>phos_a</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Phosphate, replicate A</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>phos_b</sourceName>
        <destinationName>phos_b</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Phosphate, replicate B</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
    <dataVariable>
        <sourceName>phos_c</sourceName>
        <destinationName>phos_c</destinationName>
        <dataType>double</dataType>
        <!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
        <addAttributes>
            <att name="coordinates">null</att>
            <att name="ioos_category">Dissolved Nutrients</att>
            <att name="long_name">Phosphate, replicate C</att>
            <att name="units">umol L-1</att>
        </addAttributes>
    </dataVariable>
</dataset>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment