Created
February 17, 2017 13:41
-
-
Save mmartini-usgs/365e0072dcb5986ecb1a069cb527941e to your computer and use it in GitHub Desktop.
Demonstrating the problem opening large netcdf4 files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This is raw ADCP data, sampled continuously with one ensemble every 2 s (0.5 Hz), in 1 m bins, deployed at MVCO in 22.5 m.\n", | |
"The big file is a netCDF4 file with over 3 million ensembles.\n", | |
"The small file is a netCDF4 file with 9999 ensembles.\n", | |
"\n", | |
"From my explorations, there is no automatic way to average the variables in this netCDF file:\n", | |
"* I can't open the large netCDF file in Python; I suspect it is too large. So I can write it as a large netCDF4 file, but I can't work with it in Python. In MATLAB I can open the big file and very quickly read small slices of it (though not the whole thing), but I can't do the same in Python. What am I missing?\n", | |
"* So, working with the much smaller file, I am trying to demonstrate a proof of concept: is there a Python method that will bin-average a netCDF file?\n", | |
" * I learn how to open and inspect a dataset, very convenient\n", | |
" * I can plot - very convenient\n", | |
" * I experiment with rolling" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import netCDF4 as nc\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import xarray as xr" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"#bigfile = 'C:\\\\data\\\\wh767mvco\\\\67DEP000B.cdf'\n", | |
"smallfile = 'C:\\\\data\\\\wh767mvco\\\\67DEP000short.cdf'\n", | |
"deltat = 2 #sec\n", | |
"burstlengthsec = 10*60 # 10 min burst length\n", | |
"burstlengthens = burstlengthsec/deltat\n", | |
"chunksize = burstlengthens" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"41\n" | |
] | |
} | |
], | |
"source": [ | |
"# this got an unknown error\n", | |
"#ds = xr.open_dataset(bigfile)\n", | |
"# this got an unknown error\n", | |
"#ds = xr.open_dataset(bigfile, chunks={'rec': 3600})\n", | |
"# this seemed to open, but failed on the time variable:\n", | |
"# ValueError: unable to decode time units 'msec since 0:00 GMT' with the default calendar. \n", | |
"# Try opening your dataset with decode_times=False.\n", | |
"#ds = xr.open_dataset(smallfile, chunks={'rec': 3600})\n", | |
"# this worked\n", | |
"ds = xr.open_dataset(smallfile, chunks={'rec': 3600}, decode_times=False)\n", | |
"print(len(ds))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'xarray.core.dataset.Dataset'>\n", | |
"41\n", | |
"41\n" | |
] | |
} | |
], | |
"source": [ | |
"print(type(ds))\n", | |
"print(len(ds)) # this would be the number of variables, not the record length\n", | |
"# this is because ds is essentially a dictionary, and thus its length is the number of keys\n", | |
"print(len(ds.keys()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.Dataset>\n", | |
"Dimensions: (depth: 23, rec: 9999)\n", | |
"Coordinates:\n", | |
" * rec (rec) float64 5e+04 5e+04 5e+04 5e+04 5.000e+04 5.001e+04 ...\n", | |
" * depth (depth) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ...\n", | |
"Data variables:\n", | |
" time (rec) int32 2457117 2457117 2457117 2457117 2457117 2457117 ...\n", | |
" time2 (rec) int32 13599999 13602000 13603999 13605999 13608000 ...\n", | |
" sv (rec) float64 1.504e+03 1.504e+03 1.504e+03 1.504e+03 ...\n", | |
" vel1 (rec, depth) float64 31.0 3.0 6.0 23.0 51.0 -5.0 9.0 62.0 -7.0 ...\n", | |
" vel2 (rec, depth) float64 -24.0 11.0 -8.0 -8.0 -1.0 -4.0 33.0 7.0 ...\n", | |
" vel3 (rec, depth) float64 -51.0 -2.0 -52.0 -17.0 -37.0 -55.0 -54.0 ...\n", | |
" vel4 (rec, depth) float64 24.0 101.0 46.0 82.0 84.0 54.0 117.0 68.0 ...\n", | |
" cor1 (rec, depth) float64 132.0 120.0 130.0 127.0 129.0 128.0 133.0 ...\n", | |
" cor2 (rec, depth) float64 121.0 137.0 144.0 123.0 130.0 133.0 120.0 ...\n", | |
" cor3 (rec, depth) float64 123.0 113.0 141.0 122.0 119.0 128.0 122.0 ...\n", | |
" cor4 (rec, depth) float64 138.0 116.0 139.0 126.0 117.0 141.0 129.0 ...\n", | |
" AGC1 (rec, depth) float64 137.0 127.0 120.0 119.0 118.0 115.0 112.0 ...\n", | |
" AGC2 (rec, depth) float64 132.0 134.0 131.0 117.0 117.0 111.0 104.0 ...\n", | |
" AGC3 (rec, depth) float64 140.0 134.0 132.0 122.0 122.0 116.0 111.0 ...\n", | |
" AGC4 (rec, depth) float64 146.0 139.0 140.0 121.0 128.0 121.0 113.0 ...\n", | |
" PGd1 (rec, depth) float64 100.0 100.0 100.0 100.0 100.0 100.0 100.0 ...\n", | |
" PGd2 (rec, depth) float64 100.0 100.0 100.0 100.0 100.0 100.0 100.0 ...\n", | |
" PGd3 (rec, depth) float64 100.0 100.0 100.0 100.0 100.0 100.0 100.0 ...\n", | |
" PGd4 (rec, depth) float64 100.0 100.0 100.0 100.0 100.0 100.0 100.0 ...\n", | |
" Hdg (rec) float64 1.207e+04 1.207e+04 1.207e+04 1.208e+04 ...\n", | |
" Ptch (rec) float64 -44.0 -44.0 -43.0 -45.0 -45.0 -44.0 -45.0 -44.0 ...\n", | |
" Roll (rec) float64 51.0 52.0 51.0 52.0 51.0 51.0 51.0 51.0 50.0 ...\n", | |
" HdgSTD (rec) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n", | |
" PtchSTD (rec) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n", | |
" RollSTD (rec) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n", | |
" Tx (rec) float64 1.41e+03 1.411e+03 1.41e+03 1.41e+03 1.412e+03 ...\n", | |
" S (rec) float64 35.0 35.0 35.0 35.0 35.0 35.0 35.0 35.0 35.0 ...\n", | |
" xmitc (rec) float64 155.0 155.0 155.0 155.0 155.0 155.0 155.0 155.0 ...\n", | |
" xmitv (rec) float64 138.0 138.0 138.0 138.0 138.0 138.0 138.0 138.0 ...\n", | |
" dac (rec) float64 100.0 100.0 100.0 100.0 100.0 100.0 100.0 100.0 ...\n", | |
" VDD3 (rec) float64 98.0 98.0 98.0 98.0 98.0 98.0 98.0 98.0 98.0 ...\n", | |
" VDD1 (rec) float64 53.0 53.0 53.0 53.0 53.0 53.0 52.0 52.0 52.0 ...\n", | |
" VDC (rec) float64 95.0 95.0 95.0 95.0 95.0 95.0 95.0 95.0 95.0 ...\n", | |
" EWD1 (rec) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n", | |
" EWD2 (rec) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n", | |
" EWD3 (rec) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n", | |
" EWD4 (rec) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n", | |
" Pressure (rec) float64 2.088e+04 2.089e+04 2.092e+04 2.091e+04 2.09e+04 ...\n", | |
" PressVar (rec) float64 38.0 15.0 19.0 17.0 11.0 26.0 9.0 6.0 14.0 1.0 ...\n", | |
"Attributes:\n", | |
" history: translated to netCDF by adcpcurrents2cdf.py\n", | |
" TRDI_System_Bandwidth: 0\n", | |
" TRDI_Base_Frequency_Index: 0\n", | |
" TRDI_Error_Velocity_Threshold: 2000\n", | |
" TRDI_Speed_of_sound_sensor_available: No\n", | |
" TRDI_Beam_Pattern: Convex\n", | |
" TRDI_Pitch_sensor_available: Yes\n", | |
" TRDI_Uses_EH_from_transducer_heading_sensor: Yes\n", | |
" TRDI_Sensor_Source_Byte: 1111101.0\n", | |
" TRDI_Bin_1_distance_cm: 211\n", | |
" TRDI_3-Beam_Solution_Used: No\n", | |
" TRDI_Tilts_Used: No\n", | |
" TRDI_Low_Corr_Threshold: 64\n", | |
" TRDI_Sensor_Avail_Byte: 111101.0\n", | |
" TRDI_Simulated_Data: 0\n", | |
" TRDI_Uses_ED_from_depth_sensor: Yes\n", | |
" TRDI_Pings_Per_Ensemble: 2\n", | |
" TRDI_No._Code_Reps: 5\n", | |
" TRDI_Heading_Alignment_Hundredths_of_Deg.: 0\n", | |
" TRDI_Uses_ER_from_transducer_roll_sensor: Yes\n", | |
" TRDI_Roll_sensor_available: Yes\n", | |
" TRDI_Time_Between_Ping Groups: 000:01:00\n", | |
" TRDI_System_Configuration_LSB: 11001011.0\n", | |
" TRDI_Beam_Angle: 20\n", | |
" TRDI_Calculate_EC_from_ED_ES_and_ET: Yes\n", | |
" TRDI_Sensor_Configuration: 1\n", | |
" TRDI_System_Power: 255\n", | |
" TRDI_Orientation: Up-facing beams\n", | |
" TRDI_Ref_Lyr_Avg_Starting_cell: 1\n", | |
" TRDI_Blank_after_Transmit_cm: 88\n", | |
" TRDI_Depth_sensor_available: Yes\n", | |
" TRDI_Transmit_lag_distance_cm: 25\n", | |
" TRDI_Temperature_sensor_available: Yes\n", | |
" TRDI_System_Configuration_MSB: 1000001.0\n", | |
" TRDI_System_Frequency: 600\n", | |
" TRDI_False_Target_Threshold: 50\n", | |
" TRDI_Bin_Mapping_Used: Yes\n", | |
" TRDI_Coord_Transform_LSB: 1.0\n", | |
" TRDI_Uses_ES_from_conductivity_sensor: No\n", | |
" TRDI_Depth_Cell_Length_cm: 100\n", | |
" TRDI_Coord_Transform: BEAM\n", | |
" TRDI_Signal_Processing_Mode: 1\n", | |
" TRDI_Uses_ET_from_transducer_temperature_sensor: Yes\n", | |
" TRDI_Heading_Bias_Hundredths_of_Deg.: 0\n", | |
" TRDI_Uses_EP_from_transducer_pitch_sensor: Yes\n", | |
" TRDI_Transducer_Head_Is_Attached: Yes\n", | |
" TRDI_Lag_Length: 53\n", | |
" TRDI_CPU_Version: 51.203\n", | |
" TRDI_Ref_Lyr_Avg_Ending_cell: 5\n", | |
" TRDI_CPU_Board_Serial_Number: 70000d487e09\n", | |
" TRDI_Conductivity_sensor_available: No\n", | |
" TRDI_Beam_Configuration: 4-bm janus\n", | |
" TRDI_Heading_sensor_available: Yes\n", | |
" TRDI_Xmit_pulse_length_cm: 122\n", | |
" TRDI_Number_of_Beams: 4\n", | |
" TRDI_Number_of_Cells: 23\n", | |
" TRDI_PGd_Minimum: 0" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds # prints the properties of the dataset" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda env:IOOS3]", | |
"language": "python", | |
"name": "conda-env-IOOS3-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment