-
-
Save dionhaefner/51ef93980a87d6b6bb557599b79582da to your computer and use it in GitHub Desktop.
Jupyter notebooks generating plots and statistics for the paper "FOWD: A Free Ocean Wave Dataset for Data Mining and Machine Learning" (Häfner et al.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Assemble FOWD metadata LaTeX table" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline\n", | |
"import math\n", | |
"import os\n", | |
"import json\n", | |
"import glob\n", | |
"from collections import defaultdict\n", | |
"\n", | |
"import matplotlib.pyplot as plt\n", | |
"import numpy as np\n", | |
"import xarray as xr\n", | |
"import seaborn as sns\n", | |
"import tqdm\n", | |
"\n", | |
"sns.set_palette('muted')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Path of the FOWD netCDF file the example table is built from.\n", | |
"# Set the FOWD_INFILE environment variable to use a copy stored elsewhere;\n", | |
"# without it the original hard-coded location is used.\n", | |
"infile = os.environ.get(\n", | |
"    'FOWD_INFILE',\n", | |
"    '/groups/ocean/dhaefner/fowd-out-v5/fowd_cdip_098p1.nc',\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Open the station file and reduce it to one example record: integer position\n", | |
"# 11726 along wave_id_local and position 0 along meta_station_name.\n", | |
"ds = xr.open_dataset(infile).isel(\n", | |
"    {'wave_id_local': 11726, 'meta_station_name': 0}\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def _names_with_prefix(prefix):\n", | |
"    # Names yielded by iterating `ds` that start with `prefix`, in dataset order.\n", | |
"    return [name for name in ds if name.startswith(prefix)]\n", | |
"\n", | |
"\n", | |
"# One list of variable names per table section, in printing order.\n", | |
"# meta_station_name and wave_id_local are prepended by hand; presumably they are\n", | |
"# coordinates and therefore not yielded when iterating `ds` - verify.\n", | |
"variables = (\n", | |
"    ['meta_station_name'] + _names_with_prefix('meta_'),\n", | |
"    ['wave_id_local'] + _names_with_prefix('wave_'),\n", | |
"    _names_with_prefix('sea_state_30m_'),\n", | |
"    _names_with_prefix('direction_'),\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Variables whose row in the printed table is flagged with a dagger superscript\n", | |
"# (the table-printing loop appends the marker to the variable name).\n", | |
"# NOTE(review): this notebook does not say what the dagger denotes - check the\n", | |
"# caption of the corresponding table in the paper.\n", | |
"dagger_vars = (\n", | |
" 'wave_zero_crossing_wavelength',\n", | |
" 'sea_state_30m_significant_wave_height_spectral',\n", | |
" 'sea_state_30m_peak_wave_period',\n", | |
" 'sea_state_30m_peak_wavelength',\n", | |
" 'sea_state_30m_steepness',\n", | |
" 'sea_state_30m_bandwidth_peakedness',\n", | |
" 'sea_state_30m_bandwidth_narrowness',\n", | |
" 'sea_state_30m_benjamin_feir_index_peakedness',\n", | |
" 'sea_state_30m_benjamin_feir_index_narrowness',\n", | |
" 'sea_state_30m_groupiness_spectral',\n", | |
" 'sea_state_30m_energy_in_frequency_interval',\n", | |
" 'sea_state_30m_rel_energy_in_frequency_interval',\n", | |
" 'direction_dominant_spread_in_frequency_interval',\n", | |
" 'direction_dominant_direction_in_frequency_interval',\n", | |
" 'direction_directionality_index',\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Unit strings as they appear in a variable's `units` attribute, mapped to the\n", | |
"# LaTeX markup printed in the table. Units missing here are printed unchanged\n", | |
"# apart from underscore escaping.\n", | |
"# NOTE(review): the \\si{...} entries presumably rely on the siunitx package -\n", | |
"# confirm against the preamble of the LaTeX document.\n", | |
"pprint_units = {\n", | |
" 'meters': '\\\\si{\\\\metre}',\n", | |
" 'hertz': '\\\\si{\\\\hertz}',\n", | |
" 'm s-1': '\\\\si{\\\\metre\\\\per\\\\second}',\n", | |
" 'watts': '\\\\si{\\\\watt}',\n", | |
" 'degrees_north': '$^\\\\circ$N',\n", | |
" 'degrees_east': '$^\\\\circ$E',\n", | |
" 'degrees': '$^\\\\circ$',\n", | |
" 'J m-2': '\\\\si{\\\\joule\\\\per\\\\metre\\\\squared}',\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\\param{meta\\_station\\_name} & Name of original measurement station & --- & \\param{CDIP\\_098p1} \\\\\n", | |
"\\param{meta\\_source\\_file\\_name} & File name of raw input data file & --- & \\param{098p1\\_d01.nc} \\\\\n", | |
"\\param{meta\\_source\\_file\\_uuid} & UUID of raw input data file & --- & \\param{CC54C8D5\\allowbreak-7B1B\\allowbreak-4170\\allowbreak-9DBA\\allowbreak-EBFD91F26F14} \\\\\n", | |
"\\param{meta\\_deploy\\_latitude} & Deploy latitude of instrument & $^\\circ$N & \\param{21.4156} \\\\\n", | |
"\\param{meta\\_deploy\\_longitude} & Deploy longitude of instrument & $^\\circ$E & \\param{-157.678} \\\\\n", | |
"\\param{meta\\_water\\_depth} & Water depth at deployment location & \\si{\\metre} & \\param{100.0} \\\\\n", | |
"\\param{meta\\_sampling\\_rate} & Measurement sampling frequency in time & \\si{\\hertz} & \\param{1.28} \\\\\n", | |
"\\param{meta\\_frequency\\_band\\_lower} & Lower limit of frequency band & \\si{\\hertz} & \\param{[0.0, 0.05, 0.1, 0.25, 0.08]} \\\\\n", | |
"\\param{meta\\_frequency\\_band\\_upper} & Upper limit of frequency band & \\si{\\hertz} & \\param{[0.05, 0.1, 0.25, 1.5, 0.5]} \\\\\n", | |
"\n", | |
"\n", | |
"\\param{wave\\_id\\_local} & Incrementing wave ID for given station & --- & \\param{11726} \\\\\n", | |
"\\param{wave\\_start\\_time} & Wave start time & --- & \\param{2000-08-10T\\allowbreak12:18:44.220000000} \\\\\n", | |
"\\param{wave\\_end\\_time} & Wave end time & --- & \\param{2000-08-10T\\allowbreak12:18:50.470000000} \\\\\n", | |
"\\param{wave\\_zero\\_crossing\\_period} & Wave zero-crossing period relative to 30m sea surface elevation & \\si{\\second} & \\param{5.644304276} \\\\\n", | |
"\\param{wave\\_zero\\_crossing\\_wavelength}${}^\\dagger$ & Wave zero-crossing wavelength relative to 30m sea surface elevation & \\si{\\metre} & \\param{49.74048} \\\\\n", | |
"\\param{wave\\_raw\\_elevation} & Raw surface elevation relative to 30m sea surface elevation & \\si{\\metre} & \\param{[0.200261, 0.889527, 0.509184, -0.550564, -0.690152, -0.270083, -0.200052]} \\\\\n", | |
"\\param{wave\\_crest\\_height} & Wave crest height relative to 30m sea surface elevation & \\si{\\metre} & \\param{0.889527} \\\\\n", | |
"\\param{wave\\_trough\\_depth} & Wave trough depth relative to 30m sea surface elevation & \\si{\\metre} & \\param{-0.690152} \\\\\n", | |
"\\param{wave\\_height} & Absolute wave height relative to 30m sea surface elevation & \\si{\\metre} & \\param{1.579679} \\\\\n", | |
"\\param{wave\\_ursell\\_number} & Ursell number & 1 & \\param{0.003908} \\\\\n", | |
"\\param{wave\\_maximum\\_elevation\\_slope} & Maximum slope of surface elevation in time & \\si{\\metre\\per\\second} & \\param{0.921658} \\\\\n", | |
"\n", | |
"\n", | |
"\\param{sea\\_state\\_30m\\_start\\_time} & Sea state aggregation start time & --- & \\param{2000-08-10T\\allowbreak11:48:45.000999936} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_end\\_time} & Sea state aggregation end time & --- & \\param{2000-08-10T\\allowbreak12:18:43.438000000} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_significant\\_wave\\_height\\_spectral}${}^\\dagger$ & Significant wave height estimated from wave spectrum (Hm0) & \\si{\\metre} & \\param{1.798395} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_significant\\_wave\\_height\\_direct} & Significant wave height estimated from wave history (H1/3) & \\si{\\metre} & \\param{1.648174} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_maximum\\_wave\\_height} & Maximum wave height estimated from wave history & \\si{\\metre} & \\param{3.18891} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_rel\\_maximum\\_wave\\_height} & Maximum wave height estimated from wave history relative to spectral significant wave height & 1 & \\param{1.773198} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_mean\\_period\\_direct} & Mean zero-crossing period estimated from wave history & \\si{\\second} & \\param{5.133130549} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_mean\\_period\\_spectral} & Mean zero-crossing period estimated from wave spectrum & \\si{\\second} & \\param{5.034029007} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_skewness} & Skewness of sea surface elevation & 1 & \\param{0.010083} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_kurtosis} & Excess kurtosis of sea surface elevation & 1 & \\param{-0.076898} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_valid\\_data\\_ratio} & Ratio of valid measurements to all measurements & 1 & \\param{1.0} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_peak\\_wave\\_period}${}^\\dagger$ & Dominant wave period & \\si{\\second} & \\param{6.841089249} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_peak\\_wavelength}${}^\\dagger$ & Dominant wavelength & \\si{\\metre} & \\param{73.07008} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_steepness}${}^\\dagger$ & Dominant wave steepness & 1 & \\param{0.054674} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_bandwidth\\_peakedness}${}^\\dagger$ & Spectral bandwidth estimated through spectral peakedness (quality factor) & 1 & \\param{0.312186} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_bandwidth\\_narrowness}${}^\\dagger$ & Spectral bandwidth estimated through spectral narrowness & 1 & \\param{0.43569} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_benjamin\\_feir\\_index\\_peakedness}${}^\\dagger$ & Benjamin-Feir index estimated through steepness and peakedness & 1 & \\param{0.164307} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_benjamin\\_feir\\_index\\_narrowness}${}^\\dagger$ & Benjamin-Feir index estimated through steepness and narrowness & 1 & \\param{0.117731} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_crest\\_trough\\_correlation} & Crest-trough correlation parameter (r) estimated from spectral density & 1 & \\param{0.608416} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_energy\\_in\\_frequency\\_interval}${}^\\dagger$ & Total energy density contained in frequency band & \\si{\\joule\\per\\metre\\squared} & \\param{[1.935885, 106.74948, 1620.2413, 301.649, 1926.3574]} \\\\\n", | |
"\\param{sea\\_state\\_30m\\_rel\\_energy\\_in\\_frequency\\_interval}${}^\\dagger$ & Relative energy contained in frequency band & 1 & \\param{[0.000953, 0.052571, 0.797922, 0.148553, 0.948675]} \\\\\n", | |
"\n", | |
"\n", | |
"\\param{direction\\_sampling\\_time} & Time at which directional quantities are sampled & --- & \\param{2000-08-10T\\allowbreak12:11:52.000000000} \\\\\n", | |
"\\param{direction\\_dominant\\_spread\\_in\\_frequency\\_interval}${}^\\dagger$ & Dominant directional spread in frequency band & $^\\circ$ & \\param{[57.965824, 38.118546, 31.54562, 39.30281, 33.07898]} \\\\\n", | |
"\\param{direction\\_dominant\\_direction\\_in\\_frequency\\_interval}${}^\\dagger$ & Dominant wave direction in frequency band & $^\\circ$ & \\param{[83.074, 136.02432, 74.00862, 77.26602, 74.89502]} \\\\\n", | |
"\\param{direction\\_peak\\_wave\\_direction} & Peak wave direction relative to normal-north & $^\\circ$ & \\param{70.46875} \\\\\n", | |
"\\param{direction\\_directionality\\_index}${}^\\dagger$ & Directionality index R (squared ratio of directional spread and spectral bandwidth) & 1 & \\param{0.924404} \\\\\n", | |
"\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"def escape(v):\n", | |
"    # Convert `v` to text and backslash-escape every underscore for LaTeX.\n", | |
"    return str(v).replace('_', '\\\\_')\n", | |
"\n", | |
"\n", | |
"def _format_value(value):\n", | |
"    # Turn the numpy value of one variable into escaped table text.\n", | |
"    # Returns (text, unit_override); unit_override is None unless the dtype\n", | |
"    # of the value decides which unit has to be printed.\n", | |
"    unit_override = None\n", | |
"\n", | |
"    if value.size == 1 and np.issubdtype(value.dtype, np.floating):\n", | |
"        # round to 6 decimals, then print with float32 precision\n", | |
"        text = str(np.float32(round(float(value), 6)))\n", | |
"\n", | |
"    elif np.issubdtype(value.dtype, np.timedelta64):\n", | |
"        # durations are converted to seconds and labelled accordingly\n", | |
"        text = str(value / np.timedelta64(1, 's'))\n", | |
"        unit_override = '\\\\si{\\\\second}'\n", | |
"\n", | |
"    elif np.issubdtype(value.dtype, np.datetime64):\n", | |
"        # allow a line break between date and time\n", | |
"        text = str(value).replace('T', 'T\\\\allowbreak')\n", | |
"\n", | |
"    elif value.size != 1:\n", | |
"        # Join str() of each element instead of printing the list itself:\n", | |
"        # a list shows element reprs, which NumPy >= 2 renders as\n", | |
"        # np.float32(...) and would leak into the LaTeX table.\n", | |
"        text = '[' + ', '.join(str(round(v, 6)) for v in value) + ']'\n", | |
"\n", | |
"    else:\n", | |
"        text = str(value)\n", | |
"\n", | |
"    return escape(text), unit_override\n", | |
"\n", | |
"\n", | |
"# Print one LaTeX table row per variable, section by section.\n", | |
"for var_set in variables:\n", | |
"    for var in var_set:\n", | |
"        da = ds[var]\n", | |
"        var_esc = escape(var)\n", | |
"\n", | |
"        dagger = '${}^\\\\dagger$' if var in dagger_vars else ''\n", | |
"\n", | |
"        # Known units become LaTeX markup; only raw unit strings are escaped,\n", | |
"        # so markup taken from pprint_units is never escaped a second time.\n", | |
"        raw_unit = da.attrs.get('units', '---')\n", | |
"        unit = pprint_units.get(raw_unit, escape(raw_unit))\n", | |
"\n", | |
"        value, unit_override = _format_value(da.values)\n", | |
"        if unit_override is not None:\n", | |
"            unit = unit_override\n", | |
"\n", | |
"        if var == 'meta_source_file_uuid':\n", | |
"            # let LaTeX break the long UUID at its dashes\n", | |
"            value = value.replace('-', '\\\\allowbreak-')\n", | |
"\n", | |
"        print(f'\\\\param{{{var_esc}}}{dagger} & {da.long_name} & {unit} & \\\\param{{{value}}} \\\\\\\\')\n", | |
"\n", | |
"    # blank lines separate the table sections\n", | |
"    print('\\n')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment