Skip to content

Instantly share code, notes, and snippets.

@dionhaefner
Last active October 9, 2023 13:03
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save dionhaefner/51ef93980a87d6b6bb557599b79582da to your computer and use it in GitHub Desktop.
Save dionhaefner/51ef93980a87d6b6bb557599b79582da to your computer and use it in GitHub Desktop.
Jupyter notebooks generating plots and statistics for the paper "FOWD: A Free Ocean Wave Dataset for Data Mining and Machine Learning" (Häfner et al.)
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
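"# Assemble FOWD metadata LaTeX table\n",
"\n",
"Prints one LaTeX table row per FOWD variable (name, description, unit, example value), using a single example wave from one station file."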
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import math\n",
"import os\n",
"import json\n",
"import glob\n",
"from collections import defaultdict\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import xarray as xr\n",
"import seaborn as sns\n",
"import tqdm\n",
"\n",
"sns.set_palette('muted')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Location of the FOWD station file used for the example values.\n",
"# Defaults to the original cluster path; set the FOWD_INFILE environment\n",
"# variable to run the notebook against a copy stored elsewhere.\n",
"infile = os.environ.get(\n",
"    'FOWD_INFILE',\n",
"    '/groups/ocean/dhaefner/fowd-out-v5/fowd_cdip_098p1.nc',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Open the station file and keep a single example wave: position 11726 along\n",
"# `wave_id_local` and the first entry along `meta_station_name`.\n",
"ds = (\n",
"    xr.open_dataset(infile)\n",
"    .isel(wave_id_local=11726, meta_station_name=0)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Table sections in print order, selected by variable name prefix.\n",
"# `meta_station_name` and `wave_id_local` are listed explicitly -- presumably\n",
"# they are coordinates and are not yielded when iterating `ds` (TODO confirm).\n",
"variables = (\n",
"    ['meta_station_name', *(name for name in ds if name.startswith('meta_'))],\n",
"    ['wave_id_local', *(name for name in ds if name.startswith('wave_'))],\n",
"    [name for name in ds if name.startswith('sea_state_30m_')],\n",
"    [name for name in ds if name.startswith('direction_')],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Variables whose name is followed by a dagger superscript in the printed table.\n",
"dagger_vars = (\n",
"    # per-wave quantities\n",
"    'wave_zero_crossing_wavelength',\n",
"    # sea state parameters\n",
"    'sea_state_30m_significant_wave_height_spectral',\n",
"    'sea_state_30m_peak_wave_period',\n",
"    'sea_state_30m_peak_wavelength',\n",
"    'sea_state_30m_steepness',\n",
"    'sea_state_30m_bandwidth_peakedness',\n",
"    'sea_state_30m_bandwidth_narrowness',\n",
"    'sea_state_30m_benjamin_feir_index_peakedness',\n",
"    'sea_state_30m_benjamin_feir_index_narrowness',\n",
"    'sea_state_30m_groupiness_spectral',\n",
"    'sea_state_30m_energy_in_frequency_interval',\n",
"    'sea_state_30m_rel_energy_in_frequency_interval',\n",
"    # directional parameters\n",
"    'direction_dominant_spread_in_frequency_interval',\n",
"    'direction_dominant_direction_in_frequency_interval',\n",
"    'direction_directionality_index',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Maps the value of a variable's `units` attribute to the LaTeX markup printed\n",
"# in the unit column. Units without an entry here are printed unchanged apart\n",
"# from underscore escaping.\n",
"pprint_units = {\n",
"    'meters': '\\\\si{\\\\metre}',\n",
"    'hertz': '\\\\si{\\\\hertz}',\n",
"    'm s-1': '\\\\si{\\\\metre\\\\per\\\\second}',\n",
"    'watts': '\\\\si{\\\\watt}',\n",
"    'degrees_north': '$^\\\\circ$N',\n",
"    'degrees_east': '$^\\\\circ$E',\n",
"    'degrees': '$^\\\\circ$',\n",
"    'J m-2': '\\\\si{\\\\joule\\\\per\\\\metre\\\\squared}',\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\param{meta\\_station\\_name} & Name of original measurement station & --- & \\param{CDIP\\_098p1} \\\\\n",
"\\param{meta\\_source\\_file\\_name} & File name of raw input data file & --- & \\param{098p1\\_d01.nc} \\\\\n",
"\\param{meta\\_source\\_file\\_uuid} & UUID of raw input data file & --- & \\param{CC54C8D5\\allowbreak-7B1B\\allowbreak-4170\\allowbreak-9DBA\\allowbreak-EBFD91F26F14} \\\\\n",
"\\param{meta\\_deploy\\_latitude} & Deploy latitude of instrument & $^\\circ$N & \\param{21.4156} \\\\\n",
"\\param{meta\\_deploy\\_longitude} & Deploy longitude of instrument & $^\\circ$E & \\param{-157.678} \\\\\n",
"\\param{meta\\_water\\_depth} & Water depth at deployment location & \\si{\\metre} & \\param{100.0} \\\\\n",
"\\param{meta\\_sampling\\_rate} & Measurement sampling frequency in time & \\si{\\hertz} & \\param{1.28} \\\\\n",
"\\param{meta\\_frequency\\_band\\_lower} & Lower limit of frequency band & \\si{\\hertz} & \\param{[0.0, 0.05, 0.1, 0.25, 0.08]} \\\\\n",
"\\param{meta\\_frequency\\_band\\_upper} & Upper limit of frequency band & \\si{\\hertz} & \\param{[0.05, 0.1, 0.25, 1.5, 0.5]} \\\\\n",
"\n",
"\n",
"\\param{wave\\_id\\_local} & Incrementing wave ID for given station & --- & \\param{11726} \\\\\n",
"\\param{wave\\_start\\_time} & Wave start time & --- & \\param{2000-08-10T\\allowbreak12:18:44.220000000} \\\\\n",
"\\param{wave\\_end\\_time} & Wave end time & --- & \\param{2000-08-10T\\allowbreak12:18:50.470000000} \\\\\n",
"\\param{wave\\_zero\\_crossing\\_period} & Wave zero-crossing period relative to 30m sea surface elevation & \\si{\\second} & \\param{5.644304276} \\\\\n",
"\\param{wave\\_zero\\_crossing\\_wavelength}${}^\\dagger$ & Wave zero-crossing wavelength relative to 30m sea surface elevation & \\si{\\metre} & \\param{49.74048} \\\\\n",
"\\param{wave\\_raw\\_elevation} & Raw surface elevation relative to 30m sea surface elevation & \\si{\\metre} & \\param{[0.200261, 0.889527, 0.509184, -0.550564, -0.690152, -0.270083, -0.200052]} \\\\\n",
"\\param{wave\\_crest\\_height} & Wave crest height relative to 30m sea surface elevation & \\si{\\metre} & \\param{0.889527} \\\\\n",
"\\param{wave\\_trough\\_depth} & Wave trough depth relative to 30m sea surface elevation & \\si{\\metre} & \\param{-0.690152} \\\\\n",
"\\param{wave\\_height} & Absolute wave height relative to 30m sea surface elevation & \\si{\\metre} & \\param{1.579679} \\\\\n",
"\\param{wave\\_ursell\\_number} & Ursell number & 1 & \\param{0.003908} \\\\\n",
"\\param{wave\\_maximum\\_elevation\\_slope} & Maximum slope of surface elevation in time & \\si{\\metre\\per\\second} & \\param{0.921658} \\\\\n",
"\n",
"\n",
"\\param{sea\\_state\\_30m\\_start\\_time} & Sea state aggregation start time & --- & \\param{2000-08-10T\\allowbreak11:48:45.000999936} \\\\\n",
"\\param{sea\\_state\\_30m\\_end\\_time} & Sea state aggregation end time & --- & \\param{2000-08-10T\\allowbreak12:18:43.438000000} \\\\\n",
"\\param{sea\\_state\\_30m\\_significant\\_wave\\_height\\_spectral}${}^\\dagger$ & Significant wave height estimated from wave spectrum (Hm0) & \\si{\\metre} & \\param{1.798395} \\\\\n",
"\\param{sea\\_state\\_30m\\_significant\\_wave\\_height\\_direct} & Significant wave height estimated from wave history (H1/3) & \\si{\\metre} & \\param{1.648174} \\\\\n",
"\\param{sea\\_state\\_30m\\_maximum\\_wave\\_height} & Maximum wave height estimated from wave history & \\si{\\metre} & \\param{3.18891} \\\\\n",
"\\param{sea\\_state\\_30m\\_rel\\_maximum\\_wave\\_height} & Maximum wave height estimated from wave history relative to spectral significant wave height & 1 & \\param{1.773198} \\\\\n",
"\\param{sea\\_state\\_30m\\_mean\\_period\\_direct} & Mean zero-crossing period estimated from wave history & \\si{\\second} & \\param{5.133130549} \\\\\n",
"\\param{sea\\_state\\_30m\\_mean\\_period\\_spectral} & Mean zero-crossing period estimated from wave spectrum & \\si{\\second} & \\param{5.034029007} \\\\\n",
"\\param{sea\\_state\\_30m\\_skewness} & Skewness of sea surface elevation & 1 & \\param{0.010083} \\\\\n",
"\\param{sea\\_state\\_30m\\_kurtosis} & Excess kurtosis of sea surface elevation & 1 & \\param{-0.076898} \\\\\n",
"\\param{sea\\_state\\_30m\\_valid\\_data\\_ratio} & Ratio of valid measurements to all measurements & 1 & \\param{1.0} \\\\\n",
"\\param{sea\\_state\\_30m\\_peak\\_wave\\_period}${}^\\dagger$ & Dominant wave period & \\si{\\second} & \\param{6.841089249} \\\\\n",
"\\param{sea\\_state\\_30m\\_peak\\_wavelength}${}^\\dagger$ & Dominant wavelength & \\si{\\metre} & \\param{73.07008} \\\\\n",
"\\param{sea\\_state\\_30m\\_steepness}${}^\\dagger$ & Dominant wave steepness & 1 & \\param{0.054674} \\\\\n",
"\\param{sea\\_state\\_30m\\_bandwidth\\_peakedness}${}^\\dagger$ & Spectral bandwidth estimated through spectral peakedness (quality factor) & 1 & \\param{0.312186} \\\\\n",
"\\param{sea\\_state\\_30m\\_bandwidth\\_narrowness}${}^\\dagger$ & Spectral bandwidth estimated through spectral narrowness & 1 & \\param{0.43569} \\\\\n",
"\\param{sea\\_state\\_30m\\_benjamin\\_feir\\_index\\_peakedness}${}^\\dagger$ & Benjamin-Feir index estimated through steepness and peakedness & 1 & \\param{0.164307} \\\\\n",
"\\param{sea\\_state\\_30m\\_benjamin\\_feir\\_index\\_narrowness}${}^\\dagger$ & Benjamin-Feir index estimated through steepness and narrowness & 1 & \\param{0.117731} \\\\\n",
"\\param{sea\\_state\\_30m\\_crest\\_trough\\_correlation} & Crest-trough correlation parameter (r) estimated from spectral density & 1 & \\param{0.608416} \\\\\n",
"\\param{sea\\_state\\_30m\\_energy\\_in\\_frequency\\_interval}${}^\\dagger$ & Total energy density contained in frequency band & \\si{\\joule\\per\\metre\\squared} & \\param{[1.935885, 106.74948, 1620.2413, 301.649, 1926.3574]} \\\\\n",
"\\param{sea\\_state\\_30m\\_rel\\_energy\\_in\\_frequency\\_interval}${}^\\dagger$ & Relative energy contained in frequency band & 1 & \\param{[0.000953, 0.052571, 0.797922, 0.148553, 0.948675]} \\\\\n",
"\n",
"\n",
"\\param{direction\\_sampling\\_time} & Time at which directional quantities are sampled & --- & \\param{2000-08-10T\\allowbreak12:11:52.000000000} \\\\\n",
"\\param{direction\\_dominant\\_spread\\_in\\_frequency\\_interval}${}^\\dagger$ & Dominant directional spread in frequency band & $^\\circ$ & \\param{[57.965824, 38.118546, 31.54562, 39.30281, 33.07898]} \\\\\n",
"\\param{direction\\_dominant\\_direction\\_in\\_frequency\\_interval}${}^\\dagger$ & Dominant wave direction in frequency band & $^\\circ$ & \\param{[83.074, 136.02432, 74.00862, 77.26602, 74.89502]} \\\\\n",
"\\param{direction\\_peak\\_wave\\_direction} & Peak wave direction relative to normal-north & $^\\circ$ & \\param{70.46875} \\\\\n",
"\\param{direction\\_directionality\\_index}${}^\\dagger$ & Directionality index R (squared ratio of directional spread and spectral bandwidth) & 1 & \\param{0.924404} \\\\\n",
"\n",
"\n"
]
}
],
"source": [
"def escape(v):\n",
"    \"\"\"Return str(v) with every underscore backslash-escaped for LaTeX.\"\"\"\n",
"    return str(v).replace('_', '\\\\_')\n",
"\n",
"\n",
"# Print one LaTeX table row per variable, one group of rows per section.\n",
"for var_set in variables:\n",
"    for var in var_set:\n",
"        data = ds[var]\n",
"        var_esc = escape(var)\n",
"\n",
"        # names listed in dagger_vars get a dagger superscript\n",
"        dagger = '${}^\\\\dagger$' if var in dagger_vars else ''\n",
"\n",
"        # read attributes via .attrs so a missing one prints '---' instead of raising\n",
"        long_name = data.attrs.get('long_name', '---')\n",
"        unit = data.attrs.get('units', '---')\n",
"        unit = escape(pprint_units.get(unit, unit))\n",
"\n",
"        value = data.values\n",
"\n",
"        if value.size == 1 and np.issubdtype(value.dtype, np.floating):\n",
"            # round to 6 decimals and cast to float32 before printing\n",
"            value = np.float32(round(float(value.item()), 6))\n",
"\n",
"        elif np.issubdtype(value.dtype, np.timedelta64):\n",
"            # durations are printed in seconds, so the unit column is overridden\n",
"            value = value / np.timedelta64(1, 's')\n",
"            unit = '\\\\si{\\\\second}'\n",
"\n",
"        elif np.issubdtype(value.dtype, np.datetime64):\n",
"            # permit a LaTeX line break after the date part of the timestamp\n",
"            value = str(value).replace('T', 'T\\\\allowbreak')\n",
"\n",
"        elif value.size != 1:\n",
"            # join str() of each element: str(list) would use repr(), which\n",
"            # NumPy >= 2 renders as e.g. np.float32(0.2) for scalars\n",
"            value = '[' + ', '.join(str(round(v, 6)) for v in value) + ']'\n",
"\n",
"        value = escape(value)\n",
"\n",
"        if var == 'meta_source_file_uuid':\n",
"            # permit a LaTeX line break before each hyphen of the long UUID\n",
"            value = value.replace('-', '\\\\allowbreak-')\n",
"\n",
"        print(f'\\\\param{{{var_esc}}}{dagger} & {long_name} & {unit} & \\\\param{{{value}}} \\\\\\\\')\n",
"\n",
"    # blank lines separate the sections\n",
"    print('\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment