-
-
Save dionhaefner/51ef93980a87d6b6bb557599b79582da to your computer and use it in GitHub Desktop.
Jupyter notebooks generating plots and statistics for the paper "FOWD: A Free Ocean Wave Dataset for Data Mining and Machine Learning" (Häfner et al.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Compute FOWD rogue wave and QC statistics" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline\n", | |
"import math\n", | |
"import os\n", | |
"import json\n", | |
"import glob\n", | |
"from collections import defaultdict\n", | |
"\n", | |
"import matplotlib.pyplot as plt\n", | |
"import numpy as np\n", | |
"import xarray as xr\n", | |
"import seaborn as sns\n", | |
"import tqdm\n", | |
"import pandas as pd\n", | |
"\n", | |
"sns.set_palette('muted')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Find total number of waves and timespan" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fowd_files = sorted(glob.glob(os.path.expanduser('~/fowd-out-v5/fowd_cdip_*p?.nc')))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"161" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(fowd_files)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/groups/ocean/dhaefner/miniconda3/envs/ai/lib/python3.7/site-packages/ipykernel_launcher.py:6: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", | |
"Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", | |
" \n" | |
] | |
}, | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "61bf469e06c8464b8539d6ac81355fed", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=161.0), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"total_waves = 0\n", | |
"total_timespan = 0\n", | |
"depths = {}\n", | |
"max_waveheights = {}\n", | |
"\n", | |
"for f in tqdm.tqdm_notebook(fowd_files):\n", | |
" with xr.open_dataset(f, decode_times=False, drop_variables=['wave_raw_elevation']) as ds:\n", | |
" total_waves += ds.dims['wave_id_local']\n", | |
" total_timespan += ds.wave_zero_crossing_period.sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"4214292475" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"total_waves" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"759.1645615905112" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"total_timespan.values / 86400 / 365" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Count number of waves, number of rogues, double, triple, more than triple" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"infiles = glob.glob('/groups/ocean/dhaefner/fowd-out-v5/fowd_cdip_*_filtered.nc')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/groups/ocean/dhaefner/miniconda3/envs/ai/lib/python3.7/site-packages/ipykernel_launcher.py:11: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", | |
"Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", | |
" # This is added back by InteractiveShellApp.init_path()\n" | |
] | |
}, | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "82fc78908258464988121107d3191a07", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=152.0), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"num_waves = defaultdict(int)\n", | |
"num_ai20 = defaultdict(int)\n", | |
"num_ai22 = defaultdict(int)\n", | |
"num_ai25 = defaultdict(int)\n", | |
"num_multirogues = defaultdict(lambda: defaultdict(int))\n", | |
"\n", | |
"highest_wave = (0, 0)\n", | |
"\n", | |
"time_window = np.timedelta64(30, 's')\n", | |
"\n", | |
"for f in tqdm.tqdm_notebook(infiles):\n", | |
" with xr.open_dataset(f, drop_variables='wave_raw_elevation') as ds:\n", | |
" ds = ds.isel(meta_station_name=0)\n", | |
" time = ds['wave_start_time'].values\n", | |
" wh = ds['wave_height'].values\n", | |
" rel_wh = wh / ds['sea_state_30m_significant_wave_height_spectral'].values\n", | |
"\n", | |
" num_waves[f] = len(time)\n", | |
" num_ai20[f] = np.count_nonzero(rel_wh > 2.0)\n", | |
" num_ai22[f] = np.count_nonzero(rel_wh > 2.2)\n", | |
" num_ai25[f] = np.count_nonzero(rel_wh > 2.5)\n", | |
" \n", | |
" i = np.argmax(wh)\n", | |
" if wh[i] > highest_wave[0]:\n", | |
" highest_wave = (wh[i], rel_wh[i])\n", | |
" \n", | |
" is_rogue_wave = rel_wh > 2.0\n", | |
" for i in np.where(is_rogue_wave)[0]:\n", | |
" iend = np.searchsorted(time, time[i] + time_window)\n", | |
" rogues_in_window = is_rogue_wave[i:iend].sum()\n", | |
" num_multirogues[f][rogues_in_window] += 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1383570225" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sum(num_waves.values())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1383488167" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sum(num_waves.values()) - sum(num_ai20.values())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"82058" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sum(num_ai20.values())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"11849" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sum(num_ai22.values())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"564" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sum(num_ai25.values())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(defaultdict(int, {2: 2366, 3: 89}), 2455)" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"total_multirogues = defaultdict(int)\n", | |
"\n", | |
"for nr in num_multirogues.values():\n", | |
" for k, v in nr.items():\n", | |
" if k < 2:\n", | |
" continue\n", | |
" total_multirogues[k] += v\n", | |
" \n", | |
"total_multirogues, sum(total_multirogues.values())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Count number of QC flags fired during processing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/groups/ocean/dhaefner/miniconda3/envs/ai/lib/python3.7/site-packages/ipykernel_launcher.py:6: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", | |
"Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", | |
" \n" | |
] | |
}, | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "51d275967c564a5c8a7e556d6474a5da", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=161.0), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"qcfiles = glob.glob('/groups/ocean/dhaefner/fowd-out-v5/fowd_cdip_*.qc.json')\n", | |
"\n", | |
"flags_fired = {}\n", | |
"waves_failed = 0\n", | |
"\n", | |
"for qcfile in tqdm.tqdm_notebook(qcfiles):\n", | |
" with open(qcfile) as f:\n", | |
" for line in f:\n", | |
" data = json.loads(line)\n", | |
" if data['filename'] not in flags_fired:\n", | |
" flags_fired[data['filename']] = {k: 0 for k in 'abcdefg'}\n", | |
"\n", | |
" for flag in data['flags_fired']:\n", | |
" flags_fired[data['filename']][flag] += 1\n", | |
" \n", | |
" if data['flags_fired']:\n", | |
" waves_failed += 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"77371" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"waves_failed" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"a 31547\n", | |
"b 18465\n", | |
"c 39470\n", | |
"d 47544\n", | |
"e 0\n", | |
"f 11915\n", | |
"g 4089\n" | |
] | |
} | |
], | |
"source": [ | |
"for flag in 'abcdefg':\n", | |
" n_fired = 0\n", | |
" for _, val in flags_fired.items():\n", | |
" flagval = val.get(flag, 0)\n", | |
" n_fired += flagval\n", | |
" \n", | |
" print(flag, n_fired)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('432p1', 0),\n", | |
" ('171p1', 0),\n", | |
" ('212p1', 0),\n", | |
" ('198p1', 0),\n", | |
" ('211p1', 0),\n", | |
" ('124p1', 0),\n", | |
" ('152p1', 0),\n", | |
" ('235p1', 0),\n", | |
" ('234p1', 0),\n", | |
" ('245p1', 0),\n", | |
" ('232p1', 0),\n", | |
" ('231p1', 0),\n", | |
" ('182p1', 0),\n", | |
" ('133p1', 0),\n", | |
" ('208p1', 0),\n", | |
" ('227p1', 0),\n", | |
" ('225p1', 0),\n", | |
" ('164p1', 0),\n", | |
" ('229p1', 0),\n", | |
" ('188p1', 0),\n", | |
" ('113p1', 0),\n", | |
" ('228p1', 0),\n", | |
" ('114p1', 0),\n", | |
" ('115p1', 0),\n", | |
" ('116p1', 0),\n", | |
" ('149p1', 0),\n", | |
" ('103p1', 1),\n", | |
" ('122p1', 1),\n", | |
" ('246p1', 1),\n", | |
" ('123p1', 1),\n", | |
" ('226p1', 1),\n", | |
" ('207p1', 1),\n", | |
" ('216p1', 2),\n", | |
" ('105p1', 2),\n", | |
" ('104p1', 2),\n", | |
" ('220p1', 2),\n", | |
" ('136p1', 2),\n", | |
" ('135p1', 3),\n", | |
" ('224p1', 3),\n", | |
" ('203p1', 3),\n", | |
" ('242p1', 4),\n", | |
" ('233p1', 4),\n", | |
" ('121p1', 4),\n", | |
" ('202p1', 4),\n", | |
" ('117p1', 4),\n", | |
" ('261p1', 5),\n", | |
" ('238p1', 5),\n", | |
" ('195p1', 5),\n", | |
" ('169p1', 5),\n", | |
" ('128p1', 7),\n", | |
" ('176p1', 7),\n", | |
" ('193p1', 7),\n", | |
" ('138p1', 7),\n", | |
" ('213p1', 8),\n", | |
" ('433p1', 9),\n", | |
" ('186p1', 9),\n", | |
" ('125p1', 10),\n", | |
" ('244p1', 10),\n", | |
" ('209p1', 11),\n", | |
" ('190p1', 12),\n", | |
" ('101p1', 13),\n", | |
" ('210p1', 13),\n", | |
" ('130p1', 13),\n", | |
" ('215p1', 14),\n", | |
" ('174p1', 14),\n", | |
" ('197p1', 14),\n", | |
" ('241p1', 14),\n", | |
" ('236p1', 14),\n", | |
" ('087p1', 14),\n", | |
" ('147p1', 17),\n", | |
" ('118p1', 17),\n", | |
" ('180p1', 18),\n", | |
" ('217p1', 19),\n", | |
" ('129p1', 21),\n", | |
" ('154p1', 21),\n", | |
" ('237p1', 25),\n", | |
" ('150p1', 25),\n", | |
" ('200p1', 25),\n", | |
" ('189p1', 26),\n", | |
" ('247p1', 27),\n", | |
" ('219p1', 27),\n", | |
" ('222p1', 28),\n", | |
" ('170p1', 29),\n", | |
" ('201p1', 29),\n", | |
" ('194p1', 33),\n", | |
" ('161p1', 34),\n", | |
" ('165p1', 36),\n", | |
" ('089p1', 39),\n", | |
" ('179p1', 39),\n", | |
" ('191p1', 39),\n", | |
" ('155p1', 39),\n", | |
" ('175p1', 41),\n", | |
" ('132p1', 50),\n", | |
" ('243p1', 51),\n", | |
" ('192p1', 56),\n", | |
" ('156p1', 57),\n", | |
" ('166p1', 61),\n", | |
" ('131p1', 63),\n", | |
" ('090p1', 68),\n", | |
" ('187p1', 69),\n", | |
" ('160p1', 71),\n", | |
" ('185p1', 73),\n", | |
" ('088p1', 76),\n", | |
" ('107p1', 80),\n", | |
" ('206p1', 87),\n", | |
" ('098p1', 97),\n", | |
" ('081p1', 99),\n", | |
" ('102p1', 101),\n", | |
" ('168p1', 102),\n", | |
" ('146p1', 131),\n", | |
" ('431p1', 132),\n", | |
" ('240p1', 133),\n", | |
" ('134p1', 150),\n", | |
" ('181p1', 152),\n", | |
" ('221p1', 160),\n", | |
" ('139p1', 229),\n", | |
" ('141p1', 230),\n", | |
" ('230p1', 258),\n", | |
" ('028p1', 260),\n", | |
" ('144p1', 266),\n", | |
" ('214p1', 277),\n", | |
" ('172p1', 288),\n", | |
" ('196p1', 288),\n", | |
" ('100p1', 294),\n", | |
" ('095p1', 354),\n", | |
" ('163p1', 356),\n", | |
" ('162p1', 399),\n", | |
" ('142p1', 423),\n", | |
" ('111p1', 440),\n", | |
" ('091p1', 458),\n", | |
" ('126p1', 473),\n", | |
" ('093p1', 489),\n", | |
" ('157p1', 510),\n", | |
" ('204p1', 526),\n", | |
" ('430p1', 547),\n", | |
" ('158p1', 676),\n", | |
" ('109p1', 817),\n", | |
" ('076p1', 855),\n", | |
" ('092p1', 955),\n", | |
" ('143p1', 968),\n", | |
" ('071p1', 1193),\n", | |
" ('067p1', 1437),\n", | |
" ('029p1', 1597),\n", | |
" ('167p1', 2200),\n", | |
" ('045p1', 2407),\n", | |
" ('036p1', 2680),\n", | |
" ('096p1', 5627),\n", | |
" ('043p1', 7886),\n", | |
" ('094p1', 9511),\n", | |
" ('106p1', 11552),\n", | |
" ('205p1', 36782),\n", | |
" ('177p1', 55464)]" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"station_flags = {}\n", | |
"\n", | |
"for deployment, f in flags_fired.items():\n", | |
" station_id = deployment[:5]\n", | |
" if station_id not in station_flags:\n", | |
" station_flags[station_id] = 0\n", | |
" \n", | |
" station_flags[station_id] += sum(f.values())\n", | |
" \n", | |
"sorted(station_flags.items(), key=lambda x: x[1])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Check number of all QC flags fired during processing\n", | |
"(not just big waves, not used in the paper)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'a': 3278396,\n", | |
" 'b': 1231833,\n", | |
" 'c': 6073783,\n", | |
" 'd': 4292495,\n", | |
" 'e': 0,\n", | |
" 'f': 64472178,\n", | |
" 'g': 10761446}" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pickle\n", | |
"\n", | |
"flags_fired = {k: 0 for k in 'abcdefg'}\n", | |
"\n", | |
"for pklfile in glob.glob('/groups/ocean/dhaefner/fowd-out-v5/*.state.pkl'):\n", | |
" with open(pklfile, 'rb') as f:\n", | |
" flags_station = pickle.load(f)['num_flags_fired']\n", | |
" \n", | |
" for flag, val in flags_station.items():\n", | |
" flags_fired[flag] += val\n", | |
" \n", | |
"flags_fired" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment