Skip to content

Instantly share code, notes, and snippets.

@dionhaefner
Last active October 9, 2023 13:03
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save dionhaefner/51ef93980a87d6b6bb557599b79582da to your computer and use it in GitHub Desktop.
Save dionhaefner/51ef93980a87d6b6bb557599b79582da to your computer and use it in GitHub Desktop.
Jupyter notebooks generating plots and statistics for the paper "FOWD: A Free Ocean Wave Dataset for Data Mining and Machine Learning" (Häfner et al.)
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compute FOWD rogue wave and QC statistics"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import math\n",
"import os\n",
"import json\n",
"import glob\n",
"from collections import defaultdict\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import xarray as xr\n",
"import seaborn as sns\n",
"import tqdm\n",
"import pandas as pd\n",
"\n",
"sns.set_palette('muted')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Find total number of waves and timespan"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"fowd_files = sorted(glob.glob(os.path.expanduser('~/fowd-out-v5/fowd_cdip_*p?.nc')))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"161"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(fowd_files)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/groups/ocean/dhaefner/miniconda3/envs/ai/lib/python3.7/site-packages/ipykernel_launcher.py:6: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n",
"Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n",
" \n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "61bf469e06c8464b8539d6ac81355fed",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=161.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"total_waves = 0\n",
"total_timespan = 0\n",
"depths = {}\n",
"max_waveheights = {}\n",
"\n",
"for f in tqdm.tqdm_notebook(fowd_files):\n",
" with xr.open_dataset(f, decode_times=False, drop_variables=['wave_raw_elevation']) as ds:\n",
" total_waves += ds.dims['wave_id_local']\n",
" total_timespan += ds.wave_zero_crossing_period.sum()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4214292475"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_waves"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"759.1645615905112"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_timespan.values / 86400 / 365"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Count number of waves, number of rogues, double, triple, more than triple"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"infiles = glob.glob('/groups/ocean/dhaefner/fowd-out-v5/fowd_cdip_*_filtered.nc')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/groups/ocean/dhaefner/miniconda3/envs/ai/lib/python3.7/site-packages/ipykernel_launcher.py:11: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n",
"Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n",
" # This is added back by InteractiveShellApp.init_path()\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "82fc78908258464988121107d3191a07",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=152.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"num_waves = defaultdict(int)\n",
"num_ai20 = defaultdict(int)\n",
"num_ai22 = defaultdict(int)\n",
"num_ai25 = defaultdict(int)\n",
"num_multirogues = defaultdict(lambda: defaultdict(int))\n",
"\n",
"highest_wave = (0, 0)\n",
"\n",
"time_window = np.timedelta64(30, 's')\n",
"\n",
"for f in tqdm.tqdm_notebook(infiles):\n",
" with xr.open_dataset(f, drop_variables='wave_raw_elevation') as ds:\n",
" ds = ds.isel(meta_station_name=0)\n",
" time = ds['wave_start_time'].values\n",
" wh = ds['wave_height'].values\n",
" rel_wh = wh / ds['sea_state_30m_significant_wave_height_spectral'].values\n",
"\n",
" num_waves[f] = len(time)\n",
" num_ai20[f] = np.count_nonzero(rel_wh > 2.0)\n",
" num_ai22[f] = np.count_nonzero(rel_wh > 2.2)\n",
" num_ai25[f] = np.count_nonzero(rel_wh > 2.5)\n",
" \n",
" i = np.argmax(wh)\n",
" if wh[i] > highest_wave[0]:\n",
" highest_wave = (wh[i], rel_wh[i])\n",
" \n",
" is_rogue_wave = rel_wh > 2.0\n",
" for i in np.where(is_rogue_wave)[0]:\n",
" iend = np.searchsorted(time, time[i] + time_window)\n",
" rogues_in_window = is_rogue_wave[i:iend].sum()\n",
" num_multirogues[f][rogues_in_window] += 1"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1383570225"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(num_waves.values())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1383488167"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(num_waves.values()) - sum(num_ai20.values())"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"82058"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(num_ai20.values())"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"11849"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(num_ai22.values())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"564"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(num_ai25.values())"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(defaultdict(int, {2: 2366, 3: 89}), 2455)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_multirogues = defaultdict(int)\n",
"\n",
"for nr in num_multirogues.values():\n",
" for k, v in nr.items():\n",
" if k < 2:\n",
" continue\n",
" total_multirogues[k] += v\n",
" \n",
"total_multirogues, sum(total_multirogues.values())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Count number of QC flags fired during processing"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/groups/ocean/dhaefner/miniconda3/envs/ai/lib/python3.7/site-packages/ipykernel_launcher.py:6: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n",
"Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n",
" \n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "51d275967c564a5c8a7e556d6474a5da",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=161.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"qcfiles = glob.glob('/groups/ocean/dhaefner/fowd-out-v5/fowd_cdip_*.qc.json')\n",
"\n",
"flags_fired = {}\n",
"waves_failed = 0\n",
"\n",
"for qcfile in tqdm.tqdm_notebook(qcfiles):\n",
" with open(qcfile) as f:\n",
" for line in f:\n",
" data = json.loads(line)\n",
" if data['filename'] not in flags_fired:\n",
" flags_fired[data['filename']] = {k: 0 for k in 'abcdefg'}\n",
"\n",
" for flag in data['flags_fired']:\n",
" flags_fired[data['filename']][flag] += 1\n",
" \n",
" if data['flags_fired']:\n",
" waves_failed += 1"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"77371"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"waves_failed"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a 31547\n",
"b 18465\n",
"c 39470\n",
"d 47544\n",
"e 0\n",
"f 11915\n",
"g 4089\n"
]
}
],
"source": [
"for flag in 'abcdefg':\n",
" n_fired = 0\n",
" for _, val in flags_fired.items():\n",
" flagval = val.get(flag, 0)\n",
" n_fired += flagval\n",
" \n",
" print(flag, n_fired)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('432p1', 0),\n",
" ('171p1', 0),\n",
" ('212p1', 0),\n",
" ('198p1', 0),\n",
" ('211p1', 0),\n",
" ('124p1', 0),\n",
" ('152p1', 0),\n",
" ('235p1', 0),\n",
" ('234p1', 0),\n",
" ('245p1', 0),\n",
" ('232p1', 0),\n",
" ('231p1', 0),\n",
" ('182p1', 0),\n",
" ('133p1', 0),\n",
" ('208p1', 0),\n",
" ('227p1', 0),\n",
" ('225p1', 0),\n",
" ('164p1', 0),\n",
" ('229p1', 0),\n",
" ('188p1', 0),\n",
" ('113p1', 0),\n",
" ('228p1', 0),\n",
" ('114p1', 0),\n",
" ('115p1', 0),\n",
" ('116p1', 0),\n",
" ('149p1', 0),\n",
" ('103p1', 1),\n",
" ('122p1', 1),\n",
" ('246p1', 1),\n",
" ('123p1', 1),\n",
" ('226p1', 1),\n",
" ('207p1', 1),\n",
" ('216p1', 2),\n",
" ('105p1', 2),\n",
" ('104p1', 2),\n",
" ('220p1', 2),\n",
" ('136p1', 2),\n",
" ('135p1', 3),\n",
" ('224p1', 3),\n",
" ('203p1', 3),\n",
" ('242p1', 4),\n",
" ('233p1', 4),\n",
" ('121p1', 4),\n",
" ('202p1', 4),\n",
" ('117p1', 4),\n",
" ('261p1', 5),\n",
" ('238p1', 5),\n",
" ('195p1', 5),\n",
" ('169p1', 5),\n",
" ('128p1', 7),\n",
" ('176p1', 7),\n",
" ('193p1', 7),\n",
" ('138p1', 7),\n",
" ('213p1', 8),\n",
" ('433p1', 9),\n",
" ('186p1', 9),\n",
" ('125p1', 10),\n",
" ('244p1', 10),\n",
" ('209p1', 11),\n",
" ('190p1', 12),\n",
" ('101p1', 13),\n",
" ('210p1', 13),\n",
" ('130p1', 13),\n",
" ('215p1', 14),\n",
" ('174p1', 14),\n",
" ('197p1', 14),\n",
" ('241p1', 14),\n",
" ('236p1', 14),\n",
" ('087p1', 14),\n",
" ('147p1', 17),\n",
" ('118p1', 17),\n",
" ('180p1', 18),\n",
" ('217p1', 19),\n",
" ('129p1', 21),\n",
" ('154p1', 21),\n",
" ('237p1', 25),\n",
" ('150p1', 25),\n",
" ('200p1', 25),\n",
" ('189p1', 26),\n",
" ('247p1', 27),\n",
" ('219p1', 27),\n",
" ('222p1', 28),\n",
" ('170p1', 29),\n",
" ('201p1', 29),\n",
" ('194p1', 33),\n",
" ('161p1', 34),\n",
" ('165p1', 36),\n",
" ('089p1', 39),\n",
" ('179p1', 39),\n",
" ('191p1', 39),\n",
" ('155p1', 39),\n",
" ('175p1', 41),\n",
" ('132p1', 50),\n",
" ('243p1', 51),\n",
" ('192p1', 56),\n",
" ('156p1', 57),\n",
" ('166p1', 61),\n",
" ('131p1', 63),\n",
" ('090p1', 68),\n",
" ('187p1', 69),\n",
" ('160p1', 71),\n",
" ('185p1', 73),\n",
" ('088p1', 76),\n",
" ('107p1', 80),\n",
" ('206p1', 87),\n",
" ('098p1', 97),\n",
" ('081p1', 99),\n",
" ('102p1', 101),\n",
" ('168p1', 102),\n",
" ('146p1', 131),\n",
" ('431p1', 132),\n",
" ('240p1', 133),\n",
" ('134p1', 150),\n",
" ('181p1', 152),\n",
" ('221p1', 160),\n",
" ('139p1', 229),\n",
" ('141p1', 230),\n",
" ('230p1', 258),\n",
" ('028p1', 260),\n",
" ('144p1', 266),\n",
" ('214p1', 277),\n",
" ('172p1', 288),\n",
" ('196p1', 288),\n",
" ('100p1', 294),\n",
" ('095p1', 354),\n",
" ('163p1', 356),\n",
" ('162p1', 399),\n",
" ('142p1', 423),\n",
" ('111p1', 440),\n",
" ('091p1', 458),\n",
" ('126p1', 473),\n",
" ('093p1', 489),\n",
" ('157p1', 510),\n",
" ('204p1', 526),\n",
" ('430p1', 547),\n",
" ('158p1', 676),\n",
" ('109p1', 817),\n",
" ('076p1', 855),\n",
" ('092p1', 955),\n",
" ('143p1', 968),\n",
" ('071p1', 1193),\n",
" ('067p1', 1437),\n",
" ('029p1', 1597),\n",
" ('167p1', 2200),\n",
" ('045p1', 2407),\n",
" ('036p1', 2680),\n",
" ('096p1', 5627),\n",
" ('043p1', 7886),\n",
" ('094p1', 9511),\n",
" ('106p1', 11552),\n",
" ('205p1', 36782),\n",
" ('177p1', 55464)]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"station_flags = {}\n",
"\n",
"for deployment, f in flags_fired.items():\n",
" station_id = deployment[:5]\n",
" if station_id not in station_flags:\n",
" station_flags[station_id] = 0\n",
" \n",
" station_flags[station_id] += sum(f.values())\n",
" \n",
"sorted(station_flags.items(), key=lambda x: x[1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Check number of all QC flags fired during processing\n",
"(not just big waves, not used in the paper)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'a': 3278396,\n",
" 'b': 1231833,\n",
" 'c': 6073783,\n",
" 'd': 4292495,\n",
" 'e': 0,\n",
" 'f': 64472178,\n",
" 'g': 10761446}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pickle\n",
"\n",
"flags_fired = {k: 0 for k in 'abcdefg'}\n",
"\n",
"for pklfile in glob.glob('/groups/ocean/dhaefner/fowd-out-v5/*.state.pkl'):\n",
" with open(pklfile, 'rb') as f:\n",
" flags_station = pickle.load(f)['num_flags_fired']\n",
" \n",
" for flag, val in flags_station.items():\n",
" flags_fired[flag] += val\n",
" \n",
"flags_fired"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment