Created
July 14, 2018 15:44
-
-
Save rsignell-usgs/e4c44eed017420d6295ca8112ea57a2a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Check for gaps in National Water Model data on S3" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Using the British Met Office [Pysssix S3 FUSE driver](https://github.com/informatics-lab/s3-fuse-flex-volume) we can read any publicly readable bucket using `/s3/{bucket}`. In this notebook the bucket is instead listed directly with `s3fs` using anonymous access." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"root = 'noaa-nwm-pds'" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We want to check whether each day has all 24 hourly files (`t00z`, `t01z`, ... `t23z`) matching this pattern:\n", | |
" `/s3/noaa-nwm-pds/nwm.20180615/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import s3fs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['noaa-nwm-pds/nwm.20180616',\n", | |
" 'noaa-nwm-pds/nwm.20180617',\n", | |
" 'noaa-nwm-pds/nwm.20180618',\n", | |
" 'noaa-nwm-pds/nwm.20180619',\n", | |
" 'noaa-nwm-pds/nwm.20180620',\n", | |
" 'noaa-nwm-pds/nwm.20180621',\n", | |
" 'noaa-nwm-pds/nwm.20180622',\n", | |
" 'noaa-nwm-pds/nwm.20180623',\n", | |
" 'noaa-nwm-pds/nwm.20180624',\n", | |
" 'noaa-nwm-pds/nwm.20180625',\n", | |
" 'noaa-nwm-pds/nwm.20180626',\n", | |
" 'noaa-nwm-pds/nwm.20180627',\n", | |
" 'noaa-nwm-pds/nwm.20180628',\n", | |
" 'noaa-nwm-pds/nwm.20180629',\n", | |
" 'noaa-nwm-pds/nwm.20180630',\n", | |
" 'noaa-nwm-pds/nwm.20180701',\n", | |
" 'noaa-nwm-pds/nwm.20180702',\n", | |
" 'noaa-nwm-pds/nwm.20180703',\n", | |
" 'noaa-nwm-pds/nwm.20180704',\n", | |
" 'noaa-nwm-pds/nwm.20180705',\n", | |
" 'noaa-nwm-pds/nwm.20180706',\n", | |
" 'noaa-nwm-pds/nwm.20180707',\n", | |
" 'noaa-nwm-pds/nwm.20180708',\n", | |
" 'noaa-nwm-pds/nwm.20180709',\n", | |
" 'noaa-nwm-pds/nwm.20180710',\n", | |
" 'noaa-nwm-pds/nwm.20180711',\n", | |
" 'noaa-nwm-pds/nwm.20180712',\n", | |
" 'noaa-nwm-pds/nwm.20180713',\n", | |
" 'noaa-nwm-pds/nwm.20180714']" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"fs = s3fs.S3FileSystem(anon=True)\n", | |
"fs.ls('noaa-nwm-pds')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dates = pd.date_range(start='2018-07-01', end='2018-07-07', freq='D')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def forcing_files(date):\n", | |
" return fs.ls('noaa-nwm-pds/nwm.{}/forcing_short_range'.format(date.strftime('%Y%m%d')))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def daily_files(files, forecast='f001'):\n", | |
" return [f for f in files if forecast in f]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[24, 24, 24, 24, 24, 24, 24]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dates = pd.date_range(start='2018-07-01', end='2018-07-07', freq='D')\n", | |
"d = [forcing_files(date) for date in dates]\n", | |
"f001 = [daily_files(dd,forecast='f001') for dd in d]\n", | |
"[len(file) for file in f001]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[23, 23, 23, 23, 23, 24, 24]" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dates = pd.date_range(start='2018-06-21', end='2018-06-27', freq='D')\n", | |
"d = [forcing_files(date) for date in dates]\n", | |
"f001 = [daily_files(dd,forecast='f001') for dd in d]\n", | |
"[len(file) for file in f001]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t01z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t02z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t03z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t04z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t05z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t06z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t07z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t08z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t09z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t10z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t11z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t12z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t13z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t14z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t15z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t16z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t17z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t18z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t19z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t20z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t21z.short_range.forcing.f001.conus.nc',\n", | |
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t22z.short_range.forcing.f001.conus.nc']" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sorted(f001[0])" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda root]", | |
"language": "python", | |
"name": "conda-root-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment