Skip to content

Instantly share code, notes, and snippets.

@rsignell-usgs
Created July 14, 2018 15:44
Show Gist options
  • Save rsignell-usgs/e4c44eed017420d6295ca8112ea57a2a to your computer and use it in GitHub Desktop.
Save rsignell-usgs/e4c44eed017420d6295ca8112ea57a2a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Check for gaps in National Water Model data on S3"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Using the British Met Office [Pysssix S3 FUSE driver](https://github.com/informatics-lab/s3-fuse-flex-volume) we can read any publicly readable bucket using `/s3/{bucket}`. The code cells below instead list the bucket directly with `s3fs`, so their paths omit the `/s3/` prefix."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"root = 'noaa-nwm-pds'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We want to see if there are 24 files per day (`t00z, t01z,... t23z`) for this pattern:\n",
" `/s3/noaa-nwm-pds/nwm.20180615/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc`"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import s3fs"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['noaa-nwm-pds/nwm.20180616',\n",
" 'noaa-nwm-pds/nwm.20180617',\n",
" 'noaa-nwm-pds/nwm.20180618',\n",
" 'noaa-nwm-pds/nwm.20180619',\n",
" 'noaa-nwm-pds/nwm.20180620',\n",
" 'noaa-nwm-pds/nwm.20180621',\n",
" 'noaa-nwm-pds/nwm.20180622',\n",
" 'noaa-nwm-pds/nwm.20180623',\n",
" 'noaa-nwm-pds/nwm.20180624',\n",
" 'noaa-nwm-pds/nwm.20180625',\n",
" 'noaa-nwm-pds/nwm.20180626',\n",
" 'noaa-nwm-pds/nwm.20180627',\n",
" 'noaa-nwm-pds/nwm.20180628',\n",
" 'noaa-nwm-pds/nwm.20180629',\n",
" 'noaa-nwm-pds/nwm.20180630',\n",
" 'noaa-nwm-pds/nwm.20180701',\n",
" 'noaa-nwm-pds/nwm.20180702',\n",
" 'noaa-nwm-pds/nwm.20180703',\n",
" 'noaa-nwm-pds/nwm.20180704',\n",
" 'noaa-nwm-pds/nwm.20180705',\n",
" 'noaa-nwm-pds/nwm.20180706',\n",
" 'noaa-nwm-pds/nwm.20180707',\n",
" 'noaa-nwm-pds/nwm.20180708',\n",
" 'noaa-nwm-pds/nwm.20180709',\n",
" 'noaa-nwm-pds/nwm.20180710',\n",
" 'noaa-nwm-pds/nwm.20180711',\n",
" 'noaa-nwm-pds/nwm.20180712',\n",
" 'noaa-nwm-pds/nwm.20180713',\n",
" 'noaa-nwm-pds/nwm.20180714']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# anon=True requests anonymous access, so no AWS credentials are needed.\n",
"fs = s3fs.S3FileSystem(anon=True)\n",
"# List the bucket named in `root` (set in the first code cell) instead of\n",
"# repeating the bucket name as a second literal.\n",
"fs.ls(root)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"dates = pd.date_range(start='2018-07-01', end='2018-07-07', freq='D')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def forcing_files(date, bucket=None):\n",
"    \"\"\"List the objects under one day's `forcing_short_range` prefix.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    date : object with a `strftime` method (e.g. an element of `pd.date_range`)\n",
"        Day to inspect; formatted as YYYYMMDD to build the `nwm.<day>` prefix.\n",
"    bucket : str, optional\n",
"        Bucket to query. Defaults to the notebook-level `root`.\n",
"\n",
"    Returns\n",
"    -------\n",
"    Whatever `fs.ls` returns for that prefix (a list of object paths in the\n",
"    outputs recorded in this notebook).\n",
"    \"\"\"\n",
"    if bucket is None:\n",
"        # Resolve at call time so a changed `root` takes effect without\n",
"        # having to re-run this definition.\n",
"        bucket = root\n",
"    day = date.strftime('%Y%m%d')\n",
"    return fs.ls('{}/nwm.{}/forcing_short_range'.format(bucket, day))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def daily_files(files, forecast='f001'):\n",
"    \"\"\"Return the entries of `files` that contain the `forecast` tag.\n",
"\n",
"    The check is a plain substring test (`forecast in entry`), and the\n",
"    order of `files` is preserved in the result.\n",
"    \"\"\"\n",
"    selected = []\n",
"    for entry in files:\n",
"        if forecast in entry:\n",
"            selected.append(entry)\n",
"    return selected"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[24, 24, 24, 24, 24, 24, 24]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One timestamp per day for the first week of July 2018.\n",
"dates = pd.date_range(start='2018-07-01', end='2018-07-07', freq='D')\n",
"# Full forcing_short_range listing for each of those days.\n",
"d = list(map(forcing_files, dates))\n",
"# Keep only the f001 entries from each day's listing.\n",
"f001 = [daily_files(listing, forecast='f001') for listing in d]\n",
"# Number of f001 files per day; 24 (t00z..t23z) is the count we hope to see.\n",
"list(map(len, f001))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[23, 23, 23, 23, 23, 24, 24]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same check for an earlier week (21-27 June 2018).\n",
"dates = pd.date_range(start='2018-06-21', end='2018-06-27', freq='D')\n",
"# Full forcing_short_range listing for each of those days.\n",
"d = list(map(forcing_files, dates))\n",
"# Keep only the f001 entries from each day's listing.\n",
"f001 = [daily_files(listing, forecast='f001') for listing in d]\n",
"# Number of f001 files per day; anything below 24 (t00z..t23z) indicates a gap.\n",
"list(map(len, f001))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t01z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t02z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t03z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t04z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t05z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t06z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t07z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t08z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t09z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t10z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t11z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t12z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t13z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t14z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t15z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t16z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t17z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t18z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t19z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t20z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t21z.short_range.forcing.f001.conus.nc',\n",
" 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t22z.short_range.forcing.f001.conus.nc']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted(f001[0])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment