Created
July 10, 2018 17:03
-
-
Save jreadey/b13d4109595fdc3cd93eb557e77124c8 to your computer and use it in GitHub Desktop.
NCEP3 Loader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"! hsinfo" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Read the listing of the source bucket\n", | |
"# (generated by `s3cmd ls s3://hdfgroup/data/ncep3_daily/`).\n", | |
"# The S3 URI is taken from the 4th whitespace-separated field of each row.\n", | |
"s3_files = []\n", | |
"with open(\"/home/jovyan/NCEP3/s3_contents.txt\") as fp:\n", | |
"    for line in fp:\n", | |
"        fields = line.split()\n", | |
"        if len(fields) < 4:\n", | |
"            # blank or short row (no URI column): skip it instead of raising IndexError\n", | |
"            continue\n", | |
"        s3_files.append(fields[3])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Report how many source files were found in the listing.\n", | |
"num_files = len(s3_files)\n", | |
"print(f\"{num_files} S3 files will be aggregated\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Sort the paths so rows are written in filename order\n", | |
"# (presumably chronological, since the date is part of each filename - confirm).\n", | |
"s3_files.sort()\n", | |
"# Sanity check: number of files and the first path (the slice is safe on an empty list).\n", | |
"len(s3_files), s3_files[:1]" | |
] | |
}, | |
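{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Create the initial domain by downloading GSSTF_NCEP.3.1987.07.01.he5 and running the following:\n", | |
"\n", | |
"`$ hsload --nodata GSSTF_NCEP.3.1987.07.01.he5 /shared/NASA/NCEP3/ncep3.he5`\n", | |
"\n", | |
"**Note:** the cells below use a file handle named `f` that this notebook never creates. Open the domain created above for writing and assign it to `f` before running them." | |
] | |
}, | |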
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data_group = f[\"/HDFEOS/GRIDS/NCEP/Data Fields\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"list(data_group)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Remove every existing link under the data group.\n", | |
"# Iterate over a snapshot of the names: deleting from the group while iterating\n", | |
"# the group itself can skip entries or fail part-way through.\n", | |
"for link in list(data_group):\n", | |
"    del data_group[link]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Full paths of the datasets to aggregate; all of them live under the same group.\n", | |
"dsets = ['/HDFEOS/GRIDS/NCEP/Data Fields/' + field\n", | |
"         for field in ('Psea_level', 'Qsat', 'SST', 'Tair_2m')]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Size of the first dimension of every aggregated dataset: one row per source file.\n", | |
"# Derived from the listing rather than hard-coded (this was the literal 7850), so the\n", | |
"# datasets always match the number of files the load loop will write.\n", | |
"num_files = len(s3_files)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Create one aggregated dataset per field: a (720, 1440) grid of 4-byte floats for each\n", | |
"# source file, chunked so that each file's grid is exactly one chunk.\n", | |
"for ds_path in dsets:\n", | |
"    f.create_dataset(ds_path,\n", | |
"                     dtype='f4',\n", | |
"                     shape=(num_files, 720, 1440),\n", | |
"                     chunks=(1, 720, 1440))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"! hsls -r /shared/NASA/NCEP3/ncep3.he5" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import subprocess\n", | |
"\n", | |
"import h5py\n", | |
"\n", | |
"# Copy each source file into row `file_counter` of every aggregated dataset.\n", | |
"for file_counter, s3path in enumerate(s3_files):\n", | |
"    print(f\"Processing {s3path}\")\n", | |
"    # Download the file from S3 so we can open it locally. Passing an argument list\n", | |
"    # keeps the path away from the shell, and check=True stops on a failed download\n", | |
"    # instead of continuing with a missing file.\n", | |
"    subprocess.run([\"s3cmd\", \"get\", s3path], check=True)\n", | |
"    fname = s3path.rsplit(\"/\", 1)[-1]  # the downloaded filename is the text after the last slash\n", | |
"    try:\n", | |
"        with h5py.File(fname, \"r\") as he5f:\n", | |
"            for ds_path in dsets:\n", | |
"                print(f\"setting {ds_path}[{file_counter},:,:]\")\n", | |
"                f[ds_path][file_counter, :, :] = he5f[ds_path][...]\n", | |
"    finally:\n", | |
"        # Always remove the local copy, even if the transfer above failed.\n", | |
"        os.remove(fname)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment