Skip to content

Instantly share code, notes, and snippets.

@naomi-henderson
Last active November 21, 2020 23:18
Show Gist options
  • Save naomi-henderson/a09fb2aad408faa295f7dade1dce30f1 to your computer and use it in GitHub Desktop.
Save naomi-henderson/a09fb2aad408faa295f7dade1dce30f1 to your computer and use it in GitHub Desktop.
Read Cloud CMIP6 data and store locally
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read CMIP6 datasets and store locally"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import xarray as xr\n",
"import os\n",
"import gcsfs\n",
"from glob import glob"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Local directory that receives the netCDF files; the download loop below\n",
"# recreates each dataset's bucket path underneath it\n",
"local_path = 'files_nc'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def search_df(df, verbose= False, **search):\n",
"    '''Filter a catalogue DataFrame by keyword constraints.\n",
"\n",
"    Each keyword names a column of `df`.  A plain string is matched as a\n",
"    substring of the column values (it is handed to pandas `str.contains`, so\n",
"    regular-expression syntax applies); any other iterable, e.g. a list, is\n",
"    matched exactly, keeping the rows equal to any one of its items.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Catalogue to search; it is not modified.\n",
"    verbose : bool, optional\n",
"        If True, print the unique remaining values of every standard facet\n",
"        that was not pinned by an exact (list) match.\n",
"    **search\n",
"        column=str (substring match) or column=list-of-str (exact match).\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        The rows of `df` that satisfy all constraints.  Rows selected by a\n",
"        list are grouped in the order the list items were given.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If a keyword is not a column of `df`.\n",
"    '''\n",
"    keys = ['activity_id','institution_id','source_id','experiment_id','member_id', 'table_id', 'variable_id', 'grid_label']\n",
"    d = df\n",
"    for skey, wanted in search.items():\n",
"        if isinstance(wanted, str):        # match a string as a substring\n",
"            # na=False: a missing value counts as 'no match'; without it the mask\n",
"            # contains NaN and the boolean indexing raises\n",
"            d = d[d[skey].str.contains(wanted, na=False)]\n",
"        else:                              # match a list of strings exactly\n",
"            dk = [d[d[skey] == key] for key in wanted]\n",
"            # pd.concat raises on an empty list, so an empty constraint selects no rows\n",
"            d = pd.concat(dk) if dk else d.iloc[0:0]\n",
"            # `keys` holds only the standard facets; searching on any other\n",
"            # column (e.g. 'zstore') must not fail here\n",
"            if skey in keys:\n",
"                keys.remove(skey)\n",
"    if verbose:\n",
"        for key in keys:\n",
"            print(key,' = ',list(d[key].unique()))\n",
"    return d"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_zid(gsurl):\n",
"    ''' given a GCS zarr location, return the dataset_id as a list of path components\n",
"\n",
"    The leading 'gs://cmip6/' is removed and the rest is split on '/'.\n",
"    A trailing '/' is optional and never produces an empty last component.\n",
"\n",
"    Raises AssertionError if gsurl does not start with 'gs://cmip6/'.\n",
"    '''\n",
"    prefix = 'gs://cmip6/'\n",
"    assert gsurl.startswith(prefix), f'not a gs://cmip6 zarr location: {gsurl}'\n",
"    # strip instead of slicing off the last character, which would truncate the\n",
"    # final component of a URL that has no trailing '/'\n",
"    return gsurl[len(prefix):].rstrip('/').split('/')\n",
"\n",
"def get_zdict(gsurl):\n",
"    ''' pair the facet names with the path components of a GCS zarr location '''\n",
"    facets = ('activity_id','institution_id','source_id','experiment_id','member_id','table_id','variable_id','grid_label')\n",
"    components = get_zid(gsurl)\n",
"    # zip stops at the shorter sequence: surplus components or facets are dropped silently\n",
"    return {facet: component for facet, component in zip(facets, components)}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# anonymous, read-only handle on Google Cloud Storage\n",
"fs = gcsfs.GCSFileSystem(token='anon', access='read_only')\n",
"\n",
"# CSV listing of the zarr stores; dtype='unicode' reads every column as text\n",
"df_cloud = pd.read_csv(\n",
"    'https://cmip6.storage.googleapis.com/cmip6-zarr-consolidated-stores.csv',\n",
"    dtype='unicode',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pick the datasets you need - the keywords are the same as at the ESGF sites\n",
"# https://esgf-node.llnl.gov/search/cmip6/\n",
"# (a string is matched as a substring, a list is matched exactly)\n",
"\n",
"search = dict(\n",
"    table_id='Amon',\n",
"    experiment_id=['historical','ssp370'],\n",
"    variable_id=['tas'],\n",
"    institution_id=['NOAA-GFDL'],\n",
")\n",
"\n",
"df_available = search_df(df_cloud, **search)\n",
"\n",
"print('number of matching datasets',len(df_available))\n",
"df_available.zstore.values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Copy every matching zarr store to <local_path>/<bucket path>/<variable>.nc,\n",
"# skipping datasets for which a matching netCDF file is already present\n",
"gsurls = df_available.zstore.values\n",
"\n",
"for gsurl in gsurls:\n",
"    print(gsurl)\n",
"    zdict = get_zdict(gsurl)\n",
"    # gsurl[10:] is everything after 'gs://cmip6', i.e. the bucket path with its leading '/'\n",
"    ncdir = local_path + gsurl[10:]\n",
"\n",
"    variable = zdict['variable_id']\n",
"\n",
"    # os.path.join keeps the file inside ncdir whether or not ncdir ends with '/'\n",
"    ncfiles = glob(os.path.join(ncdir, f'{variable}*.nc'))\n",
"    if len(ncfiles) > 0:\n",
"        print(ncfiles, 'already exists')\n",
"        continue\n",
"\n",
"    ncfile = os.path.join(ncdir, f'{variable}.nc')\n",
"    # os.makedirs instead of shelling out to `mkdir -p`: portable, and the path\n",
"    # is never interpreted by a shell\n",
"    os.makedirs(ncdir, exist_ok=True)\n",
"\n",
"    # write under a temporary name and rename when done, so an interrupted\n",
"    # download is not reported as 'already exists' by the glob above next time\n",
"    tmpfile = ncfile + '.part'\n",
"    with xr.open_zarr(fs.get_mapper(gsurl),consolidated=True) as ds:\n",
"        ds.to_netcdf(tmpfile,mode='w',unlimited_dims='time')\n",
"    os.replace(tmpfile, ncfile)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# list what was written; {local_path} is expanded by IPython, so the listing\n",
"# follows the directory configured at the top of the notebook\n",
"! tree -L 9 {local_path}"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "pangeo-Oct2019",
"language": "python",
"name": "pangeo-oct2019"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment