Skip to content

Instantly share code, notes, and snippets.

@jbusecke
Created February 13, 2024 16:47
Show Gist options
  • Save jbusecke/e34468a14015dcf136033e4a423e94d1 to your computer and use it in GitHub Desktop.
Save jbusecke/e34468a14015dcf136033e4a423e94d1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "ec620c8b-9a8f-4ee2-8886-62c811954a74",
"metadata": {},
"source": [
"# Check if all files for a particular iid are cached\n",
"\n",
"This is related to Rich Signell's high-res request, which is currently advancing incredibly slowly (1 worker at 4% CPU and 25% RAM)."
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "aa10929a-7f0a-4a26-bada-b0c9fc3ec784",
"metadata": {},
"outputs": [],
"source": [
"import gcsfs\n",
"import cftime\n",
"import numpy as np\n",
"import datetime\n",
"fs = gcsfs.GCSFileSystem()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "e9c2c2ac-277c-4817-9e38-b0e8da84d13e",
"metadata": {},
"outputs": [],
"source": [
"iids = ['CMIP6.HighResMIP.MOHC.HadGEM3-GC31-HH.highres-future.r1i1p1f1.Omon.so.gn.v20200514',\n",
" 'CMIP6.HighResMIP.NERC.HadGEM3-GC31-HH.hist-1950.r1i1p1f1.Omon.so.gn.v20200514',\n",
" 'CMIP6.HighResMIP.NERC.HadGEM3-GC31-HH.hist-1950.r1i1p1f1.Omon.thetao.gn.v20200514',\n",
" 'CMIP6.HighResMIP.MOHC.HadGEM3-GC31-HH.highres-future.r1i1p1f1.Omon.thetao.gn.v20200514']"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "fc192eeb-78af-4a52-b18d-1d7e1c10f3d3",
"metadata": {},
"outputs": [],
"source": [
"iid = iids[0]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "9760b70b-f262-4e12-b70f-d744c473c3fe",
"metadata": {},
"outputs": [],
"source": [
"all_cached = fs.ls('gs://leap-scratch/data-library/cmip6-pgf-ingestion/cache')"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "96e5a791-c4a9-4a3b-8455-f36a7bc02d72",
"metadata": {},
"outputs": [],
"source": [
"def check_all_cached(iid:str, all_cached:list[str]) -> bool:\n",
"    \"\"\"Report whether the cached files for `iid` form a gap-free sequence.\n",
"\n",
"    Cache entries are matched by the lower-cased iid with '.' replaced by '_'.\n",
"    The start date (YYYYMM) is parsed from the trailing '<start>-<end>.nc' part\n",
"    of every matching name and placed on a 360-day calendar; the check passes\n",
"    when consecutive start dates are exactly 30 days apart.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    iid : str\n",
"        Instance id whose files should be looked up.\n",
"    all_cached : list[str]\n",
"        Names of all objects in the cache.\n",
"\n",
"    Returns\n",
"    -------\n",
"    bool\n",
"        True if at least one file matches and every step between sorted start\n",
"        dates is 30 days; False otherwise.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Only gaps between the first and last cached file are detected; files\n",
"    missing at either end of the record cannot be seen from the cache alone.\n",
"    NOTE(review): the 30-day step assumes one file per month - confirm.\n",
"    \"\"\"\n",
"    iid_modified = iid.lower().replace('.', '_')\n",
"    cached_iid = [c for c in all_cached if iid_modified in c]\n",
"    if not cached_iid:\n",
"        # np.all() of an empty diff is True, so without this guard an iid\n",
"        # with nothing cached would be reported as complete.\n",
"        print(f'{iid}: no cached files found')\n",
"        return False\n",
"    start_date_str_iid = sorted(\n",
"        c.split('_')[-1].replace('.nc', '').split('-')[0] for c in cached_iid\n",
"    )\n",
"    start_dates = [\n",
"        cftime.Datetime360Day(int(s[0:4]), int(s[4:6]), 1) for s in start_date_str_iid\n",
"    ]\n",
"    start_dates_diff = np.diff(start_dates)\n",
"    # Cast to a plain bool so the annotated return type holds (np.all gives np.bool_).\n",
"    contiguous = bool(np.all(start_dates_diff == datetime.timedelta(days=30)))\n",
"    status = 'no gaps' if contiguous else 'GAPS FOUND'\n",
"    print(f'{iid}: {len(cached_iid)} cached file(s), {status}')\n",
"    return contiguous"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "b7b2e87c-ffd3-45fc-bc08-7127b6360b60",
"metadata": {},
"outputs": [],
"source": [
"for iid in iids:\n",
" check_all_cached(iid, all_cached)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ab4c5b5-9ee5-495c-b16a-ae6402331a4a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment