Skip to content

Instantly share code, notes, and snippets.

@jbusecke
Created September 27, 2023 20:29
Show Gist options
  • Save jbusecke/aa83b8141c17d42c80d08f13483fe1f8 to your computer and use it in GitHub Desktop.
Save jbusecke/aa83b8141c17d42c80d08f13483fe1f8 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "bb89df8d-3a0b-478c-a7fc-f846b673d819",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# !pip install git+https://github.com/jbusecke/pangeo-forge-esgf.git@beam-refactor"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "929af61e-0d0b-4a18-a1c7-7953d1c35faf",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import gcsfs\n",
"import zarr\n",
"from typing import List\n",
"cache_bucket = \"leap-scratch/data-library/cache\"\n",
"target_bucket = \"leap-persistent-ro/data-library/cmip6-testing\"\n",
"\n",
"\n",
"def get_job_details(jobid: str):\n",
" pass\n",
"\n",
"def get_cached_files(iid: str) -> List[str]:\n",
"    \"\"\"Return `gs://` URLs of all objects under `cache_bucket` matching `iid`.\n",
"\n",
"    The iid is lowercased and split on `.`; the first two facets are dropped and\n",
"    the remaining ones are joined with `*` (with a leading and trailing `*`) to\n",
"    form the glob pattern that is searched below `cache_bucket`.\n",
"    \"\"\"\n",
"    gcs = gcsfs.GCSFileSystem()\n",
"    facets = iid.lower().split('.')[2:]\n",
"    # Empty strings at both ends put a wildcard before the first and after the last facet.\n",
"    pattern = '*'.join(['', *facets, ''])\n",
"    # NOTE: this glob takes quite long.\n",
"    matches = gcs.glob(f'{cache_bucket}/{pattern}')\n",
"    return [f'gs://{path}' for path in matches]\n",
"\n",
"def get_written_store(jobid: str, iid: str) -> str:\n",
"    \"\"\"Build the URL `gs://{target_bucket}/{subfolder}/{iid}.zarr`.\n",
"\n",
"    `subfolder` is `jobid` with its last `-`-separated segment removed; it is\n",
"    empty when `jobid` contains no `-`.\n",
"    \"\"\"\n",
"    # rpartition gives ('', '', jobid) when there is no '-', so the prefix is empty then.\n",
"    prefix, _, _ = jobid.rpartition('-')\n",
"    return f'gs://{target_bucket}/{prefix}/{iid}.zarr'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "48b89aa4-c348-4904-a67f-04a396ba0e18",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"iids = ['CMIP6.ScenarioMIP.CSIRO-ARCCSS.ACCESS-CM2.ssp585.r1i1p1f1.day.pr.gn.v20210317',\n",
"'CMIP6.ScenarioMIP.CSIRO.ACCESS-ESM1-5.ssp585.r4i1p1f1.day.sfcWind.gn.v20210318',\n",
"'CMIP6.ScenarioMIP.CSIRO.ACCESS-ESM1-5.ssp585.r6i1p1f1.day.sfcWind.gn.v20210318',\n",
"'CMIP6.ScenarioMIP.CSIRO.ACCESS-ESM1-5.ssp585.r4i1p1f1.day.pr.gn.v20210318',\n",
"'CMIP6.ScenarioMIP.CSIRO.ACCESS-ESM1-5.ssp585.r10i1p1f1.day.pr.gn.v20210318',\n",
"'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r2i1p1f1.day.pr.gn.v20190603',\n",
"'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r3i1p1f1.day.pr.gn.v20190603',\n",
"'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r4i1p1f1.day.pr.gn.v20190603',\n",
"'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r5i1p1f1.day.pr.gn.v20190603',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e892d48f-a0b1-42b1-9e8f-268fd4fe8bfe",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pangeo_forge_esgf.recipe_inputs - INFO - Checking responsiveness of search_nodes=['http://esgf-node.llnl.gov/esg-search/search', 'http://esgf-data.dkrz.de/esg-search/search', 'http://esgf-node.ipsl.upmc.fr/esg-search/search', 'http://esgf-index1.ceda.ac.uk/esg-search/search', 'http://esg-dn1.nsc.liu.se/esg-search/search', 'http://esgf.nci.org.au/esg-search/search']\n",
"pangeo_forge_esgf.recipe_inputs - INFO - responsive_search_nodes=['http://esgf-node.llnl.gov/esg-search/search', 'http://esgf-data.dkrz.de/esg-search/search', 'http://esgf-node.ipsl.upmc.fr/esg-search/search', 'http://esg-dn1.nsc.liu.se/esg-search/search', 'http://esgf.nci.org.au/esg-search/search']\n",
"pangeo_forge_esgf.recipe_inputs - INFO - Requesting urls\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 45/45 [00:10<00:00, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"pangeo_forge_esgf.recipe_inputs - INFO - Processing responses\n",
"pangeo_forge_esgf.recipe_inputs - INFO - Processing responses: Expected files per iid\n",
"pangeo_forge_esgf.recipe_inputs - INFO - Processing responses: Check for missing iids\n",
"pangeo_forge_esgf.recipe_inputs - WARNING - Not able to find results for the following 4 iids: ['CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r5i1p1f1.day.pr.gn.v20190603', 'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r3i1p1f1.day.pr.gn.v20190603', 'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r2i1p1f1.day.pr.gn.v20190603', 'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r4i1p1f1.day.pr.gn.v20190603']\n",
"pangeo_forge_esgf.recipe_inputs - INFO - Processing responses: Flatten results\n",
"pangeo_forge_esgf.recipe_inputs - INFO - Processing responses: Group results\n",
"pangeo_forge_esgf.recipe_inputs - INFO - Choosing one url per file\n",
"pangeo_forge_esgf.recipe_inputs - WARNING - This method seems to be unreliable for getting many urls. \n",
"If you are getting less datasets than you expect, try 'first' instead.\n",
"pangeo_forge_esgf.recipe_inputs - INFO - Find first responsive url for each file\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"100%|██████████| 8/8 [00:00<00:00, 12.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"pangeo_forge_esgf.recipe_inputs - WARNING - Was not able to construct url list for (4/9) iids\n",
"pangeo_forge_esgf.recipe_inputs - INFO - Was not able to construct url list for the following iids:\n",
"pangeo_forge_esgf.recipe_inputs - INFO - {'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r5i1p1f1.day.pr.gn.v20190603', 'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r3i1p1f1.day.pr.gn.v20190603', 'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r2i1p1f1.day.pr.gn.v20190603', 'CMIP6.CMIP.MRI.MRI-ESM2-0.historical.r4i1p1f1.day.pr.gn.v20190603'}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# query pangeo-forge-esgf for the urls\n",
"# TODO: I need to write the order of urls into the attrs\n",
"import logging\n",
"import pangeo_forge_esgf\n",
"from pangeo_forge_esgf import get_urls_from_esgf, setup_logging\n",
"\n",
"# setup_logging('DEBUG')\n",
"setup_logging('INFO')\n",
"\n",
"url_dict = await get_urls_from_esgf(iids, limit_per_host=50, max_concurrency=50, choose_url='first_responsive')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8f290281-ba3a-45cf-b436-112170ec3439",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'CMIP6.ScenarioMIP.CSIRO-ARCCSS.ACCESS-CM2.ssp585.r1i1p1f1.day.pr.gn.v20210317': ['https://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/CSIRO-ARCCSS/ACCESS-CM2/ssp585/r1i1p1f1/day/pr/gn/v20210317/pr_day_ACCESS-CM2_ssp585_r1i1p1f1_gn_22510101-23001231.nc'],\n",
" 'CMIP6.ScenarioMIP.CSIRO.ACCESS-ESM1-5.ssp585.r4i1p1f1.day.sfcWind.gn.v20210318': ['https://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp585/r4i1p1f1/day/sfcWind/gn/v20210318/sfcWind_day_ACCESS-ESM1-5_ssp585_r4i1p1f1_gn_22510101-23001231.nc'],\n",
" 'CMIP6.ScenarioMIP.CSIRO.ACCESS-ESM1-5.ssp585.r6i1p1f1.day.sfcWind.gn.v20210318': ['https://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp585/r6i1p1f1/day/sfcWind/gn/v20210318/sfcWind_day_ACCESS-ESM1-5_ssp585_r6i1p1f1_gn_22010101-22501231.nc',\n",
" 'https://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp585/r6i1p1f1/day/sfcWind/gn/v20210318/sfcWind_day_ACCESS-ESM1-5_ssp585_r6i1p1f1_gn_22510101-23001231.nc'],\n",
" 'CMIP6.ScenarioMIP.CSIRO.ACCESS-ESM1-5.ssp585.r4i1p1f1.day.pr.gn.v20210318': ['https://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp585/r4i1p1f1/day/pr/gn/v20210318/pr_day_ACCESS-ESM1-5_ssp585_r4i1p1f1_gn_22010101-22501231.nc',\n",
" 'https://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp585/r4i1p1f1/day/pr/gn/v20210318/pr_day_ACCESS-ESM1-5_ssp585_r4i1p1f1_gn_22510101-23001231.nc'],\n",
" 'CMIP6.ScenarioMIP.CSIRO.ACCESS-ESM1-5.ssp585.r10i1p1f1.day.pr.gn.v20210318': ['https://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp585/r10i1p1f1/day/pr/gn/v20210318/pr_day_ACCESS-ESM1-5_ssp585_r10i1p1f1_gn_22010101-22501231.nc',\n",
" 'https://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp585/r10i1p1f1/day/pr/gn/v20210318/pr_day_ACCESS-ESM1-5_ssp585_r10i1p1f1_gn_22510101-23001231.nc']}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url_dict"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6c7f4e29-cbed-4ebd-9e49-9e2e3b76d8e1",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment