Skip to content

Instantly share code, notes, and snippets.

@dgergel
Last active January 5, 2022 01:00
Show Gist options
  • Save dgergel/bc7e1a3f0011aca3d82c4d2603a35777 to your computer and use it in GitHub Desktop.
Save dgergel/bc7e1a3f0011aca3d82c4d2603a35777 to your computer and use it in GitHub Desktop.
validate ERA-5 precip for a single gridcell
# install packages not already on impactlab server
# !pip install xclim
# ! pip install cdsapi
%matplotlib inline
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import os
import gcsfs
from matplotlib import cm
import warnings
import cdsapi
# wrapper function for passing impactlab token to gcsfs
def read_gcs_zarr(zarr_url, token='/opt/gcsfuse_tokens/impactlab-data.json', check=False):
"""
takes in a GCSFS zarr url, bucket token, and returns a dataset
Note that you will need to have the proper bucket authentication.
"""
fs = gcsfs.GCSFileSystem(token=token)
store_path = fs.get_mapper(zarr_url, check=check)
ds = xr.open_zarr(store_path)
return ds
# load ERA-5 versions
# ERA-5 "coarse" and "fine" (both at 1/4 degree) used in downscaling
pr_coarse_ref = read_gcs_zarr('gs://scratch-170cd6ec/91da8e47-b396-4360-b397-ece89f1b777e/e2e-miroc6-pr-8rn7f-2846959676/rechunked.zarr')
pr_fine_ref = read_gcs_zarr('gs://scratch-170cd6ec/91da8e47-b396-4360-b397-ece89f1b777e/e2e-miroc6-pr-8rn7f-587431548/rechunked.zarr')
# ERA-5 at regular Gaussian resolution, "cleaned" by renaming variable/dims
pr_cleaned_ref = read_gcs_zarr('gs://clean-b1dbca25/reanalysis/ERA-5/F320/pr.1995-2015.F320.zarr')
# ERA-5 at regular Gaussian resolution
pr_raw_ref = read_gcs_zarr('gs://impactlab-data/climate/source_data/ERA-5/downscaling/pr.1994-2015.F320.v5.zarr')
# define Seattle lat/lon
target_lat = 47.608013
target_lon = -122.335167
# download some era-5 data. Note this won't work if you don't have credentials,
# so you'll have to go online to ECMWF and get them if you want to replicate this.
# Otherwise you can grab the file at this location on impactlab-data.
filename = '/gcs/impactlab-data/climate/source_data/ERA-5/era5_pr_monthly_download_debug.nc'
c = cdsapi.Client()
# define small geographic area around Seattle
c.retrieve(
'reanalysis-era5-single-levels-monthly-means',
{
'format': 'netcdf',
'product_type': 'monthly_averaged_reanalysis',
'variable': 'total_precipitation',
'year': [
'2000', '2001', '2002',
'2003', '2004', '2005',
'2006', '2007', '2008',
'2009', '2010',
],
'month': [
'01', '02', '03',
'04', '05', '06',
'07', '08', '09',
'10', '11', '12',
],
'time': '00:00',
'area': [
49, -124, 46,
-121,
],
},
filename)
# open the new precip from CDS
new_pr_monthly = xr.open_dataset(filename)
# now get Seattle timeseries from each of these ERA-5 versions
pr_seattle_pipeline = pr_cleaned_ref['pr'].sel(lon=target_lon, lat=target_lat, method="nearest").load()
pr_seattle_pipeline_coarse = pr_coarse_ref['pr'].sel(lon=target_lon, lat=target_lat, method="nearest").load()
pr_seattle_pipeline_fine = pr_fine_ref['pr'].sel(lon=target_lon, lat=target_lat, method="nearest").load()
pr_seattle_pipeline_raw = pr_raw_ref['tp'].sel(longitude=target_lon, latitude=target_lat, method="nearest").load()
# note CDS precip is in m so we're also converting it to mm
pr_seattle_cds_mo_mean = new_pr_monthly['tp'].sel(longitude=target_lon, latitude=target_lat, method="nearest") * 1000
# ERA-5 data is monthly average of daily mean, so each month value is not a "monthly mean" but an avg daily value for that month. So here we convert each "avg daily value for a month" by the number of days in that month to get a monthly total precip
pr_seattle_cds_mo_total = pr_seattle_cds_mo_mean * new_pr_monthly.time.dt.days_in_month
# take a look at the different resolutions of ERA-5 versions
print(pr_coarse_ref['pr'].shape)
print(pr_fine_ref['pr'].shape)
print(pr_raw_ref['tp'].shape)
print(pr_cleaned_ref['pr'].shape)
print(new_pr_monthly['tp'].shape)
# plot Seattle annual total precip
plt.figure(figsize=(14, 4))
pr_seattle_cds_mo_total.groupby('time.year').sum().plot(label='new CDS ERA-5 data')
pr_seattle_pipeline.groupby('time.year').sum().plot(label='our pipeline, cleaned')
pr_seattle_pipeline.groupby('time.year').sum().plot(label='our pipeline, raw', linestyle=':')
pr_seattle_pipeline_coarse.groupby('time.year').sum().plot(label='our pipeline, coarse')
pr_seattle_pipeline_fine.groupby('time.year').sum().plot(label='our pipeline, fine')
plt.legend(bbox_to_anchor=(1.1, 1.05))
plt.ylabel('precip (mm)')
plt.title('Seattle annual precip')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment