Created
November 29, 2023 15:35
-
-
Save barronh/526d5163877e6095cfbdd89243b37276 to your computer and use it in GitHub Desktop.
Retrieve HRRR via Open Data on AWS and Zarr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__doc__ = """ | |
# Overview | |
This example shows how to open High Resolution Rapid Refresh (HRRR) meteorology | |
"surfaces." These surfaces are slices in vertical space to create single level | |
maps of variables like temperature, wind speed components, pressure, etc. Each | |
surface is opened using the Zarr archive described in Amazon's opendata registry | |
described at https://registry.opendata.aws/noaa-hrrr-pds/. | |
# Examples | |
## HRRR surface temperature | |
This example simply retrieves all surface temperature values and prints the | |
statistics. Note that the original value is float16 and is being cast to float32 | |
to ensure that statistics are calculated in higher precision. | |
ds = open_hrrrsfc('2023-11-23T20', varkeys=['TMP']) | |
df = ds['TMP'].to_dataframe().astype('f') | |
print(df.describe().round(2).to_csv()) | |
# Output: | |
# ,TMP | |
# count,1905141.0 | |
# mean,286.27 | |
# std,10.6 | |
# min,251.38 | |
# 25%,278.25 | |
# 50%,287.75 | |
# 75%,295.5 | |
# max,310.75 | |
# Requirements: | |
- Python3 | |
- s3fs | |
- xarray | |
- pandas | |
- zarr | |
# Install requirements with pip | |
pip install s3fs pandas xarray zarr | |
""" | |
__version__ = '0.1.0' | |
def open_hrrrsfc(date, varkeys, layer='surface', simtype='anl'): | |
""" | |
Open HRRR hourly meteorology surface from a date (including hour) and variable | |
keys. Optionally, specify the layer and simulation type (anl: analysis or | |
fcst: forecast). | |
Arguments | |
--------- | |
date : str or datetime | |
Any date construct accepted by pandas.to_datetime. Should include hour. | |
varkeys : str or list | |
List of variables from HRRR layer see https://hrrrzarr.s3.amazonaws.com/index.html | |
layer : str | |
2-dimensional slice name (default surface): | |
slice: surface, mean_sea_level, top_of_atmosphere | |
integration: entire_atmosphere_single_layer | |
*mb (e.g., 1000mb): 925, 850, 700, 500, 300, 250 | |
*m_above_ground (e.g., 1m_above_ground): 2, 8, 10, 80, 1000, 4000 | |
0_*m_above_ground: 500, 1000, 3000, 6000, | |
for more options see an example day | |
https://hrrrzarr.s3.amazonaws.com/index.html#sfc/20231127/20231127_22z_anl.zarr/ | |
simtype : str | |
Options: anl (analysis) or fcst (forecast) | |
Returns | |
------- | |
ds : xarray.Dataset | |
Dataset with all varkeys specified. If using dask, data will be retrieved | |
on demand, which allows for subsetting so as not to require all data. | |
""" | |
import s3fs | |
import xarray as xr | |
import pandas as pd | |
s3 = s3fs.S3FileSystem(anon=True) | |
# Copied from projparams.json | |
# https://hrrrzarr.s3.amazonaws.com/grid/projparams.json | |
# proj_kw = { | |
# 'proj': 'lcc', 'a': 6371229, 'b': 6371229, | |
# 'lon_0': 262.5, 'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5 | |
# } | |
# proj = pyproj.Proj(proj_kw) | |
# projstr = proj.srs | |
# crsattrs = proj.crs.to_cf() | |
# Hard coded to eliminate pyproj dependency | |
crs_wkt = ( | |
'PROJCRS["unknown",BASEGEOGCRS["unknown",DATUM["unknown",' | |
+ 'ELLIPSOID["unknown",6371229,0,LENGTHUNIT["metre",1,ID["EPSG",9001]]]],' | |
+ 'PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433],' | |
+ 'ID["EPSG",8901]]],CONVERSION["unknown",' | |
+ 'METHOD["Lambert Conic Conformal (2SP)",ID["EPSG",9802]],' | |
+ 'PARAMETER["Latitude of false origin",38.5,' | |
+ 'ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8821]],' | |
+ 'PARAMETER["Longitude of false origin",262.5,' | |
+ 'ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8822]],' | |
+ 'PARAMETER["Latitude of 1st standard parallel",38.5,' | |
+ 'ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8823]],' | |
+ 'PARAMETER["Latitude of 2nd standard parallel",38.5,' | |
+ 'ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8824]],' | |
+ 'PARAMETER["Easting at false origin",0,LENGTHUNIT["metre",1],' | |
+ 'ID["EPSG",8826]],PARAMETER["Northing at false origin",0,' | |
+ 'LENGTHUNIT["metre",1],ID["EPSG",8827]]],CS[Cartesian,2],' | |
+ 'AXIS["(E)",east,ORDER[1],LENGTHUNIT["metre",1,ID["EPSG",9001]]],' | |
+ 'AXIS["(N)",north,ORDER[2],LENGTHUNIT["metre",1,ID["EPSG",9001]]]]' | |
) | |
crs_proj4 = ( | |
'+proj=lcc +lat_0=38.5 +lon_0=262.5 +lat_1=38.5 +lat_2=38.5 +x_0=0 +y_0=0' | |
+ ' +R=6371229 +units=m +no_defs' | |
) | |
crsattrs = dict( | |
grid_mapping_name='lambert_conformal_conic', | |
semi_major_axis=6371229.0, semi_minor_axis=6371229.0, | |
longitude_of_prime_meridian=0.0, standard_parallel=(38.5, 38.5), | |
latitude_of_projection_origin=38.5, longitude_of_central_meridian=262.5, | |
false_easting=0.0, false_northing=0.0, | |
crs_proj4=crs_proj4, crs_wkt=crs_wkt | |
) | |
if isinstance(varkeys, str): | |
varkeys = [varkeys] | |
date = pd.to_datetime(date) | |
dpath = f'{date:hrrrzarr/sfc/%Y%m%d/%Y%m%d_%Hz}_{simtype}.zarr' | |
gridpath = s3fs.S3Map('hrrrzarr/grid/HRRR_chunk_index.zarr', s3=s3) | |
gridds = xr.open_dataset(gridpath, engine='zarr') | |
vards = [gridds[['latitude', 'longitude']]] | |
# vards = [] | |
for varkey in varkeys: | |
metapath = s3fs.S3Map(f'{dpath}/{layer}/{varkey}', s3=s3) | |
datapath = s3fs.S3Map(f'{dpath}/{layer}/{varkey}/{layer}', s3=s3) | |
ds = xr.open_mfdataset([metapath, datapath], engine='zarr') | |
vards.append(ds.rename(projection_x_coordinate='x', projection_y_coordinate='y')) | |
ds = xr.merge(vards) | |
ds['crs'] = xr.DataArray(0, name='crs', dims=(), attrs=crsattrs) | |
return ds | |
if __name__ == '__main__': | |
import numpy as np | |
print('Testing 20231120T20 80m_above_ground UGRD VGRD') | |
try: | |
ds = open_hrrrsfc('20231120T20', ['UGRD', 'VGRD'], layer='80m_above_ground') | |
print(ds) | |
print('Test succeeded: Review output above to confirm') | |
print('Checks below test basic expectations:') | |
chks = { | |
'xmin': (ds['x'].min(), -2697520.14252193), | |
'xmax': (ds['x'].max(), 2696479.85747807), | |
'ymin': (ds['y'].min(), -1587306.15255666), | |
'ymax': (ds['y'].max(), 1586693.84744334), | |
'UGRD shape': (ds['UGRD'].shape, (1059, 1799)), | |
'UGRD units': (ds['UGRD'].units.strip(), 'm s-1'), | |
'VGRD shape': (ds['VGRD'].shape, (1059, 1799)), | |
'VGRD units': (ds['VGRD'].units.strip(), 'm s-1'), | |
'forecast_reference_time': ( | |
ds['forecast_reference_time'].astype('d'), 1.7005104e+18 | |
) | |
} | |
for ckname, (got, val) in chks.items(): | |
if isinstance(got, str): | |
same = got == val | |
else: | |
same = np.allclose([got], [val]) | |
print(' -', ckname, {True: 'Pass', False: 'Fail'}[same]) | |
except Exception: | |
print('**Test failed**') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Basic installation and usage is shown below
Install prerequisites in bash:
Create plot in python