Skip to content

Instantly share code, notes, and snippets.

@mrayson
Created October 29, 2020 01:58
Show Gist options
  • Save mrayson/75878edafad8bfba3a7b2cdae6504f86 to your computer and use it in GitHub Desktop.
Save mrayson/75878edafad8bfba3a7b2cdae6504f86 to your computer and use it in GitHub Desktop.
# Snippet of python code for converting a bunch of netcdf files to zarr
#
# The raw files are processed one (year, month) batch at a time: each batch is
# opened as a single dataset, sea surface temperature is masked using its
# quality flag, longitudes are wrapped with modulo 360, and the result is
# written to one zarr store. The first batch creates the store; every later
# batch is appended along the 'time' dimension.
import glob

import numpy as np
import xarray as xr
import zarr

# Example: list all of the raw input files
sorted(glob.glob('/home/mrayson/scratch/HIMAWARI/raw/*.nc'))

# %%time  <- Jupyter cell magic; only valid as the first line of a notebook cell
# Convert the dataset to zarr (do this once only)
compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=2)
outpath = '/home/mrayson/scratch/HIMAWARI/raw_zarr'

# Settings for the first write: create/overwrite the store and set the
# compressor for the output variable.
mode = 'w'
append_dim = None
encoding = {'sea_surface_temperature': {'compressor': compressor}}

# To continue appending to an already existing store, start with these instead:
# mode = 'a'
# append_dim='time'
# encoding=None

for year in [2016, 2017, 2018, 2019]:
    for mo in range(1, 13):
        # Skip a few bad section
        # if year==2017 and mo == 10:
        #     continue

        filestr = '/home/mrayson/scratch/HIMAWARI/raw/%d%02d*.nc' % (year, mo)
        print(filestr)

        files = sorted(glob.glob(filestr))
        if not files:
            # open_mfdataset raises when given no files; skip the empty month
            # instead of aborting the whole conversion part-way through.
            print('No files found, skipping: %s' % filestr)
            continue

        # The context manager closes the underlying netcdf files once the
        # batch has been written, so handles do not pile up across batches.
        with xr.open_mfdataset(files, combine='by_coords', parallel=True) as ds:
            # Mask bad flagged data (keep only quality_level > 4)
            sst = ds['sea_surface_temperature'].where(ds['quality_level'] > 4, np.nan)

            # Wrap the longitude coordinate with modulo 360
            sst = sst.assign_coords({'lon': sst.lon % 360})

            # Save to zarr format, carrying over the global attributes
            dsout = xr.Dataset({'sea_surface_temperature': sst}, attrs=ds.attrs)
            dsout.to_zarr(outpath, encoding=encoding,
                          mode=mode, append_dim=append_dim)

        # After the first successful write, switch to appending along time.
        # The encoding is only passed when the store is first created.
        mode = 'a'
        append_dim = 'time'
        encoding = None

# Other open_mfdataset options (not used above):
# concat_dim='time', # data_vars=['sea_surface_temperature','quality_level'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment