Created
June 18, 2024 07:45
-
-
Save lbesnard/4aae3a3c074965c5f59184bd5e183fb5 to your computer and use it in GitHub Desktop.
mfdataset issue
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reproduction script for a memory problem seen when combining IMOS GHRSST
# L3S-1d files with xarray.open_mfdataset and writing the result to NetCDF.
from contextlib import ExitStack

import s3fs
import xarray as xr

s3 = s3fs.S3FileSystem(anon=True)

# This generates a list of strings with filenames
s3path = 's3://imos-data/IMOS/SRS/SST/ghrsst/L3S-1d/dn/2023/*'
remote_files = s3.glob(s3path)

output_file = '/tmp/dataset.nc'

# ExitStack closes every remote file handle when the block exits, including
# when open_mfdataset or to_netcdf raises.
with ExitStack() as stack:
    # create a fileset
    fileset = [stack.enter_context(s3.open(file)) for file in remote_files]

    # locate batch of files with issue
    subset = fileset[170:180]

    # ISSUE is with file 0, when added to another file
    problem_files = subset[0:2]

    # opening those 2 files together consumes a lot more memory than other files
    with xr.open_mfdataset(
        problem_files,
        engine='h5netcdf',
        concat_characters=True,
        mask_and_scale=True,
        decode_cf=True,
        decode_times=True,
        use_cftime=True,
        parallel=True,
        decode_coords=True,
    ) as ds:
        # Save the dataset to NetCDF file, but consumes all memory and dies
        ds.to_netcdf(output_file, mode='w')

# On the coiled cluster, because of this file, my dask graph is all red, and fails.
# I can't find what is wrong with this file, or how to handle it / check the
# files before passing them to open_mfdataset.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment