@kathoef
Last active May 18, 2021 07:58
parallel download of ERA5 reanalysis data
import cdsapi
import multiprocessing

# CDS API credentials (placeholders); if url/key are omitted, cdsapi reads them from ~/.cdsapirc.
client = cdsapi.Client(url='xxx', key='xxx:xxx')

def cdsapi_worker(dataset):
    # Submit the request to the CDS queue and download the result once it is ready.
    result = client.retrieve('reanalysis-era5-single-levels', dataset)
    result.download(dataset['file_name'])

def single_dataset(calendar_year, file_prefix):
    # One calendar year of 3-hourly mean sea level pressure at a single point (Oslo).
    return {
        'product_type': 'reanalysis',
        'variable': '151',  # mean sea level pressure
        'area': '60.0/10.5/60.0/10.5',  # Oslo (north/west/south/east)
        'time': ['00:00', '03:00', '06:00', '09:00', '12:00', '15:00', '18:00', '21:00'],
        'date': str(calendar_year) + '-01-01/' + str(calendar_year) + '-12-31',
        'format': 'netcdf',
        # Arbitrary extra entries in this dict are possible...
        'file_name': file_prefix + '_' + str(calendar_year) + '.nc',
    }

datasets = [single_dataset(calendar_year, 'oslo') for calendar_year in range(2007, 2011)]

# https://cds.climate.copernicus.eu/live/queue
# The maximum number of per-user requests that access online CDS data is 3.
# Let's overbook this by one...
pool = multiprocessing.Pool(processes=4)
pool.map(cdsapi_worker, datasets)
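
Once all downloads have finished, the yearly NetCDF files could be combined into a single time series, for example with xarray. This is a minimal sketch, assuming xarray (plus dask, which open_mfdataset requires) is installed; the pattern 'oslo_*.nc' matches the file names produced by single_dataset() above.

import xarray as xr

# Concatenate the per-year files along their shared coordinates (i.e. time).
ds = xr.open_mfdataset('oslo_*.nc', combine='by_coords')
print(ds)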