@aidanheerdegen
Last active August 29, 2015 14:27
Multiprocessing version of rhozmoc.py
from netCDF4 import Dataset
from numpy import *
from scipy.stats import binned_statistic
# Can't use multiprocessing.dummy as binned_statistic doesn't release the GIL
import multiprocessing
import os

def binsum(binarray, valarray, bins):
    summed, bin_edges, bin_number = binned_statistic(binarray, valarray, sum, bins, (min(bins), max(bins)))
    return summed

# Overturning in rho-z coordinates
def rhozmoc(data_file, grid_file, minval=1002, maxval=1030, inc=0.1):

    print 'Reading file ', grid_file
    input1 = Dataset(grid_file, 'r')
    area_t = input1.variables['area_t'][:]
    print 'area_t at 1000,1000 = ', area_t[1000,1000]

    print 'Reading file ' + data_file
    input2 = Dataset(data_file, 'r')
    rho = input2.variables['pot_rho_0'][:]
    wt = input2.variables['wt'][:]
    dzt = input2.variables['dzt'][:]
    time = input2.variables['time'][:]
    lat = input2.variables['yt_ocean'][:]
    lon = input2.variables['xt_ocean'][:]

    print 'shape rho = ', rho.shape
    nlevels = rho.shape[1]

    # Compute transport
    wt = wt * area_t

    rhobin = arange(minval - inc*2, maxval + inc*2, inc)
    print len(rhobin), rhobin

    # Reshape the data so only the levels dimension is kept explicitly
    rho_levels = reshape(rho, (nlevels, -1))
    wt_levels = reshape(wt, (nlevels, -1))

    # Create the output data array
    ta = zeros((nlevels, size(rhobin) - 1))

    # Set the NCPUS environment variable to limit the pool size,
    # otherwise all available CPUs are used
    count = int(os.environ.get('NCPUS', multiprocessing.cpu_count()))
    pool = multiprocessing.Pool(processes=count)
    print 'Using ', count, ' cpus'

    # The magic happens here. The routine binned_statistic will bin rho_levels based on
    # rhobin, and then apply the statistic (sum in this case) to the similarly shaped
    # array wt_levels. The min and max values are specified so that any value outside
    # this range is ignored. (A standalone toy demo of this call follows the script.)
    print 'Binning'
    results = []
    for i in range(nlevels):
        results.append(pool.apply_async(binsum, args=(rho_levels[i], wt_levels[i], rhobin)))
    pool.close()
    pool.join()

    # No matter what order the asynchronous jobs finish in, the results list is in
    # submission order
    for i, result in enumerate(results):
        ta[i,:] = result.get()

    ta = ta * 1.e-6  # transport in Sv

    # Sum up the transport (ta) to obtain overturning (tb)
    # in depth (st_ocean) and density (rhobin) space
    tb = cumsum(ta, 1)

    # Output to netCDF file
    output_file = 'rho_z_moc.nc'
    output = Dataset(output_file, 'w', format='NETCDF4')

    output.createDimension('st_ocean', nlevels)
    z_var = output.createVariable('st_ocean', 'f8', ('st_ocean',))
    z_var.units = 'meters'
    # NOTE: assumes the data file carries an st_ocean depth coordinate; write into the
    # variable's contents with [:] rather than rebinding the name
    z_var[:] = input2.variables['st_ocean'][:]

    output.createDimension('rhobin', len(rhobin) - 1)
    rhobin_var = output.createVariable('rhobin', 'f8', ('rhobin',))
    rhobin_var.units = 'kg/m^3'
    rhobin_var[:] = rhobin[:-1]

    mx_var = output.createVariable('rho_z_trans', 'f8', ('st_ocean', 'rhobin',))
    mx_var.units = 'Sv'
    mx_var[:,:] = tb

    print 'finished \n'

if __name__ == "__main__":
    data_file = '/short/v45/pas561/mom/archive/gfdl_nyf_1080_cp/5yrs_5day/output201/ocean__201_003.nc'
    grid_file = '/short/v45/pas561/mom/archive/gfdl_nyf_1080/output100/ocean_grid.nc'
    rhozmoc(data_file, grid_file)
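
To make the binning step concrete, here is a minimal, self-contained sketch of the same binned_statistic call that binsum makes, using made-up densities and transports (all numbers below are illustrative, not from the real data): the first array selects the bin, the second supplies the values that get summed, and anything outside the bin edges is dropped.

# Toy demonstration of the binned_statistic call used in binsum (made-up numbers).
from numpy import array, arange
from scipy.stats import binned_statistic

rho_sample = array([1002.1, 1002.5, 1002.6, 1003.1, 1003.2, 999.0])  # fake densities; 999.0 lies outside the bins
wt_sample  = array([0.5,    1.0,    2.0,    3.0,    4.0,    100.0])  # fake transports
edges      = arange(1002.0, 1004.0, 0.5)                             # bin edges: 1002.0, 1002.5, 1003.0, 1003.5

summed, bin_edges, bin_number = binned_statistic(rho_sample, wt_sample, sum, edges,
                                                 (min(edges), max(edges)))
print(summed)  # [ 0.5  3.   7. ] -- per-bin sums; the out-of-range 100.0 is dropped
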
@aidanheerdegen (Author)

NCPUS=16

real 0m11.782s
user 0m50.596s
sys 0m6.410s

NCPUS=8

real 0m13.058s
user 0m44.586s
sys 0m4.814s

NCPUS=4

real 0m17.230s
user 0m43.279s
sys 0m4.098s
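
For context on these numbers: the work is split level-by-level with apply_async, and although the jobs can finish in any order, the results list preserves submission order, so results[i] always corresponds to level i. Below is a minimal sketch of that submit-then-collect pattern, with a toy stand-in task instead of the real binning; the pool's worker count is what the NCPUS settings above vary.

# Toy illustration of the apply_async pattern; `square` is a stand-in for binsum.
import multiprocessing

def square(x):
    return x * x

if __name__ == "__main__":
    pool = multiprocessing.Pool(processes=4)           # rhozmoc sets this from NCPUS
    results = [pool.apply_async(square, args=(i,)) for i in range(8)]
    pool.close()
    pool.join()
    print([r.get() for r in results])                  # [0, 1, 4, 9, ...] in submission order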
