@aidanheerdegen
Last active August 29, 2015 14:27
Multiprocessing version of rhozmoc.py
from netCDF4 import Dataset
from numpy import *
from scipy.stats import binned_statistic
# Can't use multiprocessing.dummy as binned_statistic doesn't release the GIL
import multiprocessing
import os

def binsum(binarray, valarray, bins):
    summed, bin_edges, bin_number = binned_statistic(binarray, valarray, sum, bins, (min(bins), max(bins)))
    return summed

# Overturning in rho-z coordinates
def rhozmoc(data_file, grid_file, minval=1002, maxval=1030, inc=0.1):

    print 'Reading file ', grid_file
    input1 = Dataset(grid_file, 'r')
    area_t = input1.variables['area_t'][:]
    print 'area_t at 1000,1000 = ', area_t[1000,1000]

    print 'Reading file ' + data_file
    input2 = Dataset(data_file, 'r')
    rho = input2.variables['pot_rho_0'][:]
    wt = input2.variables['wt'][:]
    dzt = input2.variables['dzt'][:]
    time = input2.variables['time'][:]
    lat = input2.variables['yt_ocean'][:]
    lon = input2.variables['xt_ocean'][:]

    print 'shape rho = ', rho.shape
    nlevels = rho.shape[1]

    # Compute transport
    wt = wt * area_t

    rhobin = arange(minval - inc*2, maxval + inc*2, inc)
    print len(rhobin), rhobin

    # Reshape the data so only the levels dimension is kept explicitly
    rho_levels = reshape(rho, (nlevels, -1))
    wt_levels = reshape(wt, (nlevels, -1))

    # Create the output data array
    ta = zeros((nlevels, size(rhobin) - 1))

    # Set the NCPUS environment variable to limit the pool size,
    # otherwise all available CPUs are used
    count = int(os.environ.get('NCPUS', multiprocessing.cpu_count()))
    pool = multiprocessing.Pool(processes=count)
    print 'Using ', count, ' cpus'

    # The magic happens here. The routine binned_statistic will bin rho_levels based on
    # rhobin, and then apply the statistic (sum in this case) to the similarly shaped
    # array wt_levels. The min and max values are specified so that any value outside
    # this range is ignored. (A standalone toy demo of this call follows the script.)
    print 'Binning'
    results = []
    for i in range(nlevels):
        results.append(pool.apply_async(binsum, args=(rho_levels[i], wt_levels[i], rhobin)))
    pool.close()
    pool.join()

    # No matter what order the asynchronous jobs finish in, the results list is in
    # submission order
    for i, result in enumerate(results):
        ta[i,:] = result.get()

    ta = ta * 1.e-6  # transport in Sv

    # Sum up the transport (ta) to obtain overturning (tb)
    # in depth (st_ocean) and density (rhobin) space
    tb = cumsum(ta, 1)

    # Output to netCDF file
    output_file = 'rho_z_moc.nc'
    output = Dataset(output_file, 'w', format='NETCDF4')

    output.createDimension('st_ocean', nlevels)
    z_var = output.createVariable('st_ocean', 'f8', ('st_ocean',))
    z_var.units = 'meters'
    # NOTE: assumes the data file carries an st_ocean depth coordinate; write into the
    # variable's contents with [:] rather than rebinding the name
    z_var[:] = input2.variables['st_ocean'][:]

    output.createDimension('rhobin', len(rhobin) - 1)
    rhobin_var = output.createVariable('rhobin', 'f8', ('rhobin',))
    rhobin_var.units = 'kg/m^3'
    rhobin_var[:] = rhobin[:-1]

    mx_var = output.createVariable('rho_z_trans', 'f8', ('st_ocean', 'rhobin',))
    mx_var.units = 'Sv'
    mx_var[:,:] = tb

    print 'finished \n'

if __name__ == "__main__":
    data_file = '/short/v45/pas561/mom/archive/gfdl_nyf_1080_cp/5yrs_5day/output201/ocean__201_003.nc'
    grid_file = '/short/v45/pas561/mom/archive/gfdl_nyf_1080/output100/ocean_grid.nc'
    rhozmoc(data_file, grid_file)
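
To make the binning step concrete, here is a minimal, self-contained sketch of the same binned_statistic call that binsum makes, using made-up densities and transports (all numbers below are illustrative, not from the real data): the first array selects the bin, the second supplies the values that get summed, and anything outside the bin edges is dropped.

# Toy demonstration of the binned_statistic call used in binsum (made-up numbers).
from numpy import array, arange
from scipy.stats import binned_statistic

rho_sample = array([1002.1, 1002.5, 1002.6, 1003.1, 1003.2, 999.0])  # fake densities; 999.0 lies outside the bins
wt_sample  = array([0.5,    1.0,    2.0,    3.0,    4.0,    100.0])  # fake transports
edges      = arange(1002.0, 1004.0, 0.5)                             # bin edges: 1002.0, 1002.5, 1003.0, 1003.5

summed, bin_edges, bin_number = binned_statistic(rho_sample, wt_sample, sum, edges,
                                                 (min(edges), max(edges)))
print(summed)  # [ 0.5  3.   7. ] -- per-bin sums; the out-of-range 100.0 is dropped
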
@aidanheerdegen (Author)

NCPUS=16

real 0m11.782s
user 0m50.596s
sys 0m6.410s

NCPUS=8

real 0m13.058s
user 0m44.586s
sys 0m4.814s

NCPUS=4

real 0m17.230s
user 0m43.279s
sys 0m4.098s
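
For context on these numbers: the work is split level-by-level with apply_async, and although the jobs can finish in any order, the results list preserves submission order, so results[i] always corresponds to level i. Below is a minimal sketch of that submit-then-collect pattern, with a toy stand-in task instead of the real binning; the pool's worker count is what the NCPUS settings above vary.

# Toy illustration of the apply_async pattern; `square` is a stand-in for binsum.
import multiprocessing

def square(x):
    return x * x

if __name__ == "__main__":
    pool = multiprocessing.Pool(processes=4)           # rhozmoc sets this from NCPUS
    results = [pool.apply_async(square, args=(i,)) for i in range(8)]
    pool.close()
    pool.join()
    print([r.get() for r in results])                  # [0, 1, 4, 9, ...] in submission order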
