Auger Scalers

Run, for example:

./build_long.prof.py -- --years 2006 2015  --fname_inp ./test_iv.h5  --nbin 1200  --obs wAoP_wPrs  -fig ./test.png

where:

  • wAoP_wPrs is one of the available fields. Others include wAoP (AoP correction only), wAoP_wPrs_wGh (AoP, pressure, and geopotential-height corrections), aop (the AoP values themselves), etc.

  • -fig sets the filename of the output figure.


For more info on the arguments, run:

./build_long.prof.py -- -h

Important:

Note that, in order to run consistently, you'll need to place these two scripts in the following structure:

|
|- build_long.prof.py
|- shared_funcs/
   |- funcs.py
   |- __init__.py  #<--- this is an empty file
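
For example, on a Unix shell this layout can be created with:

mkdir -p shared_funcs && touch shared_funcs/__init__.py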

build_long.prof.py:

#!/usr/bin/env ipython
# -*- coding: utf-8 -*-
import numpy as np
from numpy import (
    nanmean, nanstd,
)
import argparse, os
from h5py import File as h5
from datetime import datetime, timedelta
from shared_funcs.funcs import (
    My2DArray, date2utc, utc2date, rebine, ncep_gh, nans
)
import itertools
#--- we need X to build the figure
if 'DISPLAY' in os.environ:
    from pylab import figure, show, close
else:
    raise SystemExit(
        " >>> ERROR: the DISPLAY environment variable doesn't exist!"
    )
#--- retrieve args
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
    '-y', '--years',
    type=int,
    nargs=2,
    default=[2006, 2013],
    help='years of the data-processing period, so that the profile '
         'is built from YearIni/Jan/01 until YearEnd/Dec/31',
)
parser.add_argument(
    '-fid', '--fname_inp',
    type=str,
    default='./test.h5',
    help='filename of the .h5 input (contains geopotential-height-corrected '
         'Scalers)',
)
parser.add_argument(
    '-obs', '--obs',
    type=str,
    nargs='+',
    default=['wAoP_wPrs_wGh'],  # NOTE: a list, since nargs='+'
    help='observables to process; can be more than one, and each must be '
         'a key inside the .h5 file. They are plotted superposed in the '
         'same figure.',
)
parser.add_argument(
    '-nb', '--nbin',
    type=int,
    default=100,
    help='total number of bins of the time domain for the long-term profile',
)
parser.add_argument(
    '-fig', '--fname_fig',
    type=str,
    default='test.png',
    help='filename for the output figure, or "screen" for an interactive plot.',
)
pa = parser.parse_args()
#---
finp_d = h5(pa.fname_inp,'r')
date_ini = datetime(pa.years[0], 1, 1)
date_end = datetime(pa.years[1], 12, 31)
assert date_end>date_ini, '`date_end` must be greater than `date_ini`!!'
#---
nbin = pa.nbin
class d(object):
    """
    data buffer for each bin of the profile
    """
    def __init__(self):
        self.n = 0  # number of original data points contributing
        self.rates = My2DArray(1, dtype=np.float32)
        self.mean = np.nan
        self.std = np.nan
#obs = pa.obs  #'wAoP_wPrs_wGh'
utc_ini = date2utc(datetime(1970,1,1,0,0,0))
ini, end = datetime(pa.years[0],1,1), datetime(pa.years[1],12,31)
Dt = (end-ini).total_seconds()  # [sec] total time span of the profile
dt = Dt/nbin                    # [sec] width of each time bin
# list of all pairs (year,month) of interest
YYYY_MM = list(itertools.product(
    range(pa.years[0], pa.years[1]+1),
    range(1, 12+1),
))
dd_ = {}  # one `dd` buffer-list per observable
for obs in pa.obs:
    # buffer for each observable
    dd = [ d() for i in range(nbin) ]
    for yyyy, mm in YYYY_MM:
        rpath = '%04d/%02d' % (yyyy, mm)
        tutc = finp_d['t_utc/'+rpath][...]  # [utc sec]
        r = finp_d[obs+'/'+rpath][...]      # rate
        ind = np.array((tutc-date2utc(ini))/dt, dtype=int)  # (*)
        ind_list = np.unique(ind)  # (**)
        print yyyy, mm, ind_list
        if ind_list.size > 1:
            assert ind_list[0] < ind_list[1], "wrong."  # consistency
        for i in ind_list:
            cc = ind == i  # collect all belonging to the same time bin
            ri = r[cc]     # rates of this i-block
            n = dd[i].n
            dd[i].n += ri.size
            dd[i].rates.resize_rows(n+ri.size)
            dd[i].rates[n:n+ri.size] = ri
        if len(ind_list) == 1:  # [likely]
            continue
        else:  # [unlikely]
            # close every bin except the last one, which may still
            # receive data from the next month
            for j in ind_list[:-1]:
                n = dd[j].n
                dd[j].rates = dd[j].rates[:n]
                dd[j].mean = nanmean(dd[j].rates)
                dd[j].std = nanstd(dd[j].rates)
                dd[j].rates = None  # delete data
    # close the bins still open after the month loop (at least the
    # last bin of the whole period is never closed inside it)
    for j in range(nbin):
        if dd[j].rates is not None and dd[j].n > 0:
            n = dd[j].n
            dd[j].mean = nanmean(dd[j].rates[:n])
            dd[j].std = nanstd(dd[j].rates[:n])
            dd[j].rates = None
    dd_[obs] = dd
"""
(*): list of integers corresponding to the nearest integer value
of the utc-sec discretized in units of `dt` time-windows.
For example, taking utc-sec="equivalent to 09/nov/2008 05:40",
`ini=datetime(2008,nov,1,0,0)`, and `dt=1day` results in
an `ind` value of 9.
(**): to collect all with similar `ind` values
"""
# build the figure
do = {}
fig = figure(1, figsize=(8,4))
ax = fig.add_subplot(111)
for obs in pa.obs:
    prof = np.array([dd_[obs][n].mean for n in range(nbin)])
    t, r = np.arange(prof.size)*dt/(86400.*365), 100.*prof
    do[obs] = np.array([t, r]).T
    ax.plot(t, r, '-o', ms=2, alpha=.7, label=obs)
    np.savetxt('test_%s.txt' % obs, do[obs], fmt='%12.2f')
ax.legend(loc='best')
ax.grid(True)
ax.set_xlabel('years since '+date_ini.strftime('%b/%Y'))
ax.set_ylabel('count rate')
if pa.fname_fig == 'screen':
    print " >>> showing interactively..."
    show()
else:
    fig.savefig(pa.fname_fig, dpi=135, bbox_inches='tight')
    print "\n ---> we generated: "+pa.fname_fig
    close(fig)
#EOF

shared_funcs/funcs.py:

#!/usr/bin/env ipython
# -*- coding: utf-8 -*-
import numpy as np
import os, sys
from scipy.io.netcdf import netcdf_file
from h5py import File as h5
from numpy import (
    loadtxt, size, mean, isnan, array,
    ones, zeros, empty, nanmean, nanmedian, nanstd,
    concatenate,
)
from glob import glob
from os.path import isfile, isdir
from datetime import datetime, timedelta
from scipy.interpolate import (
    splrep,  # spline representation
    splev,   # spline evaluator
)
import argparse
if 'DISPLAY' in os.environ:
    from pylab import pause, find
#--- globals
HOME = os.environ['HOME']
class arg_to_datetime(argparse.Action):
    """
    argparse action to handle command-line arguments of
    the form "dd/mm/yyyy" (string type), converting them
    to datetime objects.
    NOTE: can be used even when `nargs` >= 1.
    """
    def __init__(self, option_strings, dest, **kwargs):
        super(arg_to_datetime, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        # `values` is a list when `nargs` is given
        if len(values) == 1:
            dd, mm, yyyy = map(int, values[0].split('/'))
            value = datetime(yyyy, mm, dd)
        elif len(values) > 1:
            value = []
            for val in values:
                dd, mm, yyyy = map(int, val.split('/'))
                value += [ datetime(yyyy, mm, dd) ]
        setattr(namespace, self.dest, value)
class arg_to_utcsec(argparse.Action):
    """
    argparse action to handle command-line arguments of
    the form "dd/mm/yyyy" (string type), converting them
    to UTC seconds.
    """
    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        if nargs is not None:
            raise ValueError("nargs not allowed")
        super(arg_to_utcsec, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        dd, mm, yyyy = map(int, values.split('/'))
        value = (datetime(yyyy,mm,dd) - datetime(1970,1,1)).total_seconds()
        setattr(namespace, self.dest, value)
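#--- usage sketch for the two actions above (doctest style; the
#    option names here are only illustrative):
# >>> p = argparse.ArgumentParser()
# >>> _ = p.add_argument('--date', action=arg_to_datetime, nargs=1)
# >>> _ = p.add_argument('--tini', action=arg_to_utcsec)
# >>> pa = p.parse_args(['--date', '09/11/2008', '--tini', '01/01/2006'])
# >>> pa.date
# datetime.datetime(2008, 11, 9, 0, 0)
# >>> pa.tini
# 1136073600.0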
def nans(shape, dtype=np.float32):
    return np.nan*np.ones(shape, dtype=dtype)
def ncep_gh(yyyy, mm, level=None, dir_src=None, sync=True, tsync=None):
    """
    input:
    tsync: in utc-sec
    """
    if dir_src is None:
        dir_src = '../out/out.build_weather/ncep'  #HOME+'/data_gdas'
    if level is None:
        lname = 'level_%04d' % 100  # level in [hPa]
    fname_inp = dir_src + '/test_%04d.h5' % yyyy
    assert isfile(fname_inp), ' --> does not exist: '+fname_inp
    f = h5(fname_inp, 'r')
    dpath = lname + '/%04d-%02d' % (yyyy, mm)
    #print " ---> ", f[dpath].keys()
    g_t = f[dpath+'/utcsec'].value  # utc sec
    g_h = f[dpath+'/h'].value       # geop height
    if sync:
        # sync the geopotential height to the `tsync` time grid
        tck = splrep(g_t, g_h, s=0)
        gh = splev(tsync, tck, der=0)  # der: order of derivative to compute
        return gh
    else:
        return g_h
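#--- the `sync` branch above is plain cubic-spline resampling; a
#    self-contained sketch of the same idea (values illustrative):
# >>> t_src = np.arange(0., 10.)        # coarse source time grid
# >>> y_src = t_src**2                  # source samples
# >>> tck = splrep(t_src, y_src, s=0)   # s=0: interpolating spline
# >>> gh = splev(5.5, tck, der=0)       # resample at a new time (~30.25)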
def equi_days(dini, dend, n):
    """
    returns the most equi-partitioned tuple of numbers of
    days between the date objects `dini` and `dend`
    """
    days = (dend - dini).days
    days_part = np.zeros(n, dtype=np.int)
    resid = np.mod(days, n)
    for i in range(n-resid):
        days_part[i] = days/n
    # last positions, where the residuals are placed
    last = np.arange(start=-1, stop=-resid-1, step=-1)
    for i in last:
        days_part[i] = days/n + 1
    assert np.sum(days_part) == days, \
        " --> something went wrong! :/ "
    return days_part
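#--- e.g. split the 31 days of Jan/2006 into 4 blocks:
# >>> equi_days(datetime(2006,1,1), datetime(2006,2,1), 4)
# array([7, 8, 8, 8])
#    (7+8+8+8 = 31; the residuals go to the last positions)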
def equi_dates(dini, dend, n):
    """
    returns:
    - inis: numpy array of dates, of size (n+1), where:
      - inis[:-1]: the `n` start dates of the most equi-partitioned
        contiguous blocks of dates between `dini` and `dend`
        (date objects).
      - inis[-1]: `dend` (max date of the whole block)
    """
    days_part = equi_days(dini, dend, n)
    inis = np.empty(n+1, dtype=datetime)
    inis[0] = dini
    inis[-1] = dend
    for i in range(1, inis.size-1):
        inis[i] = inis[i-1] + timedelta(days=days_part[i-1])
    return inis
def equi_dates_RoundMonth(dini, dend, n):
    """ same as equi_dates(), but with the start dates rounded
    down to the 1st of the month """
    assert dini.day == 1, " ERROR: dini.day must be 1."
    inis = equi_dates(dini, dend, n)
    inis_ = np.empty(n+1, dtype=datetime)
    inis_[-1] = dend
    for i, i_ in zip(inis, range(inis_.size-1)):
        inis_[i_] = datetime(i.year, i.month, 1)
    return inis_
def rebine_mats(a, b):  # original version
    """
    Rebinning of `b` to the resolution of `a`,
    where:
    - b[0] is denser than a[0] (IMPORTANT!)
    - a[0], a[1]: x and y components of a
    - b[0], b[1]: x and y components of b
    NOTE: for this to make sense (*), the following should
    hold: (bx[0]-ax[0]) << La && (bx[-1]-ax[-1]) << La,
    where La = (ax[-1]-ax[0]); in other words,
    "the borders of `bx` must *not* be very
    different from those of `ax`."
    """
    ax, ay = a[0,:], a[1,:]
    bx, by = b[0,:], b[1,:]
    by_binned = np.zeros(ax.size, dtype=np.float64)
    for j in range(ax.size):
        if j == 0:
            ax_lo = 0.5*(ax[0]+ax[1])  # midpoint of the first two
            cc = bx <= ax_lo  # (*)
        elif j == ax.size-1:
            ax_hi = 0.5*(ax[-2]+ax[-1])  # midpoint of the last two
            cc = bx >= ax_hi  # (*)
        else:
            ax_min = 0.5*(ax[j-1] + ax[j])
            ax_max = 0.5*(ax[j] + ax[j+1])
            cc = (bx > ax_min) & (bx < ax_max)
        by_binned[j] = np.nanmean(by[cc])
    return by_binned
def rebine(ax, bx, by_data, nbef=1, naft=1):
    """
    Rebinning of `by_data` to the resolution of `ax`.
    **IMPORTANT**: `bx` is denser than `ax`!
    inputs:
    ax      : time domain to which `by_data` will be adapted/rebinned/synced.
    bx      : time domain of `by_data`.
    by_data : assumed of shape (:,:) or (:).
    NOTE: for this to make sense (*), the following should
    hold: (bx[0]-ax[0]) << La && (bx[-1]-ax[-1]) << La,
    where La = (ax[-1]-ax[0]); in other words,
    "the borders of `bx` must *not* be very
    different from those of `ax`."
    """
    nbsh = len(by_data.shape)  # should be 1 or 2.
    if nbsh == 2:
        nb_col = by_data.shape[1]
        by_binned = np.zeros((ax.size, nb_col), dtype=np.float32)
        by = by_data
    elif nbsh == 1:
        by_binned = np.zeros((ax.size, 1), dtype=np.float32)
        by = np.reshape(by_data, (by_data.size, 1))
    else:
        raise SystemExit(" ERROR: len(by_data.shape) should be 1 or 2.")
    # NOTE: the means below are taken per column (axis=0), so that
    # multi-column input keeps its columns independent.
    #--- first row
    ax_lo = 0.5*(ax[0]+ax[1])  # midpoint of the first two
    cc = bx <= ax_lo  # (*)
    by_binned[0,:] = np.nanmean(by[cc,:], axis=0)
    #--- last row
    ax_hi = 0.5*(ax[-2]+ax[-1])  # midpoint of the last two
    cc = bx >= ax_hi  # (*)
    by_binned[ax.size-1,:] = np.nanmean(by[cc,:], axis=0)
    #--- rest of the rows
    for j in range(1, ax.size-1):
        ax_min = 0.5*(ax[j-1] + ax[j])
        ax_max = 0.5*(ax[j] + ax[j+1])
        cc = (bx > ax_min) & (bx < ax_max)
        by_binned[j,:] = np.nanmean(by[cc,:], axis=0)
    # the output must have the same number of columns as the input
    if nbsh == 1:
        return by_binned[:,0]
    else:
        return by_binned
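#--- a small, self-contained example of `rebine` (same idea as
#    `rebine_mats`): each output entry is the nanmean of the dense
#    samples falling inside the corresponding coarse bin.
# >>> ax = np.array([0., 1., 2., 3.])   # coarse grid (4 bins)
# >>> bx = np.linspace(0., 3., 31)      # dense grid
# >>> rebine(ax, bx, 2.*bx)             # dense signal y = 2*x
# array([ 0.5,  2. ,  4. ,  5.5], dtype=float32)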
def utc2date(t):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    date = date_utc + timedelta(days=t/86400.)
    return date

def date2utc(date):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    utcsec = (date - date_utc).total_seconds()  # [utc sec]
    return utcsec

def gps2date(t):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    t_utc = t + 315964800  # [utc sec]; 315964800 = UTC secs of the GPS epoch (06/Jan/1980)
    date = date_utc + timedelta(days=t_utc/86400.)
    return date

def date2gps(date):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    utcsec = (date - date_utc).total_seconds()  # [utc sec]
    gpssec = utcsec - 315964800  # [gps sec]
    return gpssec
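#--- quick checks (leap seconds are ignored, as in the converters above):
# >>> date2utc(datetime(1980, 1, 6))  # UTC secs of the GPS epoch
# 315964800.0
# >>> gps2date(0)
# datetime.datetime(1980, 1, 6, 0, 0)
# >>> utc2date(0)
# datetime.datetime(1970, 1, 1, 0, 0)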
class My2DArray(object):
    """
    wrapper around a numpy array with:
    - a flexible number of rows
    - a record of the maximum number of rows requested (`nrow`)
    NOTE:
    this was tested for 1D and 2D arrays.
    TODO:
    try wrapping `numpy.ndarray` directly instead of `object`, to
    make the wrapper more natural.
    """
    def __init__(self, shape, dtype=np.float32):
        self.this = np.empty(shape, dtype=dtype)
        setattr(self, '__array__', self.this.__array__)
        self.nrow = 0
    def resize_rows(self, nx_new=None):
        """ grow axis=0 to `nx_new` rows (or twice the current
        size, if `nx_new` is None), **without** losing data.
        """
        sh_new = np.copy(self.this.shape)
        nx = self.this.shape[0]
        if nx_new is None:
            sh_new[0] = 2*sh_new[0]
        elif nx_new <= nx:
            return 0  # nothing to do
        else:
            sh_new[0] = nx_new
        tmp = self.this.copy()
        new = np.nan*np.ones(sh_new, dtype=self.this.dtype)
        new[:nx] = tmp  # new rows are padded with NaN
        self.this = new
        """
        For some reason (probably due to the numpy
        implementation), if we don't do this,
        >>> print self.__array__()
        stays truncated to the original size that was
        set at __init__() time.
        So we need to tell numpy our new resized shape!
        """
        setattr(self, '__array__', self.this.__array__)
    def __get__(self, instance, owner):
        return self.this

    def __getitem__(self, i):
        """
        to retrieve only the non-trivial content:
        >>> print m[...]
        >>> print m[:]
        to retrieve the whole allocated array:
        >>> print m.this[...]
        >>> print m[:-1]
        >>> print m[:-1,:]
        """
        if not (i is Ellipsis or i == slice(None, None, None)):
            return self.this[i]
        else:
            # return only the non-trivial content
            return self.this[:self.nrow]
    def __setitem__(self, i, value):
        """
        We can safely use:
        >>> ma[n:n+m,:] = [...]
        even when n+m is greater than our current size in axis=0.
        NOTE: `i` can be:
        - a scalar
        - a list
        - a tuple of (slice,scalar), (slice,slice),
          (scalar,slice), etc.
        - a slice
        """
        # we need to determine `stop` in order to check whether
        # we are running out of rows.
        if type(i) == slice:
            # case:
            # >>> m[2:5] = ...
            stop = i.stop
            _ind = i
            self.nrow = np.max([stop, self.nrow])
        elif type(i) == tuple and type(i[0]) == slice:
            # cases:
            # >>> m[n:n+m,:] = ...  # (*)
            # >>> m[n:,:]    = ...  # (**)
            # >>> m[n:,4]    = ...
            # >>> m[:,:]     = ...  # (***)
            # >>> m[:,4]     = ...
            if i[0].start is None and \
               i[0].stop is None and \
               i[0].step is None:  # (***)
                _ind = (slice(None, None, None), i[1])
                stop = 0
                self.nrow = np.max([np.size(value, axis=0), self.nrow])
            else:
                stop_aux = i[0].start + np.size(value, axis=0)  # (*)
                stop = stop_aux if i[0].stop is None else i[0].stop  # (**) & (*)
                _ind = (slice(i[0].start, stop, i[0].step), i[1])
                self.nrow = np.max([stop, self.nrow])
        elif type(i) == tuple and type(i[0]) in (list, int):
            # cases:
            # >>> m[k,:]     = ...  # type=int
            # >>> m[[3,4],:] = ...  # type=list
            # >>> m[[3,4],k] = ...  # type=list
            stop = np.max(i[0])  # TEST
            _ind = i
            self.nrow = np.max([stop+1, self.nrow])
        else:
            raise IndexError(" --> don't know how to handle %r" % i)
        # if we are short of rows, keep doubling the size of
        # axis=0 until the assignment fits.
        if stop >= self.this.shape[0]:
            nx_new = self.this.shape[0]
            while nx_new <= stop:
                nx_new *= 2
            self.resize_rows(nx_new)
        # finally, assign
        self.this[_ind] = value
        # NOTE: `nrow` registers the maximum number of rows
        # requested so far (a size, *not* a row index).

    def __getattr__(self, attnm):
        return getattr(self.this, attnm)
#EOF
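
A brief usage sketch of My2DArray (an interactive session; values illustrative, assuming numpy is imported as np):

>>> m = My2DArray((4, 3), dtype=np.float32)  # start with 4 rows, 3 columns
>>> m[0:2] = 1.0   # plain assignment within the allocated size
>>> m[6:8] = 2.0   # out of range: axis=0 is doubled until row 8 fits
>>> m[...].shape   # only the non-trivial rows (nrow=8)
(8, 3)
>>> m.this.shape   # the full allocated buffer
(16, 3)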