Auger Scalers

Run, for example:

./build_long.prof.py -- --years 2006 2015  --fname_inp ./test_iv.h5  --nbin 1200  --obs wAoP_wPrs  -fig ./test.png

where:

  • wAoP_wPrs is one of the available fields. Others include wAoP (AoP correction only), wAoP_wPrs_wGh (AoP, pressure, and geopotential-height corrections), aop (the AoP values themselves), etc.

  • -fig sets the filename of the output figure.


For more info on the arguments, run:

./build_long.prof.py -- -h

Important:

Note that, in order to run consistently, you'll need to place these two scripts in the following structure:

|
|- build_long.prof.py
|- shared_funcs/
   |- funcs.py
   |- __init__.py  #<--- this is an empty file
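
For example, on a Unix shell this layout can be created with:

mkdir -p shared_funcs && touch shared_funcs/__init__.py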

build_long.prof.py:

#!/usr/bin/env ipython
# -*- coding: utf-8 -*-
import numpy as np
from numpy import (
    nanmean, nanstd,
)
import argparse, os
from h5py import File as h5
from datetime import datetime, timedelta
from shared_funcs.funcs import (
    My2DArray, date2utc, utc2date, rebine, ncep_gh, nans
)
import itertools
#--- we need X to build the figure
if 'DISPLAY' in os.environ:
    from pylab import figure, show, close
else:
    raise SystemExit(
        " >>> ERROR: the DISPLAY environment variable doesn't exist!"
    )
#--- retrieve args
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
    '-y', '--years',
    type=int,
    nargs=2,
    default=[2006, 2013],
    help='years of the data-processing period, so that the profile '
         'is built from YearIni/Jan/01 until YearEnd/Dec/31',
)
parser.add_argument(
    '-fid', '--fname_inp',
    type=str,
    default='./test.h5',
    help='filename of the .h5 input (contains geopotential-height-corrected '
         'Scalers)',
)
parser.add_argument(
    '-obs', '--obs',
    type=str,
    nargs='+',
    default=['wAoP_wPrs_wGh'],  # NOTE: a list, since nargs='+'
    help='observables to process; can be more than one, and each must be '
         'a key inside the .h5 file. They are plotted superposed in the '
         'same figure.',
)
parser.add_argument(
    '-nb', '--nbin',
    type=int,
    default=100,
    help='total number of bins of the time domain for the long-term profile',
)
parser.add_argument(
    '-fig', '--fname_fig',
    type=str,
    default='test.png',
    help='filename for the output figure, or "screen" for an interactive plot.',
)
pa = parser.parse_args()
#---
finp_d = h5(pa.fname_inp,'r')
date_ini = datetime(pa.years[0], 1, 1)
date_end = datetime(pa.years[1], 12, 31)
assert date_end>date_ini, '`date_end` must be greater than `date_ini`!!'
#---
nbin = pa.nbin
class d(object):
    """
    data buffer for each bin of the profile
    """
    def __init__(self):
        self.n = 0  # number of original data points contributing
        self.rates = My2DArray(1, dtype=np.float32)
        self.mean = np.nan
        self.std = np.nan
#obs = pa.obs  #'wAoP_wPrs_wGh'
utc_ini = date2utc(datetime(1970,1,1,0,0,0))
ini, end = datetime(pa.years[0],1,1), datetime(pa.years[1],12,31)
Dt = (end-ini).total_seconds()  # [sec] total time span of the profile
dt = Dt/nbin                    # [sec] width of each time bin
# list of all pairs (year,month) of interest
YYYY_MM = list(itertools.product(
    range(pa.years[0], pa.years[1]+1),
    range(1, 12+1),
))
dd_ = {}  # one `dd` buffer-list per observable
for obs in pa.obs:
    # buffer for each observable
    dd = [ d() for i in range(nbin) ]
    for yyyy, mm in YYYY_MM:
        rpath = '%04d/%02d' % (yyyy, mm)
        tutc = finp_d['t_utc/'+rpath][...]  # [utc sec]
        r = finp_d[obs+'/'+rpath][...]      # rate
        ind = np.array((tutc-date2utc(ini))/dt, dtype=int)  # (*)
        ind_list = np.unique(ind)  # (**)
        print yyyy, mm, ind_list
        if ind_list.size > 1:
            assert ind_list[0] < ind_list[1], "wrong."  # consistency
        for i in ind_list:
            cc = ind == i  # collect all belonging to the same time bin
            ri = r[cc]     # rates of this i-block
            n = dd[i].n
            dd[i].n += ri.size
            dd[i].rates.resize_rows(n+ri.size)
            dd[i].rates[n:n+ri.size] = ri
        if len(ind_list) == 1:  # [likely]
            continue
        else:  # [unlikely]
            # close every bin except the last one, which may still
            # receive data from the next month
            for j in ind_list[:-1]:
                n = dd[j].n
                dd[j].rates = dd[j].rates[:n]
                dd[j].mean = nanmean(dd[j].rates)
                dd[j].std = nanstd(dd[j].rates)
                dd[j].rates = None  # delete data
    # close the bins still open after the month loop (at least the
    # last bin of the whole period is never closed inside it)
    for j in range(nbin):
        if dd[j].rates is not None and dd[j].n > 0:
            n = dd[j].n
            dd[j].mean = nanmean(dd[j].rates[:n])
            dd[j].std = nanstd(dd[j].rates[:n])
            dd[j].rates = None
    dd_[obs] = dd
"""
(*): list of integers corresponding to the nearest integer value
of the utc-sec discretized in units of `dt` time-windows.
For example, taking utc-sec="equivalent to 09/nov/2008 05:40",
`ini=datetime(2008,nov,1,0,0)`, and `dt=1day` results in
an `ind` value of 9.
(**): to collect all with similar `ind` values
"""
# build the figure
do = {}
fig = figure(1, figsize=(8,4))
ax = fig.add_subplot(111)
for obs in pa.obs:
    prof = np.array([dd_[obs][n].mean for n in range(nbin)])
    t, r = np.arange(prof.size)*dt/(86400.*365), 100.*prof
    do[obs] = np.array([t, r]).T
    ax.plot(t, r, '-o', ms=2, alpha=.7, label=obs)
    np.savetxt('test_%s.txt' % obs, do[obs], fmt='%12.2f')
ax.legend(loc='best')
ax.grid(True)
ax.set_xlabel('years since '+date_ini.strftime('%b/%Y'))
ax.set_ylabel('count rate')
if pa.fname_fig == 'screen':
    print " >>> showing interactively..."
    show()
else:
    fig.savefig(pa.fname_fig, dpi=135, bbox_inches='tight')
    print "\n ---> we generated: "+pa.fname_fig
    close(fig)
#EOF

shared_funcs/funcs.py:

#!/usr/bin/env ipython
# -*- coding: utf-8 -*-
import numpy as np
import os, sys
from scipy.io.netcdf import netcdf_file
from h5py import File as h5
from numpy import (
    loadtxt, size, mean, isnan, array,
    ones, zeros, empty, nanmean, nanmedian, nanstd,
    concatenate,
)
from glob import glob
from os.path import isfile, isdir
from datetime import datetime, timedelta
from scipy.interpolate import (
    splrep,  # spline representation
    splev,   # spline evaluator
)
import argparse
if 'DISPLAY' in os.environ:
    from pylab import pause, find
#--- globals
HOME = os.environ['HOME']
class arg_to_datetime(argparse.Action):
    """
    argparse action to handle command-line arguments of
    the form "dd/mm/yyyy" (string type), converting them
    to datetime objects.
    NOTE: can be used even when `nargs` >= 1.
    """
    def __init__(self, option_strings, dest, **kwargs):
        super(arg_to_datetime, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        # `values` is a list when `nargs` is given
        if len(values) == 1:
            dd, mm, yyyy = map(int, values[0].split('/'))
            value = datetime(yyyy, mm, dd)
        elif len(values) > 1:
            value = []
            for val in values:
                dd, mm, yyyy = map(int, val.split('/'))
                value += [ datetime(yyyy, mm, dd) ]
        setattr(namespace, self.dest, value)
class arg_to_utcsec(argparse.Action):
    """
    argparse action to handle command-line arguments of
    the form "dd/mm/yyyy" (string type), converting them
    to UTC seconds.
    """
    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        if nargs is not None:
            raise ValueError("nargs not allowed")
        super(arg_to_utcsec, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        dd, mm, yyyy = map(int, values.split('/'))
        value = (datetime(yyyy,mm,dd) - datetime(1970,1,1)).total_seconds()
        setattr(namespace, self.dest, value)
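#--- usage sketch for the two actions above (doctest style; the
#    option names here are only illustrative):
# >>> p = argparse.ArgumentParser()
# >>> _ = p.add_argument('--date', action=arg_to_datetime, nargs=1)
# >>> _ = p.add_argument('--tini', action=arg_to_utcsec)
# >>> pa = p.parse_args(['--date', '09/11/2008', '--tini', '01/01/2006'])
# >>> pa.date
# datetime.datetime(2008, 11, 9, 0, 0)
# >>> pa.tini
# 1136073600.0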
def nans(shape, dtype=np.float32):
    return np.nan*np.ones(shape, dtype=dtype)
def ncep_gh(yyyy, mm, level=None, dir_src=None, sync=True, tsync=None):
    """
    input:
    tsync: in utc-sec
    """
    if dir_src is None:
        dir_src = '../out/out.build_weather/ncep'  #HOME+'/data_gdas'
    if level is None:
        lname = 'level_%04d' % 100  # level in [hPa]
    fname_inp = dir_src + '/test_%04d.h5' % yyyy
    assert isfile(fname_inp), ' --> does not exist: '+fname_inp
    f = h5(fname_inp, 'r')
    dpath = lname + '/%04d-%02d' % (yyyy, mm)
    #print " ---> ", f[dpath].keys()
    g_t = f[dpath+'/utcsec'].value  # utc sec
    g_h = f[dpath+'/h'].value       # geop height
    if sync:
        # sync the geopotential height to the `tsync` time grid
        tck = splrep(g_t, g_h, s=0)
        gh = splev(tsync, tck, der=0)  # der: order of derivative to compute
        return gh
    else:
        return g_h
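#--- the `sync` branch above is plain cubic-spline resampling; a
#    self-contained sketch of the same idea (values illustrative):
# >>> t_src = np.arange(0., 10.)        # coarse source time grid
# >>> y_src = t_src**2                  # source samples
# >>> tck = splrep(t_src, y_src, s=0)   # s=0: interpolating spline
# >>> gh = splev(5.5, tck, der=0)       # resample at a new time (~30.25)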
def equi_days(dini, dend, n):
    """
    returns the most equi-partitioned tuple of numbers of
    days between the date objects `dini` and `dend`
    """
    days = (dend - dini).days
    days_part = np.zeros(n, dtype=np.int)
    resid = np.mod(days, n)
    for i in range(n-resid):
        days_part[i] = days/n
    # last positions, where the residuals are placed
    last = np.arange(start=-1, stop=-resid-1, step=-1)
    for i in last:
        days_part[i] = days/n + 1
    assert np.sum(days_part) == days, \
        " --> something went wrong! :/ "
    return days_part
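#--- e.g. split the 31 days of Jan/2006 into 4 blocks:
# >>> equi_days(datetime(2006,1,1), datetime(2006,2,1), 4)
# array([7, 8, 8, 8])
#    (7+8+8+8 = 31; the residuals go to the last positions)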
def equi_dates(dini, dend, n):
    """
    returns:
    - inis: numpy array of dates, of size (n+1), where:
      - inis[:-1]: the `n` start dates of the most equi-partitioned
        contiguous blocks of dates between `dini` and `dend`
        (date objects).
      - inis[-1]: `dend` (max date of the whole block)
    """
    days_part = equi_days(dini, dend, n)
    inis = np.empty(n+1, dtype=datetime)
    inis[0] = dini
    inis[-1] = dend
    for i in range(1, inis.size-1):
        inis[i] = inis[i-1] + timedelta(days=days_part[i-1])
    return inis
def equi_dates_RoundMonth(dini, dend, n):
    """ same as equi_dates(), but with the start dates rounded
    down to the 1st of the month """
    assert dini.day == 1, " ERROR: dini.day must be 1."
    inis = equi_dates(dini, dend, n)
    inis_ = np.empty(n+1, dtype=datetime)
    inis_[-1] = dend
    for i, i_ in zip(inis, range(inis_.size-1)):
        inis_[i_] = datetime(i.year, i.month, 1)
    return inis_
def rebine_mats(a, b):  # original version
    """
    Rebinning of `b` to the resolution of `a`,
    where:
    - b[0] is denser than a[0] (IMPORTANT!)
    - a[0], a[1]: x and y components of a
    - b[0], b[1]: x and y components of b
    NOTE: for this to make sense (*), the following should
    hold: (bx[0]-ax[0]) << La && (bx[-1]-ax[-1]) << La,
    where La = (ax[-1]-ax[0]); in other words,
    "the borders of `bx` must *not* be very
    different from those of `ax`."
    """
    ax, ay = a[0,:], a[1,:]
    bx, by = b[0,:], b[1,:]
    by_binned = np.zeros(ax.size, dtype=np.float64)
    for j in range(ax.size):
        if j == 0:
            ax_lo = 0.5*(ax[0]+ax[1])  # midpoint of the first two
            cc = bx <= ax_lo  # (*)
        elif j == ax.size-1:
            ax_hi = 0.5*(ax[-2]+ax[-1])  # midpoint of the last two
            cc = bx >= ax_hi  # (*)
        else:
            ax_min = 0.5*(ax[j-1] + ax[j])
            ax_max = 0.5*(ax[j] + ax[j+1])
            cc = (bx > ax_min) & (bx < ax_max)
        by_binned[j] = np.nanmean(by[cc])
    return by_binned
def rebine(ax, bx, by_data, nbef=1, naft=1):
    """
    Rebinning of `by_data` to the resolution of `ax`.
    **IMPORTANT**: `bx` is denser than `ax`!
    inputs:
    ax      : time domain to which `by_data` will be adapted/rebinned/synced.
    bx      : time domain of `by_data`.
    by_data : assumed of shape (:,:) or (:).
    NOTE: for this to make sense (*), the following should
    hold: (bx[0]-ax[0]) << La && (bx[-1]-ax[-1]) << La,
    where La = (ax[-1]-ax[0]); in other words,
    "the borders of `bx` must *not* be very
    different from those of `ax`."
    """
    nbsh = len(by_data.shape)  # should be 1 or 2.
    if nbsh == 2:
        nb_col = by_data.shape[1]
        by_binned = np.zeros((ax.size, nb_col), dtype=np.float32)
        by = by_data
    elif nbsh == 1:
        by_binned = np.zeros((ax.size, 1), dtype=np.float32)
        by = np.reshape(by_data, (by_data.size, 1))
    else:
        raise SystemExit(" ERROR: len(by_data.shape) should be 1 or 2.")
    # NOTE: the means below are taken per column (axis=0), so that
    # multi-column input keeps its columns independent.
    #--- first row
    ax_lo = 0.5*(ax[0]+ax[1])  # midpoint of the first two
    cc = bx <= ax_lo  # (*)
    by_binned[0,:] = np.nanmean(by[cc,:], axis=0)
    #--- last row
    ax_hi = 0.5*(ax[-2]+ax[-1])  # midpoint of the last two
    cc = bx >= ax_hi  # (*)
    by_binned[ax.size-1,:] = np.nanmean(by[cc,:], axis=0)
    #--- rest of the rows
    for j in range(1, ax.size-1):
        ax_min = 0.5*(ax[j-1] + ax[j])
        ax_max = 0.5*(ax[j] + ax[j+1])
        cc = (bx > ax_min) & (bx < ax_max)
        by_binned[j,:] = np.nanmean(by[cc,:], axis=0)
    # the output must have the same number of columns as the input
    if nbsh == 1:
        return by_binned[:,0]
    else:
        return by_binned
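#--- a small, self-contained example of `rebine` (same idea as
#    `rebine_mats`): each output entry is the nanmean of the dense
#    samples falling inside the corresponding coarse bin.
# >>> ax = np.array([0., 1., 2., 3.])   # coarse grid (4 bins)
# >>> bx = np.linspace(0., 3., 31)      # dense grid
# >>> rebine(ax, bx, 2.*bx)             # dense signal y = 2*x
# array([ 0.5,  2. ,  4. ,  5.5], dtype=float32)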
def utc2date(t):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    date = date_utc + timedelta(days=t/86400.)
    return date

def date2utc(date):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    utcsec = (date - date_utc).total_seconds()  # [utc sec]
    return utcsec

def gps2date(t):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    t_utc = t + 315964800  # [utc sec]; 315964800 = UTC secs of the GPS epoch (06/Jan/1980)
    date = date_utc + timedelta(days=t_utc/86400.)
    return date

def date2gps(date):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    utcsec = (date - date_utc).total_seconds()  # [utc sec]
    gpssec = utcsec - 315964800  # [gps sec]
    return gpssec
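#--- quick checks (leap seconds are ignored, as in the converters above):
# >>> date2utc(datetime(1980, 1, 6))  # UTC secs of the GPS epoch
# 315964800.0
# >>> gps2date(0)
# datetime.datetime(1980, 1, 6, 0, 0)
# >>> utc2date(0)
# datetime.datetime(1970, 1, 1, 0, 0)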
class My2DArray(object):
    """
    wrapper around a numpy array with:
    - a flexible number of rows
    - a record of the maximum number of rows requested (`nrow`)
    NOTE:
    this was tested for 1D and 2D arrays.
    TODO:
    try wrapping `numpy.ndarray` directly instead of `object`, to
    make the wrapper more natural.
    """
    def __init__(self, shape, dtype=np.float32):
        self.this = np.empty(shape, dtype=dtype)
        setattr(self, '__array__', self.this.__array__)
        self.nrow = 0
    def resize_rows(self, nx_new=None):
        """ grow axis=0 to `nx_new` rows (or twice the current
        size, if `nx_new` is None), **without** losing data.
        """
        sh_new = np.copy(self.this.shape)
        nx = self.this.shape[0]
        if nx_new is None:
            sh_new[0] = 2*sh_new[0]
        elif nx_new <= nx:
            return 0  # nothing to do
        else:
            sh_new[0] = nx_new
        tmp = self.this.copy()
        new = np.nan*np.ones(sh_new, dtype=self.this.dtype)
        new[:nx] = tmp  # new rows are padded with NaN
        self.this = new
        """
        For some reason (probably due to the numpy
        implementation), if we don't do this,
        >>> print self.__array__()
        stays truncated to the original size that was
        set at __init__() time.
        So we need to tell numpy our new resized shape!
        """
        setattr(self, '__array__', self.this.__array__)
    def __get__(self, instance, owner):
        return self.this

    def __getitem__(self, i):
        """
        to retrieve only the non-trivial content:
        >>> print m[...]
        >>> print m[:]
        to retrieve the whole allocated array:
        >>> print m.this[...]
        >>> print m[:-1]
        >>> print m[:-1,:]
        """
        if not (i is Ellipsis or i == slice(None, None, None)):
            return self.this[i]
        else:
            # return only the non-trivial content
            return self.this[:self.nrow]
    def __setitem__(self, i, value):
        """
        We can safely use:
        >>> ma[n:n+m,:] = [...]
        even when n+m is greater than our current size in axis=0.
        NOTE: `i` can be:
        - a scalar
        - a list
        - a tuple of (slice,scalar), (slice,slice),
          (scalar,slice), etc.
        - a slice
        """
        # we need to determine `stop` in order to check whether
        # we are running out of rows.
        if type(i) == slice:
            # case:
            # >>> m[2:5] = ...
            stop = i.stop
            _ind = i
            self.nrow = np.max([stop, self.nrow])
        elif type(i) == tuple and type(i[0]) == slice:
            # cases:
            # >>> m[n:n+m,:] = ...  # (*)
            # >>> m[n:,:]    = ...  # (**)
            # >>> m[n:,4]    = ...
            # >>> m[:,:]     = ...  # (***)
            # >>> m[:,4]     = ...
            if i[0].start is None and \
               i[0].stop is None and \
               i[0].step is None:  # (***)
                _ind = (slice(None, None, None), i[1])
                stop = 0
                self.nrow = np.max([np.size(value, axis=0), self.nrow])
            else:
                stop_aux = i[0].start + np.size(value, axis=0)  # (*)
                stop = stop_aux if i[0].stop is None else i[0].stop  # (**) & (*)
                _ind = (slice(i[0].start, stop, i[0].step), i[1])
                self.nrow = np.max([stop, self.nrow])
        elif type(i) == tuple and type(i[0]) in (list, int):
            # cases:
            # >>> m[k,:]     = ...  # type=int
            # >>> m[[3,4],:] = ...  # type=list
            # >>> m[[3,4],k] = ...  # type=list
            stop = np.max(i[0])  # TEST
            _ind = i
            self.nrow = np.max([stop+1, self.nrow])
        else:
            raise IndexError(" --> don't know how to handle %r" % i)
        # if we are short of rows, keep doubling the size of
        # axis=0 until the assignment fits.
        if stop >= self.this.shape[0]:
            nx_new = self.this.shape[0]
            while nx_new <= stop:
                nx_new *= 2
            self.resize_rows(nx_new)
        # finally, assign
        self.this[_ind] = value
        # NOTE: `nrow` registers the maximum number of rows
        # requested so far (a size, *not* a row index).

    def __getattr__(self, attnm):
        return getattr(self.this, attnm)
#EOF
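
A brief usage sketch of My2DArray (an interactive session; values illustrative, assuming numpy is imported as np):

>>> m = My2DArray((4, 3), dtype=np.float32)  # start with 4 rows, 3 columns
>>> m[0:2] = 1.0   # plain assignment within the allocated size
>>> m[6:8] = 2.0   # out of range: axis=0 is doubled until row 8 fits
>>> m[...].shape   # only the non-trivial rows (nrow=8)
(8, 3)
>>> m.this.shape   # the full allocated buffer
(16, 3)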