Skip to content

Instantly share code, notes, and snippets.

@louismullie
Created January 5, 2015 01:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save louismullie/1cb1cae74ef76513827a to your computer and use it in GitHub Desktop.
Save louismullie/1cb1cae74ef76513827a to your computer and use it in GitHub Desktop.
""" Fast algorithm for spectral analysis of unevenly sampled data
The Lomb-Scargle method performs spectral analysis on unevenly sampled
data and is known to be a powerful way to find, and test the
significance of, weak periodic signals. The method has previously been
thought to be 'slow', requiring of order 10(2)N(2) operations to analyze
N data points. We show that Fast Fourier Transforms (FFTs) can be used
in a novel way to make the computation of order 10(2)N log N. Despite
its use of the FFT, the algorithm is in no way equivalent to
conventional FFT periodogram analysis.
Keywords:
DATA SAMPLING, FAST FOURIER TRANSFORMATIONS,
SPECTRUM ANALYSIS, SIGNAL PROCESSING
Example:
> import numpy
> import lomb
> x = numpy.arange(10)
> y = numpy.sin(x)
> fx,fy, nout, jmax, prob = lomb.fasper(x,y, 6., 6.)
Reference:
Press, W. H. & Rybicki, G. B. 1989
ApJ vol. 338, p. 277-280.
Fast algorithm for spectral analysis of unevenly sampled data
bib code: 1989ApJ...338..277P
"""
from numpy import *
from numpy.fft import *
def __spread__(y, yy, n, x, m):
"""
Given an array yy(0:n-1), extirpolate (spread) a value y into
m actual array elements that best approximate the "fictional"
(i.e., possible noninteger) array element number x. The weights
used are coefficients of the Lagrange interpolating polynomial
Arguments:
y :
yy :
n :
x :
m :
Returns:
"""
nfac=[0,1,1,2,6,24,120,720,5040,40320,362880]
if m > 10. :
print 'factorial table too small in spread'
return
ix=long(x)
if x == float(ix):
yy[ix]=yy[ix]+y
else:
ilo = long(x-0.5*float(m)+1.0)
ilo = min( max( ilo , 1 ), n-m+1 )
ihi = ilo+m-1
nden = nfac[m]
fac=x-ilo
for j in range(ilo+1,ihi+1): fac = fac*(x-j)
yy[ihi] = yy[ihi] + y*fac/(nden*(x-ihi))
for j in range(ihi-1,ilo-1,-1):
nden=(nden/(j+1-ilo))*(j-ihi)
yy[j] = yy[j] + y*fac/(nden*(x-j))
def fasper(x,y,ofac,hifac, MACC=4):
""" function fasper
Given abscissas x (which need not be equally spaced) and ordinates
y, and given a desired oversampling factor ofac (a typical value
being 4 or larger). this routine creates an array wk1 with a
sequence of nout increasing frequencies (not angular frequencies)
up to hifac times the "average" Nyquist frequency, and creates
an array wk2 with the values of the Lomb normalized periodogram at
those frequencies. The arrays x and y are not altered. This
routine also returns jmax such that wk2(jmax) is the maximum
element in wk2, and prob, an estimate of the significance of that
maximum against the hypothesis of random noise. A small value of prob
indicates that a significant periodic signal is present.
Reference:
Press, W. H. & Rybicki, G. B. 1989
ApJ vol. 338, p. 277-280.
Fast algorithm for spectral analysis of unevenly sampled data
(1989ApJ...338..277P)
Arguments:
X : Abscissas array, (e.g. an array of times).
Y : Ordinates array, (e.g. corresponding counts).
Ofac : Oversampling factor.
Hifac : Hifac * "average" Nyquist frequency = highest frequency
for which values of the Lomb normalized periodogram will
be calculated.
Returns:
Wk1 : An array of Lomb periodogram frequencies.
Wk2 : An array of corresponding values of the Lomb periodogram.
Nout : Wk1 & Wk2 dimensions (number of calculated frequencies)
Jmax : The array index corresponding to the MAX( Wk2 ).
Prob : False Alarm Probability of the largest Periodogram value
MACC : Number of interpolation points per 1/4 cycle
of highest frequency
History:
02/23/2009, v1.0, MF
Translation of IDL code (orig. Numerical recipies)
"""
#Check dimensions of input arrays
n = long(len(x))
if n != len(y):
print 'Incompatible arrays.'
return
nout = 0.5*ofac*hifac*n
nfreqt = long(ofac*hifac*n*MACC) #Size the FFT as next power
nfreq = 64L # of 2 above nfreqt.
while nfreq < nfreqt:
nfreq = 2*nfreq
ndim = long(2*nfreq)
#Compute the mean, variance
ave = y.mean()
##sample variance because the divisor is N-1
var = ((y-y.mean())**2).sum()/(len(y)-1)
# and range of the data.
xmin = x.min()
xmax = x.max()
xdif = xmax-xmin
#extirpolate the data into the workspaces
wk1 = zeros(ndim, dtype='complex')
wk2 = zeros(ndim, dtype='complex')
fac = ndim/(xdif*ofac)
fndim = ndim
ck = ((x-xmin)*fac) % fndim
ckk = (2.0*ck) % fndim
for j in range(0L, n):
__spread__(y[j]-ave,wk1,ndim,ck[j],MACC)
__spread__(1.0,wk2,ndim,ckk[j],MACC)
#Take the Fast Fourier Transforms
wk1 = ifft( wk1 )*len(wk1)
wk2 = ifft( wk2 )*len(wk1)
wk1 = wk1[1:nout+1]
wk2 = wk2[1:nout+1]
rwk1 = wk1.real
iwk1 = wk1.imag
rwk2 = wk2.real
iwk2 = wk2.imag
df = 1.0/(xdif*ofac)
#Compute the Lomb value for each frequency
hypo2 = 2.0 * abs( wk2 )
hc2wt = rwk2/hypo2
hs2wt = iwk2/hypo2
cwt = sqrt(0.5+hc2wt)
swt = sign(hs2wt)*(sqrt(0.5-hc2wt))
den = 0.5*n+hc2wt*rwk2+hs2wt*iwk2
cterm = (cwt*rwk1+swt*iwk1)**2./den
sterm = (cwt*iwk1-swt*rwk1)**2./(n-den)
wk1 = df*(arange(nout, dtype='float')+1.)
wk2 = (cterm+sterm)/(2.0*var)
pmax = wk2.max()
jmax = wk2.argmax()
#Significance estimation
#expy = exp(-wk2)
#effm = 2.0*(nout)/ofac
#sig = effm*expy
#ind = (sig > 0.01).nonzero()
#sig[ind] = 1.0-(1.0-expy[ind])**effm
#Estimate significance of largest peak value
expy = exp(-pmax)
effm = 2.0*(nout)/ofac
prob = effm*expy
if prob > 0.01:
prob = 1.0-(1.0-expy)**effm
return wk1,wk2,nout,jmax,prob
def getSignificance(wk1, wk2, nout, ofac):
""" returns the peak false alarm probabilities
Hence the lower is the probability and the more significant is the peak
"""
expy = exp(-wk2)
effm = 2.0*(nout)/ofac
sig = effm*expy
ind = (sig > 0.01).nonzero()
sig[ind] = 1.0-(1.0-expy[ind])**effm
return sig
""" Fast algorithm for spectral analysis of unevenly sampled data
The Lomb-Scargle method performs spectral analysis on unevenly sampled
data and is known to be a powerful way to find, and test the
significance of, weak periodic signals. The method has previously been
thought to be 'slow', requiring of order 10(2)N(2) operations to analyze
N data points. We show that Fast Fourier Transforms (FFTs) can be used
in a novel way to make the computation of order 10(2)N log N. Despite
its use of the FFT, the algorithm is in no way equivalent to
conventional FFT periodogram analysis.
Keywords:
DATA SAMPLING, FAST FOURIER TRANSFORMATIONS,
SPECTRUM ANALYSIS, SIGNAL PROCESSING
Example:
> import numpy
> import lomb
> x = numpy.arange(10)
> y = numpy.sin(x)
> fx,fy, nout, jmax, prob = lomb.fasper(x,y, 6., 6.)
Reference:
Press, W. H. & Rybicki, G. B. 1989
ApJ vol. 338, p. 277-280.
Fast algorithm for spectral analysis of unevenly sampled data
bib code: 1989ApJ...338..277P
"""
from numpy import *
from numpy.fft import *
def __spread__(y, yy, n, x, m):
"""
Given an array yy(0:n-1), extirpolate (spread) a value y into
m actual array elements that best approximate the "fictional"
(i.e., possible noninteger) array element number x. The weights
used are coefficients of the Lagrange interpolating polynomial
Arguments:
y :
yy :
n :
x :
m :
Returns:
"""
nfac=[0,1,1,2,6,24,120,720,5040,40320,362880]
if m > 10. :
print 'factorial table too small in spread'
return
ix=long(x)
if x == float(ix):
yy[ix]=yy[ix]+y
else:
ilo = long(x-0.5*float(m)+1.0)
ilo = min( max( ilo , 1 ), n-m+1 )
ihi = ilo+m-1
nden = nfac[m]
fac=x-ilo
for j in range(ilo+1,ihi+1): fac = fac*(x-j)
yy[ihi] = yy[ihi] + y*fac/(nden*(x-ihi))
for j in range(ihi-1,ilo-1,-1):
nden=(nden/(j+1-ilo))*(j-ihi)
yy[j] = yy[j] + y*fac/(nden*(x-j))
def fasper(x,y,ofac,hifac, MACC=4):
""" function fasper
Given abscissas x (which need not be equally spaced) and ordinates
y, and given a desired oversampling factor ofac (a typical value
being 4 or larger). this routine creates an array wk1 with a
sequence of nout increasing frequencies (not angular frequencies)
up to hifac times the "average" Nyquist frequency, and creates
an array wk2 with the values of the Lomb normalized periodogram at
those frequencies. The arrays x and y are not altered. This
routine also returns jmax such that wk2(jmax) is the maximum
element in wk2, and prob, an estimate of the significance of that
maximum against the hypothesis of random noise. A small value of prob
indicates that a significant periodic signal is present.
Reference:
Press, W. H. & Rybicki, G. B. 1989
ApJ vol. 338, p. 277-280.
Fast algorithm for spectral analysis of unevenly sampled data
(1989ApJ...338..277P)
Arguments:
X : Abscissas array, (e.g. an array of times).
Y : Ordinates array, (e.g. corresponding counts).
Ofac : Oversampling factor.
Hifac : Hifac * "average" Nyquist frequency = highest frequency
for which values of the Lomb normalized periodogram will
be calculated.
Returns:
Wk1 : An array of Lomb periodogram frequencies.
Wk2 : An array of corresponding values of the Lomb periodogram.
Nout : Wk1 & Wk2 dimensions (number of calculated frequencies)
Jmax : The array index corresponding to the MAX( Wk2 ).
Prob : False Alarm Probability of the largest Periodogram value
MACC : Number of interpolation points per 1/4 cycle
of highest frequency
History:
02/23/2009, v1.0, MF
Translation of IDL code (orig. Numerical recipies)
"""
#Check dimensions of input arrays
n = long(len(x))
if n != len(y):
print 'Incompatible arrays.'
return
nout = 0.5*ofac*hifac*n
nfreqt = long(ofac*hifac*n*MACC) #Size the FFT as next power
nfreq = 64L # of 2 above nfreqt.
while nfreq < nfreqt:
nfreq = 2*nfreq
ndim = long(2*nfreq)
#Compute the mean, variance
ave = y.mean()
##sample variance because the divisor is N-1
var = ((y-y.mean())**2).sum()/(len(y)-1)
# and range of the data.
xmin = x.min()
xmax = x.max()
xdif = xmax-xmin
#extirpolate the data into the workspaces
wk1 = zeros(ndim, dtype='complex')
wk2 = zeros(ndim, dtype='complex')
fac = ndim/(xdif*ofac)
fndim = ndim
ck = ((x-xmin)*fac) % fndim
ckk = (2.0*ck) % fndim
for j in range(0L, n):
__spread__(y[j]-ave,wk1,ndim,ck[j],MACC)
__spread__(1.0,wk2,ndim,ckk[j],MACC)
#Take the Fast Fourier Transforms
wk1 = ifft( wk1 )*len(wk1)
wk2 = ifft( wk2 )*len(wk1)
wk1 = wk1[1:nout+1]
wk2 = wk2[1:nout+1]
rwk1 = wk1.real
iwk1 = wk1.imag
rwk2 = wk2.real
iwk2 = wk2.imag
df = 1.0/(xdif*ofac)
#Compute the Lomb value for each frequency
hypo2 = 2.0 * abs( wk2 )
hc2wt = rwk2/hypo2
hs2wt = iwk2/hypo2
cwt = sqrt(0.5+hc2wt)
swt = sign(hs2wt)*(sqrt(0.5-hc2wt))
den = 0.5*n+hc2wt*rwk2+hs2wt*iwk2
cterm = (cwt*rwk1+swt*iwk1)**2./den
sterm = (cwt*iwk1-swt*rwk1)**2./(n-den)
wk1 = df*(arange(nout, dtype='float')+1.)
wk2 = (cterm+sterm)/(2.0*var)
pmax = wk2.max()
jmax = wk2.argmax()
#Significance estimation
#expy = exp(-wk2)
#effm = 2.0*(nout)/ofac
#sig = effm*expy
#ind = (sig > 0.01).nonzero()
#sig[ind] = 1.0-(1.0-expy[ind])**effm
#Estimate significance of largest peak value
expy = exp(-pmax)
effm = 2.0*(nout)/ofac
prob = effm*expy
if prob > 0.01:
prob = 1.0-(1.0-expy)**effm
return wk1,wk2,nout,jmax,prob
def getSignificance(wk1, wk2, nout, ofac):
""" returns the peak false alarm probabilities
Hence the lower is the probability and the more significant is the peak
"""
expy = exp(-wk2)
effm = 2.0*(nout)/ofac
sig = effm*expy
ind = (sig > 0.01).nonzero()
sig[ind] = 1.0-(1.0-expy[ind])**effm
return sig
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment