Skip to content

Instantly share code, notes, and snippets.

@mattpitkin
Last active May 22, 2017 09:48
Show Gist options
  • Save mattpitkin/dfe0f8ef38effb101fafb79e1679f516 to your computer and use it in GitHub Desktop.
Save mattpitkin/dfe0f8ef38effb101fafb79e1679f516 to your computer and use it in GitHub Desktop.
Using GSL integration functions in Cython when integrating a KDE function obtained from samples
"""
Example of integration of a function f(x) (for which a set of samples and integral estimate exist
e.g. through use of the Nested Sampling algorithm) multiplied by some other function g(x). NOTE: one
could use the expectatation value for this, but in some cases where there is sparse sampling over
the full allowed range of the function can cause problems.
The samples are passed through a KDE and then the kde object is passed to the GSL CQUAD
integration function
This requires the Cython, numpy, scikit-learn (v0.18 or greater) and the GSL library.
"""
import cython
cimport cython
import numpy as np
cimport numpy as np
# get Kernel Density estimator and use cross validation to get bandwidth (see http://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/#Bandwidth-Cross-Validation-in-Scikit-Learn)
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV
from libc.math cimport exp, sqrt
cdef extern from "gsl/gsl_math.h":
ctypedef struct gsl_function:
double (* function) (double x, void * params)
void * params
cdef extern from "gsl/gsl_integration.h":
ctypedef struct gsl_integration_cquad_workspace
gsl_integration_cquad_workspace * gsl_integration_cquad_workspace_alloc (size_t n)
void gsl_integration_cquad_workspace_free (gsl_integration_cquad_workspace * w)
void gsl_integration_cquad_workspace_free (gsl_integration_cquad_workspace * w)
int gsl_integration_cquad (gsl_function * f, double a, double b, double epsabs, double epsrel, gsl_integration_cquad_workspace * workspace, double * result, double * abserr, size_t * nevals)
# structure for passing as the integration function parameter
ctypedef struct int_kde_params:
double Z # the integral of the function f(x)
double mu # the mean of a Gaussian function (g(x) in this example)
double sig # the standard devaition of a Gaussian function (g(x) in this example)
void *kdefunc # the KDE function object to be integrated
ctypedef int_kde_params * params_ptr
# example function to generate samples and convert to KDE (using cross validation to get KDE bandwidth) and then
# integrating it
cpdef example():
# generate some Gaussian samples (zero mean and unit variance) truncated at zero!
samples = np.abs(np.random.randn(100))
# get set of samples containing reflections to make smooth KDE over zero (remember this is just my example, but not a generality)
refsamps = np.concatenate((samples, -samples))
# get KDE fitting for bandwidth size (range is based on the original, not reflected samples in case it has strong bimodality from a large signal)
sigr = np.std(samples)
# see e.g. http://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/#Bandwidth-Cross-Validation-in-Scikit-Learn for cross validation example
grid = GridSearchCV(KernelDensity(), {'bandwidth': np.linspace(0.01*sigr, 10.0*sigr, 50)}, cv=20) # 20-fold cross-validation
grid.fit(refsamps[:,None])
bandwidth = grid.best_params_['bandwidth']
kde = grid.best_estimator_ # get the KDE (scikit-learn KernelDensity object)
# integrate!
cdef double epsabs = 0. # absolute integration error
cdef double epsrel = 1e-7 # relative integration error
cdef double mu = 1. # g(x) mean in this example
cdef double sig = 0.1 # g(x) standard deviation in this example
cdef double Z = 10. # known integral under f(x)
cdef double intmin = 0. # lower bound of integration
cdef double intmax = 5. # upper bound of integration
A = integrate(kde, mu, intmin, intmax, mu, sig, Z)
# the integration function
cdef intgrate(object kde, double mu, double intmin, double intmax, double mu, double sig, double Z):
cdef gsl_integration_cquad_workspace *w = gsl_integration_cquad_workspace_alloc (100)
cdef double result, error
cdef size_t nevals
cdef int_kde_params params
params.mu = mu
params.sig = sig
params.Z = Z
params.kdefunc = <void*>kde # pass KDE object to structure as a void pointer
# set GSL function for passing to GSL CQUAD integration routine
cdef gsl_function F
F.function = &integrand_kde
F.params = &params
# run integration
gsl_integration_cquad(&F, intmin, intmax, epsabs, epsrel, w, &result, &error, &nevals)
gsl_integration_cquad_workspace_free (w)
return result
# define the integrand function
cdef double integrand_kde(double x, void * p):
cdef int_kde_params * params = <params_ptr>p
# extract things from structure including KDE object
cdef double mu = params.mu
cdef double sig = params.sig
cdef double Z = params.Z
kde = <object>params.kdefunc
cdef double logL = 2.*exp(kde.score(x)) # evaluate KDE at x (scikit-learn score returns to log value, so use exp - the additional factor of two is due to reflection of samples to create KDE
logL *= (1./(sqrt(2.*np.pi)*sig))*exp(-0.5*((x-mu)/sig)**2)
return logL * Z # multiply by Z
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment