Last active
May 22, 2017 09:48
-
-
Save mattpitkin/dfe0f8ef38effb101fafb79e1679f516 to your computer and use it in GitHub Desktop.
Using GSL integration functions in Cython when integrating a KDE function obtained from samples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Example of integration of a function f(x) (for which a set of samples and integral estimate exist | |
e.g. through use of the Nested Sampling algorithm) multiplied by some other function g(x). NOTE: one | |
could use the expectatation value for this, but in some cases where there is sparse sampling over | |
the full allowed range of the function can cause problems. | |
The samples are passed through a KDE and then the kde object is passed to the GSL CQUAD | |
integration function | |
This requires the Cython, numpy, scikit-learn (v0.18 or greater) and the GSL library. | |
""" | |
import cython | |
cimport cython | |
import numpy as np | |
cimport numpy as np | |
# get Kernel Density estimator and use cross validation to get bandwidth (see http://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/#Bandwidth-Cross-Validation-in-Scikit-Learn) | |
from sklearn.neighbors import KernelDensity | |
from sklearn.model_selection import GridSearchCV | |
from libc.math cimport exp, sqrt | |
cdef extern from "gsl/gsl_math.h": | |
ctypedef struct gsl_function: | |
double (* function) (double x, void * params) | |
void * params | |
cdef extern from "gsl/gsl_integration.h": | |
ctypedef struct gsl_integration_cquad_workspace | |
gsl_integration_cquad_workspace * gsl_integration_cquad_workspace_alloc (size_t n) | |
void gsl_integration_cquad_workspace_free (gsl_integration_cquad_workspace * w) | |
void gsl_integration_cquad_workspace_free (gsl_integration_cquad_workspace * w) | |
int gsl_integration_cquad (gsl_function * f, double a, double b, double epsabs, double epsrel, gsl_integration_cquad_workspace * workspace, double * result, double * abserr, size_t * nevals) | |
# structure for passing as the integration function parameter | |
ctypedef struct int_kde_params: | |
double Z # the integral of the function f(x) | |
double mu # the mean of a Gaussian function (g(x) in this example) | |
double sig # the standard devaition of a Gaussian function (g(x) in this example) | |
void *kdefunc # the KDE function object to be integrated | |
ctypedef int_kde_params * params_ptr | |
# example function to generate samples and convert to KDE (using cross validation to get KDE bandwidth) and then | |
# integrating it | |
cpdef example(): | |
# generate some Gaussian samples (zero mean and unit variance) truncated at zero! | |
samples = np.abs(np.random.randn(100)) | |
# get set of samples containing reflections to make smooth KDE over zero (remember this is just my example, but not a generality) | |
refsamps = np.concatenate((samples, -samples)) | |
# get KDE fitting for bandwidth size (range is based on the original, not reflected samples in case it has strong bimodality from a large signal) | |
sigr = np.std(samples) | |
# see e.g. http://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/#Bandwidth-Cross-Validation-in-Scikit-Learn for cross validation example | |
grid = GridSearchCV(KernelDensity(), {'bandwidth': np.linspace(0.01*sigr, 10.0*sigr, 50)}, cv=20) # 20-fold cross-validation | |
grid.fit(refsamps[:,None]) | |
bandwidth = grid.best_params_['bandwidth'] | |
kde = grid.best_estimator_ # get the KDE (scikit-learn KernelDensity object) | |
# integrate! | |
cdef double epsabs = 0. # absolute integration error | |
cdef double epsrel = 1e-7 # relative integration error | |
cdef double mu = 1. # g(x) mean in this example | |
cdef double sig = 0.1 # g(x) standard deviation in this example | |
cdef double Z = 10. # known integral under f(x) | |
cdef double intmin = 0. # lower bound of integration | |
cdef double intmax = 5. # upper bound of integration | |
A = integrate(kde, mu, intmin, intmax, mu, sig, Z) | |
# the integration function | |
cdef intgrate(object kde, double mu, double intmin, double intmax, double mu, double sig, double Z): | |
cdef gsl_integration_cquad_workspace *w = gsl_integration_cquad_workspace_alloc (100) | |
cdef double result, error | |
cdef size_t nevals | |
cdef int_kde_params params | |
params.mu = mu | |
params.sig = sig | |
params.Z = Z | |
params.kdefunc = <void*>kde # pass KDE object to structure as a void pointer | |
# set GSL function for passing to GSL CQUAD integration routine | |
cdef gsl_function F | |
F.function = &integrand_kde | |
F.params = ¶ms | |
# run integration | |
gsl_integration_cquad(&F, intmin, intmax, epsabs, epsrel, w, &result, &error, &nevals) | |
gsl_integration_cquad_workspace_free (w) | |
return result | |
# define the integrand function | |
cdef double integrand_kde(double x, void * p): | |
cdef int_kde_params * params = <params_ptr>p | |
# extract things from structure including KDE object | |
cdef double mu = params.mu | |
cdef double sig = params.sig | |
cdef double Z = params.Z | |
kde = <object>params.kdefunc | |
cdef double logL = 2.*exp(kde.score(x)) # evaluate KDE at x (scikit-learn score returns to log value, so use exp - the additional factor of two is due to reflection of samples to create KDE | |
logL *= (1./(sqrt(2.*np.pi)*sig))*exp(-0.5*((x-mu)/sig)**2) | |
return logL * Z # multiply by Z |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment