Last active
August 29, 2015 14:14
-
-
Save ian-ross/2446201c7d92298a0ac3 to your computer and use it in GitHub Desktop.
Haskell data analysis: kernel density estimation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Prelude hiding (enumFromThenTo, length, map, mapM_, replicate, zipWith) | |
import Data.Vector hiding ((++)) | |
import System.Random | |
import System.IO | |
-- Number of sample points. | |
n :: Int | |
n = 10 | |
-- Kernel bandwidth. | |
h :: Double | |
h = 2.0 | |
-- Ranges for data generation and output. | |
xgenmin, xgenmax, xmin, xmax :: Double | |
xgenmin = 1 ; xgenmax = 9 | |
xmin = xgenmin - h ; xmax = xgenmax + h | |
-- Output step. | |
dx :: Double | |
dx = 0.01 | |
main :: IO () | |
main = do | |
-- Generate sample points. | |
samplexs <- replicateM n $ randomRIO (xgenmin, xgenmax) | |
withFile "xs.dat" WriteMode $ \h -> forM_ samplexs (hPutStrLn h . show) | |
let outxs = enumFromThenTo xmin (xmin + dx) xmax | |
-- Calculate values for a single kernel. | |
doone n h xs x0 = map (/ (fromIntegral n)) $ map (kernel h x0) xs | |
-- Kernels for all sample points. | |
kernels = map (doone n h outxs) samplexs | |
-- Combined density estimate. | |
pdf = foldl1' (zipWith (+)) kernels | |
pr h x s = hPutStrLn h $ (show x) ++ " " ++ (show s) | |
kpr h k = do | |
zipWithM_ (pr h) outxs k | |
hPutStrLn h "" | |
withFile "kernels.dat" WriteMode $ \h -> mapM_ (kpr h) kernels | |
withFile "kde.dat" WriteMode $ \h -> zipWithM_ (pr h) outxs pdf | |
-- Epanechnikov kernel. | |
kernel :: Double -> Double -> Double -> Double | |
kernel h x0 x | |
| abs u <= 1 = 0.75 * (1 - u^2) | |
| otherwise = 0 | |
where u = (x - x0) / h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Control.Applicative ((<$>)) | |
import Control.Monad (forM_) | |
import Control.Loop | |
import Control.Monad.ST | |
import System.FilePath | |
import Data.NetCDF | |
import Data.NetCDF.HMatrix | |
import Foreign.C | |
import qualified Data.Map as M | |
import qualified Data.NetCDF.Vector as V | |
import qualified Data.Vector.Storable as SV | |
import Numeric.LinearAlgebra.HMatrix | |
import Numeric.LinearAlgebra.Devel | |
import Debug.Trace | |
type HMatrixRet a = IO (Either NcError (HMatrix a)) | |
basedir, workdir, pcadir :: FilePath | |
basedir = "/big/project-storage/haskell-data-analysis/atmos-non-diffusive" | |
workdir = basedir </> "spherical-pdf" | |
pcadir = basedir </> "pca" | |
type V = (Double, Double, Double) | |
type M = Matrix Double | |
-- Use a regular 2.5 deg. x 2.5 deg. theta/phi grid on unit sphere. | |
ntheta, nphi :: Int | |
nphi = 2 * 360 `div` 5 ; ntheta = 2 * 180 `div` 5 | |
-- Integration steps. | |
dtheta, dphi :: Double | |
dphi = 2.0 * pi / fromIntegral nphi ; dtheta = pi / fromIntegral ntheta | |
-- Degree-to-radian conversion factor. | |
degToRad :: Double | |
degToRad = pi / 180.0 | |
-- Density kernel bandwidth. | |
bandwidth :: Double | |
bandwidth = pi / 6.0 | |
main :: IO () | |
main = do | |
-- Open projected points data file for input. | |
Right innc <- openFile $ workdir </> "sphere-proj.nc" | |
let Just ntime = ncDimLength <$> ncDim innc "time" | |
let (Just projvar) = ncVar innc "proj" | |
Right (HMatrix projsin) <- | |
getA innc projvar [0, 0] [ntime, 2] :: HMatrixRet CDouble | |
-- Convert projections to 3-D points. | |
let dataPts = map projToPt $ toRows $ cmap realToFrac projsin | |
-- Calculate kernel values for each grid point/data point | |
-- combination and accumulate into grid. | |
let unnorm :: Matrix Double | |
unnorm = build (ntheta, nphi) $ \ith iph -> | |
let k = kernel (grid ith iph) | |
in sum $ map k dataPts | |
-- Normalise. | |
let ths = [(0.5 + fromIntegral i) * dtheta | i <- [0..ntheta-1] ] | |
phs = [fromIntegral i * dphi | i <- [0..nphi-1] ] | |
sinths = map sin ths | |
doone x v = sumElements $ cmap (x *) v | |
int = dtheta * dphi * sum (zipWith doone sinths (toRows unnorm)) | |
norm = cmap (realToFrac . (/ int)) unnorm :: Matrix CDouble | |
norm1 = cmap ((/ int)) unnorm | |
tst = dtheta * dphi * sum (zipWith doone sinths (toRows norm1)) | |
-- Set up output file. | |
let outthdim = NcDim "theta" ntheta False | |
outthvar = NcVar "theta" NcDouble [outthdim] M.empty | |
outphdim = NcDim "phi" nphi False | |
outphvar = NcVar "phi" NcDouble [outphdim] M.empty | |
outpdfvar = NcVar "pdf" NcDouble [outthdim, outphdim] M.empty | |
outncinfo = | |
emptyNcInfo (workdir </> "sphere-pdf.nc") # | |
addNcDim outthdim # addNcDim outphdim # | |
addNcVar outthvar # addNcVar outphvar # | |
addNcVar outpdfvar | |
flip (withCreateFile outncinfo) (putStrLn . ("ERROR: " ++) . show) $ | |
\outnc -> do | |
-- Write coordinate variable values. | |
put outnc outthvar $ | |
(SV.fromList $ map realToFrac ths :: SV.Vector CDouble) | |
put outnc outphvar $ | |
(SV.fromList $ map realToFrac phs :: SV.Vector CDouble) | |
put outnc outpdfvar $ HMatrix norm | |
return () | |
-- Calculate kernel value at one point centred at another point. | |
kernel :: V -> V -> Double | |
kernel x y | |
| d < 1 = 1 - d^2 | |
| d >= 1 = 0 | |
where d = distance x y / bandwidth | |
-- Angular distance between two unit vectors. | |
distance :: V -> V -> Double | |
distance (x1, y1, z1) (x2, y2, z2) = acos $ x1 * x2 + y1 * y2 + z1 * z2 | |
-- Convert (theta, phi) coordinates to 3-D unit vector. | |
projToPt :: Vector Double -> V | |
projToPt p = sphPt (p ! 0) (p ! 1) | |
-- Convert (theta, phi) coordinates to 3-D unit vector. | |
sphPt :: Double -> Double -> V | |
sphPt th ph = (sin th * cos ph, sin th * sin ph, cos th) | |
-- Generate grid point. | |
grid :: Double -> Double -> V | |
grid ith iph = sphPt th ph | |
where th = (0.5 + ith) * dtheta | |
ph = iph * dphi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment