Skip to content

Instantly share code, notes, and snippets.

@ian-ross
Last active August 29, 2015 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ian-ross/2446201c7d92298a0ac3 to your computer and use it in GitHub Desktop.
Save ian-ross/2446201c7d92298a0ac3 to your computer and use it in GitHub Desktop.
Haskell data analysis: kernel density estimation
module Main where
import Prelude hiding (enumFromThenTo, length, map, mapM_, replicate, zipWith)
import Data.Vector hiding ((++))
import System.Random
import System.IO
-- Number of sample points.
n :: Int
n = 10
-- Kernel bandwidth.
h :: Double
h = 2.0
-- Ranges for data generation and output.
xgenmin, xgenmax, xmin, xmax :: Double
xgenmin = 1 ; xgenmax = 9
xmin = xgenmin - h ; xmax = xgenmax + h
-- Output step.
dx :: Double
dx = 0.01
main :: IO ()
main = do
-- Generate sample points.
samplexs <- replicateM n $ randomRIO (xgenmin, xgenmax)
withFile "xs.dat" WriteMode $ \h -> forM_ samplexs (hPutStrLn h . show)
let outxs = enumFromThenTo xmin (xmin + dx) xmax
-- Calculate values for a single kernel.
doone n h xs x0 = map (/ (fromIntegral n)) $ map (kernel h x0) xs
-- Kernels for all sample points.
kernels = map (doone n h outxs) samplexs
-- Combined density estimate.
pdf = foldl1' (zipWith (+)) kernels
pr h x s = hPutStrLn h $ (show x) ++ " " ++ (show s)
kpr h k = do
zipWithM_ (pr h) outxs k
hPutStrLn h ""
withFile "kernels.dat" WriteMode $ \h -> mapM_ (kpr h) kernels
withFile "kde.dat" WriteMode $ \h -> zipWithM_ (pr h) outxs pdf
-- Epanechnikov kernel.
kernel :: Double -> Double -> Double -> Double
kernel h x0 x
| abs u <= 1 = 0.75 * (1 - u^2)
| otherwise = 0
where u = (x - x0) / h
module Main where
import Control.Applicative ((<$>))
import Control.Monad (forM_)
import Control.Loop
import Control.Monad.ST
import System.FilePath
import Data.NetCDF
import Data.NetCDF.HMatrix
import Foreign.C
import qualified Data.Map as M
import qualified Data.NetCDF.Vector as V
import qualified Data.Vector.Storable as SV
import Numeric.LinearAlgebra.HMatrix
import Numeric.LinearAlgebra.Devel
import Debug.Trace
type HMatrixRet a = IO (Either NcError (HMatrix a))
basedir, workdir, pcadir :: FilePath
basedir = "/big/project-storage/haskell-data-analysis/atmos-non-diffusive"
workdir = basedir </> "spherical-pdf"
pcadir = basedir </> "pca"
type V = (Double, Double, Double)
type M = Matrix Double
-- Use a regular 2.5 deg. x 2.5 deg. theta/phi grid on unit sphere.
ntheta, nphi :: Int
nphi = 2 * 360 `div` 5 ; ntheta = 2 * 180 `div` 5
-- Integration steps.
dtheta, dphi :: Double
dphi = 2.0 * pi / fromIntegral nphi ; dtheta = pi / fromIntegral ntheta
-- Degree-to-radian conversion factor.
degToRad :: Double
degToRad = pi / 180.0
-- Density kernel bandwidth.
bandwidth :: Double
bandwidth = pi / 6.0
main :: IO ()
main = do
-- Open projected points data file for input.
Right innc <- openFile $ workdir </> "sphere-proj.nc"
let Just ntime = ncDimLength <$> ncDim innc "time"
let (Just projvar) = ncVar innc "proj"
Right (HMatrix projsin) <-
getA innc projvar [0, 0] [ntime, 2] :: HMatrixRet CDouble
-- Convert projections to 3-D points.
let dataPts = map projToPt $ toRows $ cmap realToFrac projsin
-- Calculate kernel values for each grid point/data point
-- combination and accumulate into grid.
let unnorm :: Matrix Double
unnorm = build (ntheta, nphi) $ \ith iph ->
let k = kernel (grid ith iph)
in sum $ map k dataPts
-- Normalise.
let ths = [(0.5 + fromIntegral i) * dtheta | i <- [0..ntheta-1] ]
phs = [fromIntegral i * dphi | i <- [0..nphi-1] ]
sinths = map sin ths
doone x v = sumElements $ cmap (x *) v
int = dtheta * dphi * sum (zipWith doone sinths (toRows unnorm))
norm = cmap (realToFrac . (/ int)) unnorm :: Matrix CDouble
norm1 = cmap ((/ int)) unnorm
tst = dtheta * dphi * sum (zipWith doone sinths (toRows norm1))
-- Set up output file.
let outthdim = NcDim "theta" ntheta False
outthvar = NcVar "theta" NcDouble [outthdim] M.empty
outphdim = NcDim "phi" nphi False
outphvar = NcVar "phi" NcDouble [outphdim] M.empty
outpdfvar = NcVar "pdf" NcDouble [outthdim, outphdim] M.empty
outncinfo =
emptyNcInfo (workdir </> "sphere-pdf.nc") #
addNcDim outthdim # addNcDim outphdim #
addNcVar outthvar # addNcVar outphvar #
addNcVar outpdfvar
flip (withCreateFile outncinfo) (putStrLn . ("ERROR: " ++) . show) $
\outnc -> do
-- Write coordinate variable values.
put outnc outthvar $
(SV.fromList $ map realToFrac ths :: SV.Vector CDouble)
put outnc outphvar $
(SV.fromList $ map realToFrac phs :: SV.Vector CDouble)
put outnc outpdfvar $ HMatrix norm
return ()
-- Calculate kernel value at one point centred at another point.
kernel :: V -> V -> Double
kernel x y
| d < 1 = 1 - d^2
| d >= 1 = 0
where d = distance x y / bandwidth
-- Angular distance between two unit vectors.
distance :: V -> V -> Double
distance (x1, y1, z1) (x2, y2, z2) = acos $ x1 * x2 + y1 * y2 + z1 * z2
-- Convert (theta, phi) coordinates to 3-D unit vector.
projToPt :: Vector Double -> V
projToPt p = sphPt (p ! 0) (p ! 1)
-- Convert (theta, phi) coordinates to 3-D unit vector.
sphPt :: Double -> Double -> V
sphPt th ph = (sin th * cos ph, sin th * sin ph, cos th)
-- Generate grid point.
grid :: Double -> Double -> V
grid ith iph = sphPt th ph
where th = (0.5 + ith) * dtheta
ph = iph * dphi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment