Skip to content

Instantly share code, notes, and snippets.

@ian-ross
Last active August 29, 2015 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ian-ross/e748bc06fa115796bbf7 to your computer and use it in GitHub Desktop.
Save ian-ross/e748bc06fa115796bbf7 to your computer and use it in GitHub Desktop.
Haskell data analysis: spherical PDF significance testing
module Main where
import Numeric
import Control.Applicative ((<$>))
import Control.Monad
import System.FilePath
import Data.NetCDF
import Data.NetCDF.HMatrix
import Data.NetCDF.Vector
import Foreign.C
import qualified Data.Map as M
import qualified Data.Vector.Storable as SV
import qualified Data.Vector.Storable.Mutable as SVM
import Numeric.LinearAlgebra.HMatrix
import qualified Data.ByteString.Char8 as B
import qualified Foreign.CUDA.Driver as CUDA
import System.Random.MWC
import System.Random.MWC.Distributions
import Statistics.Sample.Histogram
type HMatrixRet a = IO (Either NcError (HMatrix a))
basedir, workdir :: FilePath
basedir = "/big/project-storage/haskell-data-analysis/atmos-non-diffusive"
workdir = basedir </> "spherical-pdf"
type V = SV.Vector Double
-- Use a regular 2.5 deg. x 2.5 deg. theta/phi grid on unit sphere.
ntheta, nphi :: Int
nphi = 2 * 360 `div` 5 ; ntheta = 2 * 180 `div` 5
-- Integration steps.
dtheta, dphi :: Double
dphi = 2.0 * pi / fromIntegral nphi ; dtheta = pi / fromIntegral ntheta
-- Realisations used for bootstrapping.
nrealisations :: Int
nrealisations = 10000
-- Number of data points for each PDF realisation.
nData :: Int
nData = 9966
-- Number of histogram bins.
nbins :: Int
nbins = 100
main :: IO ()
main = do
-- CUDA initialisation.
CUDA.initialise []
dev <- CUDA.device 0
ctx <- CUDA.create dev []
ptx <- B.readFile "make_pdf.ptx"
CUDA.JITResult time log mdl <- CUDA.loadDataEx ptx []
fun <- CUDA.getFun mdl "make_pdf_kernel"
-- Random data point generation.
gen <- create
let randPt gen = do
unnorm <- SV.replicateM 3 (standard gen)
let mag = sqrt $ unnorm `dot` unnorm
return $ scale (1.0 / mag) unnorm
let ths = [(0.5 + fromIntegral i) * dtheta | i <- [0..ntheta-1] ]
phs = [fromIntegral i * dphi | i <- [0..nphi-1] ]
sinths = map sin ths
doone x v = sumElements $ cmap (x *) v
-- CUDA data setup.
dDataPts <- CUDA.mallocArray $ 3 * nData
dPdf <- CUDA.mallocArray $ ntheta * nphi :: IO (CUDA.DevicePtr Double)
let threadsx = 32
threadsy = 16
blockSize = (threadsx, threadsy, 1)
gridSize = (nphi `div` threadsx, ntheta `div` threadsy, 1)
-- Generate PDF realisations.
pdfs <- forM [1..nrealisations] $ \r -> do
putStrLn $ "REALISATION: " ++ show r
-- Create random data points.
dataPts <- SV.concat <$> replicateM nData (randPt gen)
SV.unsafeWith dataPts $ \p -> CUDA.pokeArray (3 * nData) p dDataPts
-- Calculate kernel values for each grid point/data point
-- combination and accumulate into grid.
CUDA.launchKernel fun gridSize blockSize 0 Nothing
[CUDA.IArg (fromIntegral nData), CUDA.VArg dDataPts, CUDA.VArg dPdf]
CUDA.sync
res <- SVM.new (ntheta * nphi)
SVM.unsafeWith res $ \p -> CUDA.peekArray (ntheta * nphi) dPdf p
unnormv <- SV.unsafeFreeze res
let unnorm = reshape nphi unnormv
-- Normalise.
let int = dtheta * dphi * sum (zipWith doone sinths (toRows unnorm))
return $ cmap (realToFrac . (/ int)) unnorm :: IO (Matrix Double)
-- Convert to per-point samples and generate per-point histograms.
let samples = [SV.generate nrealisations (\s -> (pdfs !! s) ! i ! j) :: V |
i <- [0..ntheta-1], j <- [0..nphi-1]]
(rngmin, rngmax) = range nbins $ SV.concat samples
hists = map (histogram_ nbins rngmin rngmax) samples :: [V]
step i = rngmin + d * fromIntegral i
d = (rngmax - rngmin) / fromIntegral nbins
bins = SV.generate nbins step
-- Set up output file.
let outthdim = NcDim "theta" ntheta False
outthvar = NcVar "theta" NcDouble [outthdim] M.empty
outphdim = NcDim "phi" nphi False
outphvar = NcVar "phi" NcDouble [outphdim] M.empty
outbindim = NcDim "bin" nbins False
outbinvar = NcVar "bin" NcDouble [outbindim] M.empty
outhistvar = NcVar "hist" NcDouble [outthdim, outphdim, outbindim] M.empty
outncinfo =
emptyNcInfo (workdir </> "sphere-hist.nc") #
addNcDim outthdim # addNcDim outphdim # addNcDim outbindim #
addNcVar outthvar # addNcVar outphvar # addNcVar outbinvar #
addNcVar outhistvar
flip (withCreateFile outncinfo) (putStrLn . ("ERROR: " ++) . show) $
\outnc -> do
-- Write coordinate variable values.
put outnc outthvar $
(SV.fromList $ map realToFrac ths :: SV.Vector CDouble)
put outnc outphvar $
(SV.fromList $ map realToFrac phs :: SV.Vector CDouble)
put outnc outbinvar $
(SV.map realToFrac bins :: SV.Vector CDouble)
let idxs = [(i, j) | i <- [0..ntheta-1], j <- [0..nphi-1]]
forM_ (zip idxs hists) $ \((i, j), h) -> do
let hstore = cmap realToFrac h :: SV.Vector CDouble
putA outnc outhistvar [i, j, 0] [1, 1, nbins] hstore
return ()
putStrLn "OK"
-- Convert (theta, phi) coordinates to 3-D unit vector.
projToPt :: Vector Double -> V
projToPt p = sphPt (p ! 0) (p ! 1)
-- Convert (theta, phi) coordinates to 3-D unit vector.
sphPt :: Double -> Double -> V
sphPt th ph = SV.fromList [sin th * cos ph, sin th * sin ph, cos th]
module Main where
import Control.Applicative ((<$>))
import Control.Monad (forM_)
import System.FilePath
import qualified Data.Array as A
import Data.NetCDF
import Data.NetCDF.HMatrix
import Foreign.C
import qualified Data.Map as M
import qualified Data.NetCDF.Vector as V
import qualified Data.Vector.Storable as SV
import Numeric.LinearAlgebra.HMatrix
import Numeric.LinearAlgebra.Devel
import Debug.Trace
type SVRet a = IO (Either NcError (SV.Vector a))
type HMatrixRet a = IO (Either NcError (HMatrix a))
basedir, workdir :: FilePath
basedir = "/big/project-storage/haskell-data-analysis/atmos-non-diffusive"
workdir = basedir </> "spherical-pdf"
type M = Matrix Double
-- Use a regular 2.5 deg. x 2.5 deg. theta/phi grid on unit sphere.
ntheta, nphi :: Int
nphi = 2 * 360 `div` 5 ; ntheta = 2 * 180 `div` 5
-- Integration steps.
dtheta, dphi :: Double
dphi = 2.0 * pi / fromIntegral nphi ; dtheta = pi / fromIntegral ntheta
main :: IO ()
main = do
-- Open PDF and histogram data files for input.
Right pdfnc <- openFile $ workdir </> "sphere-pdf.nc"
let (Just pdfvar) = ncVar pdfnc "pdf"
Right (HMatrix pdf) <- get pdfnc pdfvar :: HMatrixRet CDouble
Right histnc <- openFile $ workdir </> "sphere-hist.nc"
let Just nbin = ncDimLength <$> ncDim histnc "bin"
(Just binvar) = ncVar histnc "bin"
(Just histvar) = ncVar histnc "hist"
Right bins <- get histnc binvar :: SVRet CDouble
let minbin = SV.head bins ; maxbin = SV.last bins
binwidth = (maxbin - minbin) / fromIntegral (nbin - 1)
Right histvals <- get histnc histvar :: SVRet CDouble
-- Split histogram values for later processing.
let nhistvals = SV.length histvals
oneslice i = SV.slice i nbin histvals
histvecs = map oneslice [0, nbin.. nhistvals - nbin]
hists = A.listArray ((0, 0), (ntheta-1, nphi-1)) histvecs
nrealisations = SV.sum $ hists A.! (0, 0)
-- Calculate significance values.
let doone :: CDouble -> CDouble -> CDouble
doone dith diph =
let ith = truncate dith ; iph = truncate diph
pdfval = pdf ! ith ! iph
hist = hists A.! (ith, iph)
pdfbin0 = truncate $ (pdfval - minbin) / binwidth
pdfbin = pdfbin0 `max` 0 `min` nbin - 1
in (SV.sum $ SV.take pdfbin hist) / nrealisations
sig = build (ntheta, nphi) doone :: Matrix CDouble
-- Set up output file.
let ths = [(0.5 + fromIntegral i) * dtheta | i <- [0..ntheta-1] ]
phs = [fromIntegral i * dphi | i <- [0..nphi-1] ]
outthdim = NcDim "theta" ntheta False
outthvar = NcVar "theta" NcDouble [outthdim] M.empty
outphdim = NcDim "phi" nphi False
outphvar = NcVar "phi" NcDouble [outphdim] M.empty
outsigvar = NcVar "significance" NcDouble [outthdim, outphdim] M.empty
outncinfo =
emptyNcInfo (workdir </> "sphere-significance.nc") #
addNcDim outthdim # addNcDim outphdim #
addNcVar outthvar # addNcVar outphvar #
addNcVar outsigvar
flip (withCreateFile outncinfo) (putStrLn . ("ERROR: " ++) . show) $
\outnc -> do
-- Write coordinate variable values.
put outnc outthvar $
(SV.fromList $ map realToFrac ths :: SV.Vector CDouble)
put outnc outphvar $
(SV.fromList $ map realToFrac phs :: SV.Vector CDouble)
put outnc outsigvar $ HMatrix sig
return ()
module Main where
import Control.Applicative ((<$>))
import Control.Monad
import Control.Loop
import Control.Monad.ST
import System.FilePath
import Data.NetCDF
import Data.NetCDF.HMatrix
import Foreign.C
import qualified Data.Map as M
import qualified Data.NetCDF.Vector as V
import qualified Data.Vector.Storable as SV
import Numeric.LinearAlgebra.HMatrix
import Numeric.LinearAlgebra.Devel
import System.Random.MWC
import System.Random.MWC.Distributions
import Debug.Trace
type HMatrixRet a = IO (Either NcError (HMatrix a))
basedir, workdir, pcadir :: FilePath
basedir = "/big/project-storage/haskell-data-analysis/atmos-non-diffusive"
workdir = basedir </> "spherical-pdf"
pcadir = basedir </> "pca"
type V = (Double, Double, Double)
type M = Matrix Double
-- Use a regular 2.5 deg. x 2.5 deg. theta/phi grid on unit sphere.
ntheta, nphi :: Int
nphi = 2 * 360 `div` 5 ; ntheta = 2 * 180 `div` 5
-- Integration steps.
dtheta, dphi :: Double
dphi = 2.0 * pi / fromIntegral nphi ; dtheta = pi / fromIntegral ntheta
-- Degree-to-radian conversion factor.
degToRad :: Double
degToRad = pi / 180.0
-- Density kernel bandwidth.
bandwidth :: Double
bandwidth = pi / 6.0
-- Number of data points for each PDF realisation.
nData :: Int
nData = 9966
main :: IO ()
main = do
-- Random data point generation.
gen <- create
let randPt gen = do
unnorm <- SV.replicateM 3 (standard gen)
let mag = sqrt $ unnorm `dot` unnorm
norm = scale (1.0 / mag) unnorm
return (norm ! 0, norm ! 1, norm ! 2)
-- Create random data points, flatten to vector and allocate on
-- device.
dataPts <- replicateM nData (randPt gen)
-- Calculate kernel values for each grid point/data point
-- combination and accumulate into grid.
let unnorm :: Matrix Double
unnorm = build (ntheta, nphi) $ \ith iph ->
let k = kernel (grid ith iph)
in sum $ map k dataPts
-- Normalise.
let ths = [(0.5 + fromIntegral i) * dtheta | i <- [0..ntheta-1] ]
phs = [fromIntegral i * dphi | i <- [0..nphi-1] ]
sinths = map sin ths
doone x v = sumElements $ cmap (x *) v
int = dtheta * dphi * sum (zipWith doone sinths (toRows unnorm))
norm = cmap (realToFrac . (/ int)) unnorm :: Matrix CDouble
norm1 = cmap ((/ int)) unnorm
tst = dtheta * dphi * sum (zipWith doone sinths (toRows norm1))
-- Set up output file.
let outthdim = NcDim "theta" ntheta False
outthvar = NcVar "theta" NcDouble [outthdim] M.empty
outphdim = NcDim "phi" nphi False
outphvar = NcVar "phi" NcDouble [outphdim] M.empty
outpdfvar = NcVar "pdf" NcDouble [outthdim, outphdim] M.empty
outncinfo =
emptyNcInfo (workdir </> "sphere-pdf-unif.nc") #
addNcDim outthdim # addNcDim outphdim #
addNcVar outthvar # addNcVar outphvar #
addNcVar outpdfvar
flip (withCreateFile outncinfo) (putStrLn . ("ERROR: " ++) . show) $
\outnc -> do
-- Write coordinate variable values.
put outnc outthvar $
(SV.fromList $ map realToFrac ths :: SV.Vector CDouble)
put outnc outphvar $
(SV.fromList $ map realToFrac phs :: SV.Vector CDouble)
put outnc outpdfvar $ HMatrix norm
return ()
-- Calculate kernel value at one point centred at another point.
kernel :: V -> V -> Double
kernel x y
| d < 1 = 1 - d^2
| d >= 1 = 0
where d = distance x y / bandwidth
-- Angular distance between two unit vectors.
distance :: V -> V -> Double
distance (x1, y1, z1) (x2, y2, z2) = acos $ x1 * x2 + y1 * y2 + z1 * z2
-- Convert (theta, phi) coordinates to 3-D unit vector.
projToPt :: Vector Double -> V
projToPt p = sphPt (p ! 0) (p ! 1)
-- Convert (theta, phi) coordinates to 3-D unit vector.
sphPt :: Double -> Double -> V
sphPt th ph = (sin th * cos ph, sin th * sin ph, cos th)
-- Generate grid point.
grid :: Double -> Double -> V
grid ith iph = sphPt th ph
where th = (0.5 + ith) * dtheta
ph = iph * dphi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment