Last active
August 29, 2015 14:14
-
-
Save ian-ross/e748bc06fa115796bbf7 to your computer and use it in GitHub Desktop.
Haskell data analysis: spherical PDF significance testing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Numeric | |
import Control.Applicative ((<$>)) | |
import Control.Monad | |
import System.FilePath | |
import Data.NetCDF | |
import Data.NetCDF.HMatrix | |
import Data.NetCDF.Vector | |
import Foreign.C | |
import qualified Data.Map as M | |
import qualified Data.Vector.Storable as SV | |
import qualified Data.Vector.Storable.Mutable as SVM | |
import Numeric.LinearAlgebra.HMatrix | |
import qualified Data.ByteString.Char8 as B | |
import qualified Foreign.CUDA.Driver as CUDA | |
import System.Random.MWC | |
import System.Random.MWC.Distributions | |
import Statistics.Sample.Histogram | |
type HMatrixRet a = IO (Either NcError (HMatrix a)) | |
basedir, workdir :: FilePath | |
basedir = "/big/project-storage/haskell-data-analysis/atmos-non-diffusive" | |
workdir = basedir </> "spherical-pdf" | |
type V = SV.Vector Double | |
-- Use a regular 2.5 deg. x 2.5 deg. theta/phi grid on unit sphere. | |
ntheta, nphi :: Int | |
nphi = 2 * 360 `div` 5 ; ntheta = 2 * 180 `div` 5 | |
-- Integration steps. | |
dtheta, dphi :: Double | |
dphi = 2.0 * pi / fromIntegral nphi ; dtheta = pi / fromIntegral ntheta | |
-- Realisations used for bootstrapping. | |
nrealisations :: Int | |
nrealisations = 10000 | |
-- Number of data points for each PDF realisation. | |
nData :: Int | |
nData = 9966 | |
-- Number of histogram bins. | |
nbins :: Int | |
nbins = 100 | |
main :: IO () | |
main = do | |
-- CUDA initialisation. | |
CUDA.initialise [] | |
dev <- CUDA.device 0 | |
ctx <- CUDA.create dev [] | |
ptx <- B.readFile "make_pdf.ptx" | |
CUDA.JITResult time log mdl <- CUDA.loadDataEx ptx [] | |
fun <- CUDA.getFun mdl "make_pdf_kernel" | |
-- Random data point generation. | |
gen <- create | |
let randPt gen = do | |
unnorm <- SV.replicateM 3 (standard gen) | |
let mag = sqrt $ unnorm `dot` unnorm | |
return $ scale (1.0 / mag) unnorm | |
let ths = [(0.5 + fromIntegral i) * dtheta | i <- [0..ntheta-1] ] | |
phs = [fromIntegral i * dphi | i <- [0..nphi-1] ] | |
sinths = map sin ths | |
doone x v = sumElements $ cmap (x *) v | |
-- CUDA data setup. | |
dDataPts <- CUDA.mallocArray $ 3 * nData | |
dPdf <- CUDA.mallocArray $ ntheta * nphi :: IO (CUDA.DevicePtr Double) | |
let threadsx = 32 | |
threadsy = 16 | |
blockSize = (threadsx, threadsy, 1) | |
gridSize = (nphi `div` threadsx, ntheta `div` threadsy, 1) | |
-- Generate PDF realisations. | |
pdfs <- forM [1..nrealisations] $ \r -> do | |
putStrLn $ "REALISATION: " ++ show r | |
-- Create random data points. | |
dataPts <- SV.concat <$> replicateM nData (randPt gen) | |
SV.unsafeWith dataPts $ \p -> CUDA.pokeArray (3 * nData) p dDataPts | |
-- Calculate kernel values for each grid point/data point | |
-- combination and accumulate into grid. | |
CUDA.launchKernel fun gridSize blockSize 0 Nothing | |
[CUDA.IArg (fromIntegral nData), CUDA.VArg dDataPts, CUDA.VArg dPdf] | |
CUDA.sync | |
res <- SVM.new (ntheta * nphi) | |
SVM.unsafeWith res $ \p -> CUDA.peekArray (ntheta * nphi) dPdf p | |
unnormv <- SV.unsafeFreeze res | |
let unnorm = reshape nphi unnormv | |
-- Normalise. | |
let int = dtheta * dphi * sum (zipWith doone sinths (toRows unnorm)) | |
return $ cmap (realToFrac . (/ int)) unnorm :: IO (Matrix Double) | |
-- Convert to per-point samples and generate per-point histograms. | |
let samples = [SV.generate nrealisations (\s -> (pdfs !! s) ! i ! j) :: V | | |
i <- [0..ntheta-1], j <- [0..nphi-1]] | |
(rngmin, rngmax) = range nbins $ SV.concat samples | |
hists = map (histogram_ nbins rngmin rngmax) samples :: [V] | |
step i = rngmin + d * fromIntegral i | |
d = (rngmax - rngmin) / fromIntegral nbins | |
bins = SV.generate nbins step | |
-- Set up output file. | |
let outthdim = NcDim "theta" ntheta False | |
outthvar = NcVar "theta" NcDouble [outthdim] M.empty | |
outphdim = NcDim "phi" nphi False | |
outphvar = NcVar "phi" NcDouble [outphdim] M.empty | |
outbindim = NcDim "bin" nbins False | |
outbinvar = NcVar "bin" NcDouble [outbindim] M.empty | |
outhistvar = NcVar "hist" NcDouble [outthdim, outphdim, outbindim] M.empty | |
outncinfo = | |
emptyNcInfo (workdir </> "sphere-hist.nc") # | |
addNcDim outthdim # addNcDim outphdim # addNcDim outbindim # | |
addNcVar outthvar # addNcVar outphvar # addNcVar outbinvar # | |
addNcVar outhistvar | |
flip (withCreateFile outncinfo) (putStrLn . ("ERROR: " ++) . show) $ | |
\outnc -> do | |
-- Write coordinate variable values. | |
put outnc outthvar $ | |
(SV.fromList $ map realToFrac ths :: SV.Vector CDouble) | |
put outnc outphvar $ | |
(SV.fromList $ map realToFrac phs :: SV.Vector CDouble) | |
put outnc outbinvar $ | |
(SV.map realToFrac bins :: SV.Vector CDouble) | |
let idxs = [(i, j) | i <- [0..ntheta-1], j <- [0..nphi-1]] | |
forM_ (zip idxs hists) $ \((i, j), h) -> do | |
let hstore = cmap realToFrac h :: SV.Vector CDouble | |
putA outnc outhistvar [i, j, 0] [1, 1, nbins] hstore | |
return () | |
putStrLn "OK" | |
-- Convert (theta, phi) coordinates to 3-D unit vector. | |
projToPt :: Vector Double -> V | |
projToPt p = sphPt (p ! 0) (p ! 1) | |
-- Convert (theta, phi) coordinates to 3-D unit vector. | |
sphPt :: Double -> Double -> V | |
sphPt th ph = SV.fromList [sin th * cos ph, sin th * sin ph, cos th] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Control.Applicative ((<$>)) | |
import Control.Monad (forM_) | |
import System.FilePath | |
import qualified Data.Array as A | |
import Data.NetCDF | |
import Data.NetCDF.HMatrix | |
import Foreign.C | |
import qualified Data.Map as M | |
import qualified Data.NetCDF.Vector as V | |
import qualified Data.Vector.Storable as SV | |
import Numeric.LinearAlgebra.HMatrix | |
import Numeric.LinearAlgebra.Devel | |
import Debug.Trace | |
type SVRet a = IO (Either NcError (SV.Vector a)) | |
type HMatrixRet a = IO (Either NcError (HMatrix a)) | |
basedir, workdir :: FilePath | |
basedir = "/big/project-storage/haskell-data-analysis/atmos-non-diffusive" | |
workdir = basedir </> "spherical-pdf" | |
type M = Matrix Double | |
-- Use a regular 2.5 deg. x 2.5 deg. theta/phi grid on unit sphere. | |
ntheta, nphi :: Int | |
nphi = 2 * 360 `div` 5 ; ntheta = 2 * 180 `div` 5 | |
-- Integration steps. | |
dtheta, dphi :: Double | |
dphi = 2.0 * pi / fromIntegral nphi ; dtheta = pi / fromIntegral ntheta | |
main :: IO () | |
main = do | |
-- Open PDF and histogram data files for input. | |
Right pdfnc <- openFile $ workdir </> "sphere-pdf.nc" | |
let (Just pdfvar) = ncVar pdfnc "pdf" | |
Right (HMatrix pdf) <- get pdfnc pdfvar :: HMatrixRet CDouble | |
Right histnc <- openFile $ workdir </> "sphere-hist.nc" | |
let Just nbin = ncDimLength <$> ncDim histnc "bin" | |
(Just binvar) = ncVar histnc "bin" | |
(Just histvar) = ncVar histnc "hist" | |
Right bins <- get histnc binvar :: SVRet CDouble | |
let minbin = SV.head bins ; maxbin = SV.last bins | |
binwidth = (maxbin - minbin) / fromIntegral (nbin - 1) | |
Right histvals <- get histnc histvar :: SVRet CDouble | |
-- Split histogram values for later processing. | |
let nhistvals = SV.length histvals | |
oneslice i = SV.slice i nbin histvals | |
histvecs = map oneslice [0, nbin.. nhistvals - nbin] | |
hists = A.listArray ((0, 0), (ntheta-1, nphi-1)) histvecs | |
nrealisations = SV.sum $ hists A.! (0, 0) | |
-- Calculate significance values. | |
let doone :: CDouble -> CDouble -> CDouble | |
doone dith diph = | |
let ith = truncate dith ; iph = truncate diph | |
pdfval = pdf ! ith ! iph | |
hist = hists A.! (ith, iph) | |
pdfbin0 = truncate $ (pdfval - minbin) / binwidth | |
pdfbin = pdfbin0 `max` 0 `min` nbin - 1 | |
in (SV.sum $ SV.take pdfbin hist) / nrealisations | |
sig = build (ntheta, nphi) doone :: Matrix CDouble | |
-- Set up output file. | |
let ths = [(0.5 + fromIntegral i) * dtheta | i <- [0..ntheta-1] ] | |
phs = [fromIntegral i * dphi | i <- [0..nphi-1] ] | |
outthdim = NcDim "theta" ntheta False | |
outthvar = NcVar "theta" NcDouble [outthdim] M.empty | |
outphdim = NcDim "phi" nphi False | |
outphvar = NcVar "phi" NcDouble [outphdim] M.empty | |
outsigvar = NcVar "significance" NcDouble [outthdim, outphdim] M.empty | |
outncinfo = | |
emptyNcInfo (workdir </> "sphere-significance.nc") # | |
addNcDim outthdim # addNcDim outphdim # | |
addNcVar outthvar # addNcVar outphvar # | |
addNcVar outsigvar | |
flip (withCreateFile outncinfo) (putStrLn . ("ERROR: " ++) . show) $ | |
\outnc -> do | |
-- Write coordinate variable values. | |
put outnc outthvar $ | |
(SV.fromList $ map realToFrac ths :: SV.Vector CDouble) | |
put outnc outphvar $ | |
(SV.fromList $ map realToFrac phs :: SV.Vector CDouble) | |
put outnc outsigvar $ HMatrix sig | |
return () |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Control.Applicative ((<$>)) | |
import Control.Monad | |
import Control.Loop | |
import Control.Monad.ST | |
import System.FilePath | |
import Data.NetCDF | |
import Data.NetCDF.HMatrix | |
import Foreign.C | |
import qualified Data.Map as M | |
import qualified Data.NetCDF.Vector as V | |
import qualified Data.Vector.Storable as SV | |
import Numeric.LinearAlgebra.HMatrix | |
import Numeric.LinearAlgebra.Devel | |
import System.Random.MWC | |
import System.Random.MWC.Distributions | |
import Debug.Trace | |
type HMatrixRet a = IO (Either NcError (HMatrix a)) | |
basedir, workdir, pcadir :: FilePath | |
basedir = "/big/project-storage/haskell-data-analysis/atmos-non-diffusive" | |
workdir = basedir </> "spherical-pdf" | |
pcadir = basedir </> "pca" | |
type V = (Double, Double, Double) | |
type M = Matrix Double | |
-- Use a regular 2.5 deg. x 2.5 deg. theta/phi grid on unit sphere. | |
ntheta, nphi :: Int | |
nphi = 2 * 360 `div` 5 ; ntheta = 2 * 180 `div` 5 | |
-- Integration steps. | |
dtheta, dphi :: Double | |
dphi = 2.0 * pi / fromIntegral nphi ; dtheta = pi / fromIntegral ntheta | |
-- Degree-to-radian conversion factor. | |
degToRad :: Double | |
degToRad = pi / 180.0 | |
-- Density kernel bandwidth. | |
bandwidth :: Double | |
bandwidth = pi / 6.0 | |
-- Number of data points for each PDF realisation. | |
nData :: Int | |
nData = 9966 | |
main :: IO () | |
main = do | |
-- Random data point generation. | |
gen <- create | |
let randPt gen = do | |
unnorm <- SV.replicateM 3 (standard gen) | |
let mag = sqrt $ unnorm `dot` unnorm | |
norm = scale (1.0 / mag) unnorm | |
return (norm ! 0, norm ! 1, norm ! 2) | |
-- Create random data points, flatten to vector and allocate on | |
-- device. | |
dataPts <- replicateM nData (randPt gen) | |
-- Calculate kernel values for each grid point/data point | |
-- combination and accumulate into grid. | |
let unnorm :: Matrix Double | |
unnorm = build (ntheta, nphi) $ \ith iph -> | |
let k = kernel (grid ith iph) | |
in sum $ map k dataPts | |
-- Normalise. | |
let ths = [(0.5 + fromIntegral i) * dtheta | i <- [0..ntheta-1] ] | |
phs = [fromIntegral i * dphi | i <- [0..nphi-1] ] | |
sinths = map sin ths | |
doone x v = sumElements $ cmap (x *) v | |
int = dtheta * dphi * sum (zipWith doone sinths (toRows unnorm)) | |
norm = cmap (realToFrac . (/ int)) unnorm :: Matrix CDouble | |
norm1 = cmap ((/ int)) unnorm | |
tst = dtheta * dphi * sum (zipWith doone sinths (toRows norm1)) | |
-- Set up output file. | |
let outthdim = NcDim "theta" ntheta False | |
outthvar = NcVar "theta" NcDouble [outthdim] M.empty | |
outphdim = NcDim "phi" nphi False | |
outphvar = NcVar "phi" NcDouble [outphdim] M.empty | |
outpdfvar = NcVar "pdf" NcDouble [outthdim, outphdim] M.empty | |
outncinfo = | |
emptyNcInfo (workdir </> "sphere-pdf-unif.nc") # | |
addNcDim outthdim # addNcDim outphdim # | |
addNcVar outthvar # addNcVar outphvar # | |
addNcVar outpdfvar | |
flip (withCreateFile outncinfo) (putStrLn . ("ERROR: " ++) . show) $ | |
\outnc -> do | |
-- Write coordinate variable values. | |
put outnc outthvar $ | |
(SV.fromList $ map realToFrac ths :: SV.Vector CDouble) | |
put outnc outphvar $ | |
(SV.fromList $ map realToFrac phs :: SV.Vector CDouble) | |
put outnc outpdfvar $ HMatrix norm | |
return () | |
-- Calculate kernel value at one point centred at another point. | |
kernel :: V -> V -> Double | |
kernel x y | |
| d < 1 = 1 - d^2 | |
| d >= 1 = 0 | |
where d = distance x y / bandwidth | |
-- Angular distance between two unit vectors. | |
distance :: V -> V -> Double | |
distance (x1, y1, z1) (x2, y2, z2) = acos $ x1 * x2 + y1 * y2 + z1 * z2 | |
-- Convert (theta, phi) coordinates to 3-D unit vector. | |
projToPt :: Vector Double -> V | |
projToPt p = sphPt (p ! 0) (p ! 1) | |
-- Convert (theta, phi) coordinates to 3-D unit vector. | |
sphPt :: Double -> Double -> V | |
sphPt th ph = (sin th * cos ph, sin th * sin ph, cos th) | |
-- Generate grid point. | |
grid :: Double -> Double -> V | |
grid ith iph = sphPt th ph | |
where th = (0.5 + ith) * dtheta | |
ph = iph * dphi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment