Created
June 18, 2012 16:00
-
-
Save rrnewton/2949100 to your computer and use it in GitHub Desktop.
Accelerate cuda backend bug reproducer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE ScopedTypeVariables #-} | |
{-# LANGUAGE TypeOperators #-} | |
{-# LANGUAGE FlexibleContexts #-} | |
{-# LANGUAGE FlexibleInstances #-} | |
{-# LANGUAGE CPP #-} | |
-- Naive translation of shen_anova1_batch.m into Accelerate. | |
module Main (main) where | |
import Data.Array.Accelerate as A | |
import Data.Array.Accelerate.CUDA as R | |
import Prelude as P | |
import Data.List hiding (group) | |
import Data.List.Split (splitEvery) | |
-- import Data.Set as S | |
import Control.Monad (when, mapM_, forM_) | |
import Data.Array.Unboxed (IArray, UArray, elems, listArray) | |
import System.Random | |
type Matrix a = Array DIM2 a | |
type EI = Exp Int -- Shorthand for the lazy. | |
runE x = R.run (unit x) -- Run for scalars | |
dbg = False -- TOGGLE me. | |
-------------------------------------------------------------------------------- | |
-- Test harness -- set up and run the test. | |
test_anova1 :: Int -> Int -> IO () | |
test_anova1 nsbj width = do | |
let | |
nsnp = 1 | |
nsbj' = constant nsbj | |
area = constant (width*width) | |
-- imdata = rand( width*width, nsbj ) | |
imdata :: Acc (Matrix Float) = generate (index2 area nsbj') inorder | |
inorder x = let (r, c) = unlift (unindex2 x) in | |
A.fromIntegral (r + c * area) -- Count by ones in row major.. | |
-- Ammendment -- for now fixing nsnp == 1 and just doing a vector rather than a matrix for snpdata: | |
snpdata :: Acc (Vector Int) | |
snpdata = generate (index1 nsbj') ((`mod` 3) . unindex1) -- Return 0,1, or 2 | |
putStrLn "=== Accelerate naive anova1 batch ===" | |
putStrLn$ "Image width: "++ show width | |
putStrLn$ "SNP category membership across patients:"++ pp(R.run snpdata) | |
-- tic | |
-- forM [0 .. nsnp-1] $ \ i -> do | |
anova1_batch imdata snpdata | |
-- fmap1(:,i) = F; ????? | |
-- toc | |
return () | |
---------------------------------------------------------------------------------------------------- | |
-- Shen's implementation of anova1 in a batch mode | |
-- 'voxels' is a matrix | |
-- 'group' is a column vector | |
-- 'voxel' columns have the same group memebership | |
anova1_batch :: Acc (Matrix Float) -> Acc (Vector Int) -> IO () | |
-- RRN: I believe group should represent a set of individual's single | |
-- nucleotide values at a specific location (a specific SNP). | |
-- 'voxel's represents a /flat/ row of voxels for each individual in | |
-- the group. Here we analyze how variance at each voxel correlates | |
-- with the different categories of individuals created by 'group'. | |
-- | |
-- Specifically, in this case there are three /subgroups/ within | |
-- 'group', corresponding to snp=0, snp=1, snp=2. | |
anova1_batch voxels group = do | |
let d = shape voxels | |
-- n = total number of voxels in each image | |
-- r = number of parallel anova runs == number of individuals in group | |
(n::EI, r::EI) = unlift$ unindex2 d | |
-- FIXME: | |
-- uniqueSnps = unique (R.run group) -- 'g': All the unique SNP settings. | |
-- numUniqueSNPs = size g -- 'p': How many unique values. | |
uniqueSNPs = [0,1,2] -- TEMP, FIXME -- hardcoding | |
numUniqueSNPs = 3 -- TEMP, FIXME -- hardcoding | |
-- NOTE: We could try to keep everything within Accelerate, but | |
-- for the "outermost" loop here, over different SNP values, we | |
-- allow ourselves the rconvenience of using lists, and keeping | |
-- things in regular Haskell. | |
-- First, how much variance is there across ALL voxels (per sample): | |
var_v = variance1of2 voxels :: Acc (Vector Float) | |
ss_T = A.map (df_T *) var_v; | |
-- Next, we compute per-group variance. | |
------------------------------------------------------------ | |
-- Group is a mapping between index->groupID, and we effectively want to invert it | |
-- to get groupID->indices. To accomplish that we first explicitly attach indices: | |
zippedGroup = A.zip (iota r) group | |
-- For each observed SNP value {0,1,2} ... | |
idxs = P.map (\ snpVal -> | |
let | |
-- Here we want the index of *all* of the voxels belonging to | |
-- individuals with a particular snpVal. | |
idx = A.map A.fst $ filterAcc isMatch zippedGroup | |
isMatch pr = let (_::EI,thisSnp::EI) = unlift pr in | |
thisSnp ==* constant snpVal | |
in idx | |
) uniqueSNPs | |
-- Now we're ready to extract the set of voxels that belong to this group of patients: | |
groupedVoxels :: [Acc (Matrix Float)] | |
-- rowGroups = selectRows idx voxels | |
groupedVoxels = P.map (`selectRows` voxels) idxs | |
-- Whew, now we have finally achieved unflat = X(idx,:) in the original code.... | |
-- Next we look at variance within each voxel for individuals in THIS snp category: | |
-- The result is a per-voxel variance: | |
-- variances = A.map (* (A.fromIntegral$ newCols - 1)) (variance2of2 unflat) | |
variances :: [Acc (Vector Float)] = P.map variance2of2 groupedVoxels | |
-- What is the size of each group, minus 1: | |
lens :: [Exp Float] = P.map ((\x -> A.fromIntegral (x - 1)) . unindex1 . shape) idxs | |
-- Do a sum reduction, the result is an array of per-voxel statistics: | |
ss_W = foldl1 (.+) (P.zipWith (liftScalar (.*)) lens variances) | |
------------------------------------------------------------ | |
df_B :: Exp Float = A.fromIntegral$ numUniqueSNPs - 1 | |
df_W :: Exp Float = A.fromIntegral$ n - numUniqueSNPs | |
df_T :: Exp Float = A.fromIntegral$ n - 1 | |
-- Well, this is quite verbose due to there being no scalar/array overloading: | |
ms_W = liftScalarR (./) ss_W df_W | |
ss_B = ss_T .- ss_W; | |
ms_B = liftScalarR (./) ss_B df_B | |
f_statistic = ms_B ./ ms_W | |
putStrLn$ "Finally, here's the F statistic: " ++ pp(run ss_W) | |
return () | |
-- TEMP -- hardcoding this to a specific dimensionality until I fix the general version below | |
-- This sums along the RIGHT of two dimensions (INNERMOST). | |
-- If interpreted as a (row,column) matrix, this collapses each row. | |
variance2of2 :: (Elt a, IsFloating a) | |
=> Acc (Array DIM2 a) -> Acc (Array DIM1 a) | |
variance2of2 arr = | |
A.map (/ (denom-1)) | |
(A.zipWith (-) sum2 | |
(A.zipWith (*) mean sum1)) | |
where | |
mean = A.map (/ denom) sum1 | |
denom = A.fromIntegral rows | |
Z :. (rows::EI) :. (cols::EI) = unlift sh1 | |
sh1 :: Exp ( Z :. Int :. Int) = shape arr | |
-- Sum along LEFT dimension: | |
sum1 = fold (+) 0 arr | |
sum2 = fold (+) 0 sqrs | |
sqrs = A.zipWith (*) arr arr | |
-- TEMP -- hardcoding this to a specific dimensionality until I fix the general version below | |
-- This iterates along the LEFT of two dimensions (OUTERMOST). | |
-- If interpreted as a (row,column) matrix, this collapses each column, | |
-- which is the same behavior as the var() function in matlab applied to a matrix. | |
variance1of2 :: (Elt a, IsFloating a) | |
=> Acc (Array DIM2 a) -> Acc (Array DIM1 a) | |
variance1of2 arr = | |
A.map (/ (denom-1)) | |
(A.zipWith (-) sum2 | |
(A.zipWith (*) mean sum1)) | |
where | |
mean = A.map (/ denom) sum1 | |
denom = A.fromIntegral rows | |
Z :. (rows::EI) :. (cols::EI) = unlift sh1 | |
sh1 :: Exp ( Z :. Int :. Int) = shape arr | |
-- Sum along LEFT dimension: | |
swapped = swap2Dims arr | |
sum1 = fold (+) 0 swapped | |
sum2 = fold (+) 0 sqrs | |
sqrs = A.zipWith (*) swapped swapped | |
-- A swap function fixed to exactly two dimensions. | |
swap2Dims :: (Elt e, Num e) => Acc (Matrix e) -> Acc (Matrix e) | |
swap2Dims a = | |
backpermute new_extent swap2 a -- Perform a gather. | |
where | |
new_extent = swap2 (shape a) | |
swap2 :: Exp DIM2 -> Exp DIM2 | |
swap2 ind = let (Z :.i2 :.i1) = unlift ind in | |
lift (Z :. (i1::EI) :. (i2::EI)) | |
---------------------------------------------------------------------------------------------------- | |
-- Let's print matrices nicely. | |
padleft n str | length str >= n = str | |
padleft n str | otherwise = P.take (n - length str) (repeat ' ') ++ str | |
class NiceShow a where | |
pp :: a -> String | |
instance Show a => NiceShow (Array DIM1 a) where | |
pp arr = | |
capends$ concat$ | |
intersperse " " $ | |
P.map (padleft maxpad) ls | |
where | |
ls = P.map show $ toList arr | |
maxpad = maximum$ P.map length ls | |
capends x = "| "++x++" |" | |
-- This could be much more efficient: | |
instance Show a => NiceShow (Array DIM2 a) where | |
pp arr = concat $ | |
intersperse "\n" $ | |
P.map (capends . | |
concat . | |
intersperse " " . | |
P.map (padleft maxpad)) | |
rowls | |
where (Z :. rows :. cols) = arrayShape arr | |
ls = P.map show $ toList arr | |
maxpad = maximum$ P.map length ls | |
rowls = splitEvery cols ls | |
---------------------------------------------------------------------------------------------------- | |
-- General Utility functions: | |
---------------------------------------------------------------------------------------------------- | |
-- Project a subset of the columns (second dim) from a two dimensional matrix. | |
-- | |
-- @selectRows sl xs@ is similar to xs(sl,:) in Matlab. | |
selectRows :: Elt e => Acc (Vector Int) -> Acc (Array DIM2 e) -> Acc (Array DIM2 e) | |
selectRows sl xs = | |
let Z :. rows = unlift $ shape sl | |
Z :. _ :. cols = unlift $ shape xs :: Z :. Exp Int :. Exp Int | |
in | |
backpermute | |
(index2 rows cols) | |
(\ix -> let Z :. j :. i = unlift ix in index2 (sl ! index1 j) i) | |
xs | |
-- Filter -- from accelerate-examples | |
-- ------ | |
filterAcc :: Elt a | |
=> (Exp a -> Exp Bool) | |
-> Acc (Vector a) | |
-> Acc (Vector a) | |
filterAcc p arr | |
= let -- arr = A.use vec | |
flags = A.map (boolToInt . p) arr | |
(targetIdx, len) = A.scanl' (+) 0 flags | |
arr' = A.backpermute (index1 $ the len) id arr | |
in | |
A.permute const arr' (\ix -> flags!ix ==* 0 ? (ignore, index1 $ targetIdx!ix)) arr | |
-- FIXME: This is abusing 'permute' in that the first two arguments are | |
-- only justified because we know the permutation function will | |
-- write to each location in the target exactly once. | |
-- Instead, we should have a primitive that directly encodes the | |
-- compaction pattern of the permutation function. | |
-- Create a vector containing integers in the range [0,n) | |
-- iota :: (Elt n, IsNum n) => Exp Int -> Acc (Vector n) | |
iota :: Exp Int -> Acc (Vector Int) | |
iota n = A.generate (index1 n) (A.fromIntegral . unindex1) | |
-- Perform a scalar/array operation by lifting an array/array binary | |
-- operator. | |
liftScalar :: (Shape sh, Elt e) | |
=> (Acc (Array sh e) -> Acc (Array sh e) -> Acc (Array sh e)) | |
-> Exp e -> Acc (Array sh e) -> Acc (Array sh e) | |
liftScalar op left right = op left' right | |
where | |
-- I can't figure out how to use replicate here. This is a spurious data dependency introduced by map: | |
left' = A.map (\_ -> left) right | |
-- left' = A.replicate (shape right) (A.unit left) | |
-- Same but flipped: | |
liftScalarR :: (Shape sh, Elt e) | |
=> (Acc (Array sh e) -> Acc (Array sh e) -> Acc (Array sh e)) | |
-> Acc (Array sh e) -> Exp e -> Acc (Array sh e) | |
liftScalarR op = flip (liftScalar (flip op)) | |
-------------------------------------------------------------------------------- | |
-- Elementwise operation matching matlab: | |
infixl 7 ./ | |
infixl 7 .* | |
infixl 6 .+ | |
infixl 6 .- | |
a ./ b = A.zipWith (/) a b | |
a .* b = A.zipWith (*) a b | |
a .+ b = A.zipWith (+) a b | |
a .- b = A.zipWith (-) a b | |
-------------------------------------------------------------------------------- | |
small = test_anova1 6 3 | |
big = test_anova1 300 256 | |
main = small | |
---------------------------------------------------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment