Skip to content

Instantly share code, notes, and snippets.

@fgaray
Created June 8, 2017 04:45
Show Gist options
  • Save fgaray/727f112d03f313de4c621a27f5012d15 to your computer and use it in GitHub Desktop.
Save fgaray/727f112d03f313de4c621a27f5012d15 to your computer and use it in GitHub Desktop.
SIMD eDSL in Haskell
{-# LANGUAGE GADTs #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
module SIMD where
import Data.List hiding (transpose)
import Data.List.Split
import Control.Monad (liftM2, liftM4, void)
import Data.SBV
import Test.QuickCheck
idempotencia xs = sort (sort xs) == sort xs
data Vector4 a = Vector4 a a a a
deriving (Eq, Show)
instance (EqSymbolic a) => EqSymbolic (Vector4 a) where
(Vector4 a b c d) .== (Vector4 a' b' c' d') = a .== a' &&& b .== b' &&& c .== c' &&& d .== d'
instance (Mergeable a) => Mergeable (Vector4 a) where
symbolicMerge b sb (Vector4 a0 a1 a2 a3) (Vector4 b0 b1 b2 b3) =
let (a', b', c', d') = symbolicMerge b sb (a0, a1, a2, a3) (b0, b1, b2, b3)
in Vector4 a' b' c' d'
lift128i :: (a -> a -> a -> a -> (a, a, a, a)) -> Vector4 a -> Vector4 a
lift128i f (Vector4 a b c d) =
let (a', b', c', d') = f a b c d
in Vector4 a' b' c' d'
lift128i2 :: ((a, a, a, a) -> (a, a, a, a) -> (a, a, a, a)) -> Vector4 a -> Vector4 a -> Vector4 a
lift128i2 f (Vector4 a b c d) (Vector4 a' b' c' d') =
let (a'', b'', c'', d'') = f (a, b, c, d) (a', b', c', d')
in Vector4 a'' b'' c'' d''
add128i :: (Num a) => Vector4 a -> Vector4 a -> Vector4 a
add128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (a + a') (b + b') (c + c') (d + d')
-- (1, 3, 3, 0)
--
-- [1, 2, 3, 4] [0.1, 0.2, 0.3, 0.4]
--
-- [1, 4, 0.4, 0.1]
--
shuffle128i :: Vector4 a -> Vector4 a -> (Int, Int, Int, Int) -> Vector4 a
shuffle128i v1 v2 (x, y, z, w) = Vector4 (take w v1) (take z v1) (take y v2) (take x v2)
where
take :: Int -> Vector4 a -> a
take 0 (Vector4 x _ _ _) = x
take 1 (Vector4 _ x _ _) = x
take 2 (Vector4 _ _ x _) = x
take 3 (Vector4 _ _ _ x) = x
moveldup128i :: Vector4 a -> Vector4 a
moveldup128i (Vector4 a _ c _) = Vector4 a a c c
movehdup128i :: Vector4 a -> Vector4 a
movehdup128i (Vector4 _ b _ d) = Vector4 b b d d
addsubp128i :: (Num a) => Vector4 a -> Vector4 a -> Vector4 a
addsubp128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (a - a') (b + b') (c - c') (d + d')
mul128i :: (Num a) => Vector4 a -> Vector4 a -> Vector4 a
mul128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (a * a') (b * b') (c * c') (d * d')
min128i :: (Num a, OrdSymbolic a) => Vector4 a -> Vector4 a -> Vector4 a
min128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (smin a a') (smin b b') (smin c c') (smin d d')
max128i :: (Num a, OrdSymbolic a) => Vector4 a -> Vector4 a -> Vector4 a
max128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (smax a a') (smax b b') (smax c c') (smax d d')
{-min128i :: (Num a, Ord a) => Vector4 a -> Vector4 a -> Vector4 a-}
{-min128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (min a a') (min b b') (min c c') (min d d')-}
{-max128i :: (Num a, Ord a) => Vector4 a -> Vector4 a -> Vector4 a-}
{-max128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (max a a') (max b b') (max c c') (max d d')-}
genVectorInt :: SInt32 -> SInt32 -> SInt32 -> SInt32 -> Vector4 SInt32
genVectorInt = Vector4
genVectorIntQC :: Int -> Int -> Int -> Int -> Vector4 Int
genVectorIntQC = Vector4
test_add128 = prove f
where
f a b c d = add128i (genVectorInt a b c d) (genVectorInt a b c d) .== genVectorInt (a + a) (b + b) (c + c) (d + d)
test_shuffle128 a b c d a' b' c' d' = shuffle128i v1 v2 (1, 3, 3, 0) .== genVectorInt a d d' b'
where
v1 = genVectorInt a b c d
v2 = genVectorInt a' b' c' d'
test_shuffle128Idem a b c d = shuffle128i v1 v1 (0, 0, 0, 0) .== genVectorInt a a a a
where
v1 = genVectorInt a b c d
test_addsub a b c d a' b' c' d' = addsubp128i v1 v2 .== genVectorInt (a - a') (b + b') (c - c') (d + d')
where
v1 = genVectorInt a b c d
v2 = genVectorInt a' b' c' d'
-- prove profe
profe a b c d a' b' c' d' = programa v1 v2 .== check v1 v2
where
v1 = genVectorInt a b c d
v2 = genVectorInt a' b' c' d'
programa a b =
let a1 = moveldup128i a
a2 = movehdup128i a
b1 = mul128i a1 b
b2 = mul128i a2 b
c = shuffle128i b2 b2 (2, 3, 0, 1)
in addsubp128i b1 c
check a b = lift128i2 (\(a, b, c, d) (a', b', c', d') -> (real a b a' b', img a b a' b', real c d c' d', img c d c' d')) a b
where
real ar ai br bi = ar * br - ai * bi
img ar ai br bi = ar * bi + ai * br
sort4MinMax :: (OrdSymbolic a, Num a) => Vector4 a -> Vector4 a
sort4MinMax v =
let v2 = shuffle128i v v (1, 0, 3, 2)
v3_min = min128i v2 v
v3_max = max128i v2 v
v3 = shuffle128i v3_max v3_min (3, 2, 1, 0)
v3' = shuffle128i v3 v3 (2, 3, 0, 1)
v4_min = min128i v3' v3
v4_max = max128i v3' v3
v4_max_min = shuffle128i v4_max v4_min (3, 1, 2, 0)
v5 = shuffle128i v4_max_min v4_max_min (3, 1, 2, 0)
v5' = shuffle128i v5 v5 (3, 1, 2, 3)
v6_min = min128i v5' v5
v6_max = max128i v5' v5
v6 = shuffle128i v6_min v6_max (0, 1, 2, 3)
in v6
testSort4MinMax a b c d =
let (Vector4 a' b' c' d') = sort4MinMax v
in a' .<= b' &&& b' .<= c' &&& c' .<= d'
where
v = genVectorInt a b c d
sort4MinMaxReverse :: (OrdSymbolic a, Num a) => Vector4 a -> Vector4 a
sort4MinMaxReverse v =
let v2 = shuffle128i v v (1, 0, 3, 2)
v3_min = min128i v2 v
v3_max = max128i v2 v
v3 = shuffle128i v3_max v3_min (3, 2, 1, 0)
v3' = shuffle128i v3 v3 (2, 3, 0, 1)
v4_min = min128i v3' v3
v4_max = max128i v3' v3
v4_max_min = shuffle128i v4_max v4_min (3, 1, 2, 0)
v5 = shuffle128i v4_max_min v4_max_min (3, 1, 2, 0)
v5' = shuffle128i v5 v5 (3, 1, 2, 3)
v6_min = min128i v5' v5
v6_max = max128i v5' v5
v6 = shuffle128i v6_max v6_min (3, 2, 1, 0)
in v6
testSort4MinMaxReverse a b c d =
let (Vector4 a' b' c' d') = sort4MinMaxReverse v
in a' .>= b' &&& b' .>= c' &&& c' .>= d'
where
v = genVectorInt a b c d
--bitonicSort :: (OrdSymbolic a, Num a) => Vector4 a -> Vector4 a -> (Vector4 a, Vector4 a)
bitonicSort v1 v2 =
let v1' = shuffle128i v1 v1 (3, 1, 2, 0)
v2' = shuffle128i v2 v2 (3, 1, 2, 0)
v_e1_min = min128i v1' v2'
v_e1_max = max128i v1' v2'
v_e1_min' = shuffle128i v_e1_min v_e1_max (0, 2, 2, 0)
v_e1_max' = shuffle128i v_e1_min v_e1_max (3, 1, 3, 1)
v_e1_min'' = shuffle128i v_e1_min' v_e1_min' (2, 1, 3, 0)
v_e1_max'' = shuffle128i v_e1_max' v_e1_max' (3, 1, 2, 0) -- probado
v_e2_min = min128i v_e1_min'' v_e1_max''
v_e2_max = max128i v_e1_min'' v_e1_max''
v_e2_min' = shuffle128i v_e2_min v_e2_max (1, 0, 1, 0)
v_e2_max' = shuffle128i v_e2_min v_e2_max (3, 2, 3, 2)
v_e2_min'' = shuffle128i v_e2_min' v_e2_min' (3, 1, 2, 0)
v_e2_max'' = shuffle128i v_e2_max' v_e2_max' (3, 1, 2, 0)
v_e3_min = min128i v_e2_min'' v_e2_max''
v_e3_max = max128i v_e2_min'' v_e2_max''
b2_min' = shuffle128i v_e3_min v_e3_max (1, 0, 1, 0)
b2_max' = shuffle128i v_e3_min v_e3_max (3, 2, 3, 2)
b2_min'' = shuffle128i b2_min' b2_min' (3, 1, 2, 0)
b2_max'' = shuffle128i b2_max' b2_max' (3, 1, 2, 0)
in (b2_min'', b2_max'')
testBitonicSort a b c d e f g h =
let ((Vector4 a' b' c' d'), (Vector4 e' f' g' h')) = bitonicSort v1 v2
in a' .<= b' &&& b' .<= c' &&& c' .<= d' &&& d' .<= e' &&& f' .<= g' &&& g' .<= h'
where
v1 = sort4MinMax (genVectorInt a b c d)
v2 = sort4MinMaxReverse (genVectorInt e f g h)
-- El primer shuffle grande del bitonic
shuffle1 v_e1_min v_e1_max =
let v_e1_min' = shuffle128i v_e1_min v_e1_max (0, 2, 2, 0)
v_e1_max' = shuffle128i v_e1_min v_e1_max (3, 1, 3, 1)
v_e1_min'' = shuffle128i v_e1_min' v_e1_min' (2, 1, 3, 0)
v_e1_max'' = shuffle128i v_e1_max' v_e1_max' (3, 1, 2, 0)
in (v_e1_min'', v_e1_max'')
testShuffle a b c d e f g h =
let ((Vector4 a' b' c' d'), (Vector4 e' f' g' h')) = shuffle1 v1 v2
in a' .== a &&& b .== e' &&& c .== c' &&& d .== g' &&& e .== b' &&& f .== f' &&& g .== d' &&& h .== h'
where
v1 = genVectorInt a b c d
v2 = genVectorInt e f g h
-- El segundo shuffle grande el bitonic
shuffle2 v_e2_min v_e2_max =
let v_e2_min' = shuffle128i v_e2_min v_e2_max (1, 0, 1, 0)
v_e2_max' = shuffle128i v_e2_min v_e2_max (3, 2, 3, 2)
v_e2_min'' = shuffle128i v_e2_min' v_e2_min' (3, 1, 2, 0)
v_e2_max'' = shuffle128i v_e2_max' v_e2_max' (3, 1, 2, 0)
in (v_e2_min'', v_e2_max'')
testShuffle2 a b c d e f g h =
let ((Vector4 a' b' c' d'), (Vector4 e' f' g' h')) = shuffle2 v1 v2
in a' .== a &&& b' .== e &&& c' .== b &&& d' .== f &&& e' .== c &&& f' .== g &&& g' .== d &&& h' .== h
{-in a' .== a &&& b' .== b -- &&& c' .== e &&& d' .== f &&& e' .== c &&& f' .== d &&& g' .== g &&& h' .== h-}
where
v1 = genVectorInt a b c d
v2 = genVectorInt e f g h
inRegisterP1 a b c d =
let a' = min128i a c
b' = min128i b d
c' = max128i a c
d' = max128i b d
a'' = min128i a' b'
b'' = max128i a' b'
c'' = min128i c' d'
d'' = max128i c' d'
b''' = min128i b'' c''
c''' = max128i b'' c''
in (a'', b''', c''', d'')
transpose (a, b, c, d) =
let (Vector4 a0 a1 a2 a3) = a
(Vector4 b0 b1 b2 b3) = b
(Vector4 c0 c1 c2 c3) = c
(Vector4 d0 d1 d2 d3) = d
a' = Vector4 a0 b0 c0 d0
b' = Vector4 a1 b1 c1 d1
c' = Vector4 a2 b2 c2 d2
d' = Vector4 a3 b3 c3 d3
in (a', b', c', d')
inRegisterP2 (a, b, c, d) =
let (a', b') = bitonicSort a (reverse_register b)
(c', d') = bitonicSort c (reverse_register d)
in ((a', b'), (c', d'))
testInRegister a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3 d0 d1 d2 d3 =
let ((Vector4 a0' a1' a2' a3', Vector4 b0' b1' b2' b3'), (Vector4 c0' c1' c2' c3', Vector4 d0' d1' d2' d3')) = inRegisterP2 . transpose $ inRegisterP1 v1 v2 v3 v4
in a0' .<= a1' &&& a1' .<= a2' &&& a2' .<= a3' &&& a3' .<= b0' &&&
b0' .<= b1' &&& b1' .<= b2' &&& b2' .<= b3' &&&
c0' .<= c1' &&& c1' .<= c2' &&& c2' .<= c3' &&& c3' .<= d0' &&&
d0' .<= d1' &&& d1' .<= d2' &&& d2' .<= d3'
where
v1 = genVectorInt a0 a1 a2 a3
v2 = genVectorInt b0 b1 b2 b3
v3 = genVectorInt c0 c1 c2 c3
v4 = genVectorInt d0 d1 d2 d3
reverse_register (Vector4 a b c d) = Vector4 d c b a
merge_simd ((v1, v2), (v3, v4)) =
let (o1, o2) = bitonicSort v1 (reverse_register v3)
(Vector4 a _ _ _) = v2
(Vector4 b _ _ _) = v4
(o1', o1'', o2'') = ite (a .<= b) (
let (o1', o2') = bitonicSort o2 (reverse_register v2)
(o1'', o2'') = bitonicSort o2' (reverse_register v4)
in (o1', o1'', o2''))
(let (o1', o2') = bitonicSort o2 (reverse_register v4)
(o1'', o2'') = bitonicSort o2' (reverse_register v2)
in (o1', o1'', o2''))
in (o1, o1', o1'', o2'')
{-merge_simd2 ((v1, v2), (v3, v4)) =-}
{-let (o1, o2) = bitonicSort v1 (reverse_register v3)-}
{-max = max128i v2 v4-}
{-min = min128i v2 v4-}
{-(o1', o2') = bitonicSort o2 (reverse_register min)-}
{-(o1'', o2'') = bitonicSort o2' (reverse_register max)-}
{-in (o1, o1', o1'', o2'')-}
testMergeSimd a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3 d0 d1 d2 d3 =
let (Vector4 a0' a1' a2' a3', Vector4 b0' b1' b2' b3', Vector4 c0' c1' c2' c3', Vector4 d0' d1' d2' d3') = merge_simd . inRegisterP2 . transpose $ inRegisterP1 v1 v2 v3 v4
in a0' .<= a1' &&& a1' .<= a2' &&& a2' .<= a3' &&& a3' .<= b0' &&&
b0' .<= b1' &&& b1' .<= b2' &&& b2' .<= b3' &&& b3' .<= c0' &&&
c0' .<= c1' &&& c1' .<= c2' &&& c2' .<= c3' &&& c3' .<= d0' &&&
d0' .<= d1' &&& d1' .<= d2' &&& d2' .<= d3'
where
v1 = genVectorInt a0 a1 a2 a3
v2 = genVectorInt b0 b1 b2 b3
v3 = genVectorInt c0 c1 c2 c3
v4 = genVectorInt d0 d1 d2 d3
{-test_all = do-}
{-void $ testSort4MinMax-}
{-void $ testSort4MinMaxReverse-}
{-void $ testBitonicSort-}
{-void $ testInRegister-}
{---testMergeSimd-}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment