Created
June 8, 2017 04:45
-
-
Save fgaray/727f112d03f313de4c621a27f5012d15 to your computer and use it in GitHub Desktop.
SIMD eDSL in Haskell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE GADTs #-} | |
{-# LANGUAGE TypeSynonymInstances #-} | |
{-# LANGUAGE FlexibleInstances #-} | |
module SIMD where | |
import Data.List hiding (transpose) | |
import Data.List.Split | |
import Control.Monad (liftM2, liftM4, void) | |
import Data.SBV | |
import Test.QuickCheck | |
idempotencia xs = sort (sort xs) == sort xs | |
data Vector4 a = Vector4 a a a a | |
deriving (Eq, Show) | |
instance (EqSymbolic a) => EqSymbolic (Vector4 a) where | |
(Vector4 a b c d) .== (Vector4 a' b' c' d') = a .== a' &&& b .== b' &&& c .== c' &&& d .== d' | |
instance (Mergeable a) => Mergeable (Vector4 a) where | |
symbolicMerge b sb (Vector4 a0 a1 a2 a3) (Vector4 b0 b1 b2 b3) = | |
let (a', b', c', d') = symbolicMerge b sb (a0, a1, a2, a3) (b0, b1, b2, b3) | |
in Vector4 a' b' c' d' | |
lift128i :: (a -> a -> a -> a -> (a, a, a, a)) -> Vector4 a -> Vector4 a | |
lift128i f (Vector4 a b c d) = | |
let (a', b', c', d') = f a b c d | |
in Vector4 a' b' c' d' | |
lift128i2 :: ((a, a, a, a) -> (a, a, a, a) -> (a, a, a, a)) -> Vector4 a -> Vector4 a -> Vector4 a | |
lift128i2 f (Vector4 a b c d) (Vector4 a' b' c' d') = | |
let (a'', b'', c'', d'') = f (a, b, c, d) (a', b', c', d') | |
in Vector4 a'' b'' c'' d'' | |
add128i :: (Num a) => Vector4 a -> Vector4 a -> Vector4 a | |
add128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (a + a') (b + b') (c + c') (d + d') | |
-- (1, 3, 3, 0) | |
-- | |
-- [1, 2, 3, 4] [0.1, 0.2, 0.3, 0.4] | |
-- | |
-- [1, 4, 0.4, 0.1] | |
-- | |
shuffle128i :: Vector4 a -> Vector4 a -> (Int, Int, Int, Int) -> Vector4 a | |
shuffle128i v1 v2 (x, y, z, w) = Vector4 (take w v1) (take z v1) (take y v2) (take x v2) | |
where | |
take :: Int -> Vector4 a -> a | |
take 0 (Vector4 x _ _ _) = x | |
take 1 (Vector4 _ x _ _) = x | |
take 2 (Vector4 _ _ x _) = x | |
take 3 (Vector4 _ _ _ x) = x | |
moveldup128i :: Vector4 a -> Vector4 a | |
moveldup128i (Vector4 a _ c _) = Vector4 a a c c | |
movehdup128i :: Vector4 a -> Vector4 a | |
movehdup128i (Vector4 _ b _ d) = Vector4 b b d d | |
addsubp128i :: (Num a) => Vector4 a -> Vector4 a -> Vector4 a | |
addsubp128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (a - a') (b + b') (c - c') (d + d') | |
mul128i :: (Num a) => Vector4 a -> Vector4 a -> Vector4 a | |
mul128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (a * a') (b * b') (c * c') (d * d') | |
min128i :: (Num a, OrdSymbolic a) => Vector4 a -> Vector4 a -> Vector4 a | |
min128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (smin a a') (smin b b') (smin c c') (smin d d') | |
max128i :: (Num a, OrdSymbolic a) => Vector4 a -> Vector4 a -> Vector4 a | |
max128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (smax a a') (smax b b') (smax c c') (smax d d') | |
{-min128i :: (Num a, Ord a) => Vector4 a -> Vector4 a -> Vector4 a-} | |
{-min128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (min a a') (min b b') (min c c') (min d d')-} | |
{-max128i :: (Num a, Ord a) => Vector4 a -> Vector4 a -> Vector4 a-} | |
{-max128i (Vector4 a b c d) (Vector4 a' b' c' d') = Vector4 (max a a') (max b b') (max c c') (max d d')-} | |
genVectorInt :: SInt32 -> SInt32 -> SInt32 -> SInt32 -> Vector4 SInt32 | |
genVectorInt = Vector4 | |
genVectorIntQC :: Int -> Int -> Int -> Int -> Vector4 Int | |
genVectorIntQC = Vector4 | |
test_add128 = prove f | |
where | |
f a b c d = add128i (genVectorInt a b c d) (genVectorInt a b c d) .== genVectorInt (a + a) (b + b) (c + c) (d + d) | |
test_shuffle128 a b c d a' b' c' d' = shuffle128i v1 v2 (1, 3, 3, 0) .== genVectorInt a d d' b' | |
where | |
v1 = genVectorInt a b c d | |
v2 = genVectorInt a' b' c' d' | |
test_shuffle128Idem a b c d = shuffle128i v1 v1 (0, 0, 0, 0) .== genVectorInt a a a a | |
where | |
v1 = genVectorInt a b c d | |
test_addsub a b c d a' b' c' d' = addsubp128i v1 v2 .== genVectorInt (a - a') (b + b') (c - c') (d + d') | |
where | |
v1 = genVectorInt a b c d | |
v2 = genVectorInt a' b' c' d' | |
-- prove profe | |
profe a b c d a' b' c' d' = programa v1 v2 .== check v1 v2 | |
where | |
v1 = genVectorInt a b c d | |
v2 = genVectorInt a' b' c' d' | |
programa a b = | |
let a1 = moveldup128i a | |
a2 = movehdup128i a | |
b1 = mul128i a1 b | |
b2 = mul128i a2 b | |
c = shuffle128i b2 b2 (2, 3, 0, 1) | |
in addsubp128i b1 c | |
check a b = lift128i2 (\(a, b, c, d) (a', b', c', d') -> (real a b a' b', img a b a' b', real c d c' d', img c d c' d')) a b | |
where | |
real ar ai br bi = ar * br - ai * bi | |
img ar ai br bi = ar * bi + ai * br | |
sort4MinMax :: (OrdSymbolic a, Num a) => Vector4 a -> Vector4 a | |
sort4MinMax v = | |
let v2 = shuffle128i v v (1, 0, 3, 2) | |
v3_min = min128i v2 v | |
v3_max = max128i v2 v | |
v3 = shuffle128i v3_max v3_min (3, 2, 1, 0) | |
v3' = shuffle128i v3 v3 (2, 3, 0, 1) | |
v4_min = min128i v3' v3 | |
v4_max = max128i v3' v3 | |
v4_max_min = shuffle128i v4_max v4_min (3, 1, 2, 0) | |
v5 = shuffle128i v4_max_min v4_max_min (3, 1, 2, 0) | |
v5' = shuffle128i v5 v5 (3, 1, 2, 3) | |
v6_min = min128i v5' v5 | |
v6_max = max128i v5' v5 | |
v6 = shuffle128i v6_min v6_max (0, 1, 2, 3) | |
in v6 | |
testSort4MinMax a b c d = | |
let (Vector4 a' b' c' d') = sort4MinMax v | |
in a' .<= b' &&& b' .<= c' &&& c' .<= d' | |
where | |
v = genVectorInt a b c d | |
sort4MinMaxReverse :: (OrdSymbolic a, Num a) => Vector4 a -> Vector4 a | |
sort4MinMaxReverse v = | |
let v2 = shuffle128i v v (1, 0, 3, 2) | |
v3_min = min128i v2 v | |
v3_max = max128i v2 v | |
v3 = shuffle128i v3_max v3_min (3, 2, 1, 0) | |
v3' = shuffle128i v3 v3 (2, 3, 0, 1) | |
v4_min = min128i v3' v3 | |
v4_max = max128i v3' v3 | |
v4_max_min = shuffle128i v4_max v4_min (3, 1, 2, 0) | |
v5 = shuffle128i v4_max_min v4_max_min (3, 1, 2, 0) | |
v5' = shuffle128i v5 v5 (3, 1, 2, 3) | |
v6_min = min128i v5' v5 | |
v6_max = max128i v5' v5 | |
v6 = shuffle128i v6_max v6_min (3, 2, 1, 0) | |
in v6 | |
testSort4MinMaxReverse a b c d = | |
let (Vector4 a' b' c' d') = sort4MinMaxReverse v | |
in a' .>= b' &&& b' .>= c' &&& c' .>= d' | |
where | |
v = genVectorInt a b c d | |
--bitonicSort :: (OrdSymbolic a, Num a) => Vector4 a -> Vector4 a -> (Vector4 a, Vector4 a) | |
bitonicSort v1 v2 = | |
let v1' = shuffle128i v1 v1 (3, 1, 2, 0) | |
v2' = shuffle128i v2 v2 (3, 1, 2, 0) | |
v_e1_min = min128i v1' v2' | |
v_e1_max = max128i v1' v2' | |
v_e1_min' = shuffle128i v_e1_min v_e1_max (0, 2, 2, 0) | |
v_e1_max' = shuffle128i v_e1_min v_e1_max (3, 1, 3, 1) | |
v_e1_min'' = shuffle128i v_e1_min' v_e1_min' (2, 1, 3, 0) | |
v_e1_max'' = shuffle128i v_e1_max' v_e1_max' (3, 1, 2, 0) -- probado | |
v_e2_min = min128i v_e1_min'' v_e1_max'' | |
v_e2_max = max128i v_e1_min'' v_e1_max'' | |
v_e2_min' = shuffle128i v_e2_min v_e2_max (1, 0, 1, 0) | |
v_e2_max' = shuffle128i v_e2_min v_e2_max (3, 2, 3, 2) | |
v_e2_min'' = shuffle128i v_e2_min' v_e2_min' (3, 1, 2, 0) | |
v_e2_max'' = shuffle128i v_e2_max' v_e2_max' (3, 1, 2, 0) | |
v_e3_min = min128i v_e2_min'' v_e2_max'' | |
v_e3_max = max128i v_e2_min'' v_e2_max'' | |
b2_min' = shuffle128i v_e3_min v_e3_max (1, 0, 1, 0) | |
b2_max' = shuffle128i v_e3_min v_e3_max (3, 2, 3, 2) | |
b2_min'' = shuffle128i b2_min' b2_min' (3, 1, 2, 0) | |
b2_max'' = shuffle128i b2_max' b2_max' (3, 1, 2, 0) | |
in (b2_min'', b2_max'') | |
testBitonicSort a b c d e f g h = | |
let ((Vector4 a' b' c' d'), (Vector4 e' f' g' h')) = bitonicSort v1 v2 | |
in a' .<= b' &&& b' .<= c' &&& c' .<= d' &&& d' .<= e' &&& f' .<= g' &&& g' .<= h' | |
where | |
v1 = sort4MinMax (genVectorInt a b c d) | |
v2 = sort4MinMaxReverse (genVectorInt e f g h) | |
-- El primer shuffle grande del bitonic | |
shuffle1 v_e1_min v_e1_max = | |
let v_e1_min' = shuffle128i v_e1_min v_e1_max (0, 2, 2, 0) | |
v_e1_max' = shuffle128i v_e1_min v_e1_max (3, 1, 3, 1) | |
v_e1_min'' = shuffle128i v_e1_min' v_e1_min' (2, 1, 3, 0) | |
v_e1_max'' = shuffle128i v_e1_max' v_e1_max' (3, 1, 2, 0) | |
in (v_e1_min'', v_e1_max'') | |
testShuffle a b c d e f g h = | |
let ((Vector4 a' b' c' d'), (Vector4 e' f' g' h')) = shuffle1 v1 v2 | |
in a' .== a &&& b .== e' &&& c .== c' &&& d .== g' &&& e .== b' &&& f .== f' &&& g .== d' &&& h .== h' | |
where | |
v1 = genVectorInt a b c d | |
v2 = genVectorInt e f g h | |
-- El segundo shuffle grande el bitonic | |
shuffle2 v_e2_min v_e2_max = | |
let v_e2_min' = shuffle128i v_e2_min v_e2_max (1, 0, 1, 0) | |
v_e2_max' = shuffle128i v_e2_min v_e2_max (3, 2, 3, 2) | |
v_e2_min'' = shuffle128i v_e2_min' v_e2_min' (3, 1, 2, 0) | |
v_e2_max'' = shuffle128i v_e2_max' v_e2_max' (3, 1, 2, 0) | |
in (v_e2_min'', v_e2_max'') | |
testShuffle2 a b c d e f g h = | |
let ((Vector4 a' b' c' d'), (Vector4 e' f' g' h')) = shuffle2 v1 v2 | |
in a' .== a &&& b' .== e &&& c' .== b &&& d' .== f &&& e' .== c &&& f' .== g &&& g' .== d &&& h' .== h | |
{-in a' .== a &&& b' .== b -- &&& c' .== e &&& d' .== f &&& e' .== c &&& f' .== d &&& g' .== g &&& h' .== h-} | |
where | |
v1 = genVectorInt a b c d | |
v2 = genVectorInt e f g h | |
inRegisterP1 a b c d = | |
let a' = min128i a c | |
b' = min128i b d | |
c' = max128i a c | |
d' = max128i b d | |
a'' = min128i a' b' | |
b'' = max128i a' b' | |
c'' = min128i c' d' | |
d'' = max128i c' d' | |
b''' = min128i b'' c'' | |
c''' = max128i b'' c'' | |
in (a'', b''', c''', d'') | |
transpose (a, b, c, d) = | |
let (Vector4 a0 a1 a2 a3) = a | |
(Vector4 b0 b1 b2 b3) = b | |
(Vector4 c0 c1 c2 c3) = c | |
(Vector4 d0 d1 d2 d3) = d | |
a' = Vector4 a0 b0 c0 d0 | |
b' = Vector4 a1 b1 c1 d1 | |
c' = Vector4 a2 b2 c2 d2 | |
d' = Vector4 a3 b3 c3 d3 | |
in (a', b', c', d') | |
inRegisterP2 (a, b, c, d) = | |
let (a', b') = bitonicSort a (reverse_register b) | |
(c', d') = bitonicSort c (reverse_register d) | |
in ((a', b'), (c', d')) | |
testInRegister a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3 d0 d1 d2 d3 = | |
let ((Vector4 a0' a1' a2' a3', Vector4 b0' b1' b2' b3'), (Vector4 c0' c1' c2' c3', Vector4 d0' d1' d2' d3')) = inRegisterP2 . transpose $ inRegisterP1 v1 v2 v3 v4 | |
in a0' .<= a1' &&& a1' .<= a2' &&& a2' .<= a3' &&& a3' .<= b0' &&& | |
b0' .<= b1' &&& b1' .<= b2' &&& b2' .<= b3' &&& | |
c0' .<= c1' &&& c1' .<= c2' &&& c2' .<= c3' &&& c3' .<= d0' &&& | |
d0' .<= d1' &&& d1' .<= d2' &&& d2' .<= d3' | |
where | |
v1 = genVectorInt a0 a1 a2 a3 | |
v2 = genVectorInt b0 b1 b2 b3 | |
v3 = genVectorInt c0 c1 c2 c3 | |
v4 = genVectorInt d0 d1 d2 d3 | |
reverse_register (Vector4 a b c d) = Vector4 d c b a | |
merge_simd ((v1, v2), (v3, v4)) = | |
let (o1, o2) = bitonicSort v1 (reverse_register v3) | |
(Vector4 a _ _ _) = v2 | |
(Vector4 b _ _ _) = v4 | |
(o1', o1'', o2'') = ite (a .<= b) ( | |
let (o1', o2') = bitonicSort o2 (reverse_register v2) | |
(o1'', o2'') = bitonicSort o2' (reverse_register v4) | |
in (o1', o1'', o2'')) | |
(let (o1', o2') = bitonicSort o2 (reverse_register v4) | |
(o1'', o2'') = bitonicSort o2' (reverse_register v2) | |
in (o1', o1'', o2'')) | |
in (o1, o1', o1'', o2'') | |
{-merge_simd2 ((v1, v2), (v3, v4)) =-} | |
{-let (o1, o2) = bitonicSort v1 (reverse_register v3)-} | |
{-max = max128i v2 v4-} | |
{-min = min128i v2 v4-} | |
{-(o1', o2') = bitonicSort o2 (reverse_register min)-} | |
{-(o1'', o2'') = bitonicSort o2' (reverse_register max)-} | |
{-in (o1, o1', o1'', o2'')-} | |
testMergeSimd a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3 d0 d1 d2 d3 = | |
let (Vector4 a0' a1' a2' a3', Vector4 b0' b1' b2' b3', Vector4 c0' c1' c2' c3', Vector4 d0' d1' d2' d3') = merge_simd . inRegisterP2 . transpose $ inRegisterP1 v1 v2 v3 v4 | |
in a0' .<= a1' &&& a1' .<= a2' &&& a2' .<= a3' &&& a3' .<= b0' &&& | |
b0' .<= b1' &&& b1' .<= b2' &&& b2' .<= b3' &&& b3' .<= c0' &&& | |
c0' .<= c1' &&& c1' .<= c2' &&& c2' .<= c3' &&& c3' .<= d0' &&& | |
d0' .<= d1' &&& d1' .<= d2' &&& d2' .<= d3' | |
where | |
v1 = genVectorInt a0 a1 a2 a3 | |
v2 = genVectorInt b0 b1 b2 b3 | |
v3 = genVectorInt c0 c1 c2 c3 | |
v4 = genVectorInt d0 d1 d2 d3 | |
{-test_all = do-} | |
{-void $ testSort4MinMax-} | |
{-void $ testSort4MinMaxReverse-} | |
{-void $ testBitonicSort-} | |
{-void $ testInRegister-} | |
{---testMergeSimd-} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment