Last active
March 30, 2018 12:39
-
-
Save andrewthad/f7be4b3ac18c2c52be16d2e3c2bd6a97 to your computer and use it in GitHub Desktop.
Benchmark performance impact of removing start index from primitive vector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE ScopedTypeVariables #-} | |
{-# LANGUAGE BangPatterns #-} | |
{-# LANGUAGE MagicHash #-} | |
{-# LANGUAGE UnboxedTuples #-} | |
{-# LANGUAGE RankNTypes #-} | |
-- If you are building with GHC 8.2.1, it is recommonded that | |
-- this module be built with: | |
-- ghc -O2 -fllvm -pgmlo opt-3.9 -pgmlc llc-3.9 prim_bench.hs | |
-- For other GHC versions, choose the appropriate versions of | |
-- the LLVM tools. | |
import Criterion.Main | |
import Data.Primitive.Types | |
import Control.Monad.Primitive | |
import GHC.Int (Int(..)) | |
import GHC.Prim | |
import Control.Monad.ST | |
import qualified Data.Vector.Primitive.Mutable as MPV | |
main :: IO () | |
main = defaultMain | |
[ bgroup "10K Elements" | |
[ bench "array" $ whnf runArr 10000 | |
, bench "vector" $ whnf runVector 10000 | |
] | |
, bgroup "40K Elements" | |
[ bench "array" $ whnf runArr 40000 | |
, bench "vector" $ whnf runVector 40000 | |
] | |
] | |
runArr :: Int -> Int | |
runArr theSize = runST $ do | |
arr <- newPrimArray theSize | |
let build !ix = if ix < theSize | |
then do | |
writePrimArray arr ix ix | |
build (ix + 1) | |
else return () | |
build 0 | |
let total !ix !s = if ix < theSize | |
then do | |
v <- readPrimArray arr ix | |
total (ix + 1) (s + v) | |
else return s | |
total 0 0 | |
runVector :: Int -> Int | |
runVector theSize = runST $ do | |
arr <- MPV.unsafeNew theSize | |
let build !ix = if ix < theSize | |
then do | |
MPV.unsafeWrite arr ix ix | |
build (ix + 1) | |
else return () | |
build 0 | |
let total !ix !s = if ix < theSize | |
then do | |
v <- MPV.unsafeRead arr ix | |
total (ix + 1) (s + v) | |
else return s | |
total 0 0 | |
-- | Mutable primitive arrays associated with a primitive state token | |
data MutablePrimArray s a = MutablePrimArray (MutableByteArray# s) | |
newPrimArray :: forall m a. (PrimMonad m, Prim a) => Int -> m (MutablePrimArray (PrimState m) a) | |
{-# INLINE newPrimArray #-} | |
newPrimArray (I# n#) | |
= primitive (\s# -> | |
case newByteArray# (n# *# sizeOf# (undefined :: a)) s# of | |
(# s'#, arr# #) -> (# s'#, MutablePrimArray arr# #) | |
) | |
readPrimArray :: (Prim a, PrimMonad m) => MutablePrimArray (PrimState m) a -> Int -> m a | |
{-# INLINE readPrimArray #-} | |
readPrimArray (MutablePrimArray arr#) (I# i#) | |
= primitive (readByteArray# arr# i#) | |
writePrimArray :: | |
(Prim a, PrimMonad m) | |
=> MutablePrimArray (PrimState m) a | |
-> Int | |
-> a | |
-> m () | |
{-# INLINE writePrimArray #-} | |
writePrimArray (MutablePrimArray arr#) (I# i#) x | |
= primitive_ (writeByteArray# arr# i# x) |
I've updated this to make the comparison more fair by using unsafeRead
and unsafeWrite
while testing Data.Vector.Primitive
. With the LLVM backend, the two implementations perform equally. With the NCG backend, PrimArray
wins by a 25% margin.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Performance on my machine:
This shows about a ten percent speedup. In this benchmark, two conditions hold: (a) all the data fits in cache, and (b) the array is accessed in a linearly, which is very cache-coherent. The ten-percent speedup probably only happens when both of these are true.