Demonstration of SIMD performance gap
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as B8
import Criterion.Main
import Criterion.Types (Config (..))
import qualified Mystery
-- | Build the benchmark payload: a strict 'ByteString' holding the given
-- number of copies of the upper-case character @\'Z\'@.
input :: Int -> ByteString
input = (`B8.replicate` 'Z')
-- | Benchmark entry point.
--
-- Runs two criterion groups, @toLowerNative@ and @toLowerFFI@, each over the
-- same set of input sizes, and writes the results to @bench.html@ and
-- @bench.csv@.
main :: IO ()
main =
  defaultMainWith settings
    [ bgroup "toLowerNative" (sized Mystery.toLowerNative)
    , bgroup "toLowerFFI" (sized Mystery.toLowerFFI)
    ]
  where
    -- Default criterion configuration plus the two report outputs.
    settings =
      defaultConfig
        { reportFile = Just "bench.html"
        , csvFile = Just "bench.csv"
        }

    -- One benchmark per input size, labelled with the size itself; the
    -- result of each run is forced to normal form.
    sized f = [bench (show n) (nf f (input n)) | n <- sizes]

    -- Input lengths, in bytes, exercised for both implementations.
    sizes = [100, 500, 1000, 2000] :: [Int]
/*
 * Set bit 0x20 in each of the first `len` bytes of `str`, in place.
 *
 * For ASCII upper-case letters this yields the matching lower-case letter.
 * Any other byte that does not already have the bit set is altered as well.
 * Nothing is written when `len` is zero or negative.
 */
void to_lower (char* str, int len) {
  int pos = 0;
  while (pos < len) {
    str[pos] = str[pos] | 0x20;
    pos++;
  }
}
name: mystery
synopsis: A performance murder mystery
author: Tim Humphries
build-type: Simple
cabal-version: >=1.10
default-language: Haskell2010
ghc-options: -Wall
base >= 4.9 && < 4.11
, bytestring >= 0.10.8 && < 0.11
c-sources: lower.c
benchmark bench
type: exitcode-stdio-1.0
main-is: Bench.hs
ghc-options: -Wall -threaded -O2
build-depends: base
, mystery
, bytestring
, criterion
{-# LANGUAGE ForeignFunctionInterface #-}
module Mystery where
import Data.Bits ((.|.))
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import Foreign.C.Types (CChar, CInt)
import Foreign.Ptr (Ptr)
import qualified System.IO.Unsafe as Unsafe
-- | Pure Haskell lower-casing: sets bit @0x20@ in every byte of the input.
--
-- For ASCII upper-case letters this produces the matching lower-case letter.
-- Any other byte that does not already have the bit set is altered as well,
-- so this is only a faithful \"to lower\" on alphabetic ASCII input.
--
-- The bitwise OR has to be applied per byte with 'B.map'; 'ByteString' itself
-- has no 'Data.Bits.Bits' instance, so the bare section does not type-check.
toLowerNative :: ByteString -> ByteString
toLowerNative = B.map (.|. 0x20)
-- | Lower-casing via the C routine @to_lower@.
--
-- 'B.useAsCStringLen' hands the callback a temporary copy of the input, the
-- C code mutates that copy in place, and 'B.packCStringLen' copies the result
-- into a fresh 'ByteString' before the temporary buffer is released.
--
-- 'Unsafe.unsafePerformIO' is justified because the only mutation happens on
-- that private copy: the argument is never modified and the result depends on
-- the argument alone.
toLowerFFI :: ByteString -> ByteString
toLowerFFI bs =
  Unsafe.unsafePerformIO . B.useAsCStringLen bs $ \(ptr, len) -> do
    -- The C prototype takes an @int@, so convert explicitly instead of
    -- passing a Haskell 'Int' through a mismatched import.
    -- NOTE(review): lengths above the range of 'CInt' would wrap here.
    to_lower ptr (fromIntegral len)
    B.packCStringLen (ptr, len)

-- | In-place C routine with prototype @void to_lower (char* str, int len)@.
-- The length is declared as 'CInt' to match the C @int@ parameter.
foreign import ccall to_lower :: Ptr CChar -> CInt -> IO ()
