Skip to content

@gregorycollins /bench.hs forked from snoyberg/bench.hs
Created

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Lower-casing a ByteString
{-# LANGUAGE OverloadedStrings, MagicHash, UnboxedTuples, BangPatterns #-}
import Control.Monad (replicateM_)
import Criterion.Main (defaultMain, bench, whnfIO)
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import Data.ByteString.Unsafe (unsafePackAddressLen, unsafeUseAsCStringLen)
import qualified Data.Char
import Data.Char (ord)
import qualified Data.Word8
import System.IO.Unsafe (unsafePerformIO)
import Foreign.Storable (peekByteOff)
import Foreign.Ptr (Ptr)
import Foreign.C.String (CStringLen)
import GHC.Prim
( leWord#, Word#, plusWord#, State#, RealWorld, Addr#
, writeWord8Array#, (==#), byteArrayContents#, unsafeFreezeByteArray#
, newPinnedByteArray#
)
import GHC.Base (Int (..), unsafeChr)
import GHC.Word (Word (..), Word8 (..))
import GHC.Types (IO (..))
main :: IO ()
main = do
input <- S.readFile "bench.hs"
defaultMain
[ bench "Word8" $ b (S.length . S.map Data.Word8.toLower) input
, bench "Word8-local" $ b (S.length . S.map toLower8) input
, bench "Char8 toLowerC" $ b (S.length . S8.map toLowerC) input
]
where
b f i = whnfIO $ replicateM_ 1000 $ do
!_ <- return $! f i
return $! ()
toLower8 :: Word8 -> Word8
toLower8 w
| isUpper8 w = w + 32
| otherwise = w
{-# INLINE toLower8 #-}
isUpper8 :: Word8 -> Bool
isUpper8 w = 0x41 <= w && w <= 0x5a
|| 0xc0 <= w && w <= 0xd6
|| 0xd8 <= w && w <= 0xde
{-# INLINE isUpper8 #-}
bsToLower :: S.ByteString -> S.ByteString
bsToLower bs
| S.null bs = S.empty
| otherwise = unsafePerformIO $ unsafeUseAsCStringLen bs cstrlenToLower
cstrlenToLower :: CStringLen -> IO S.ByteString
cstrlenToLower (input, !len'@(I# len)) = IO $ \state0 ->
case newPinnedByteArray# len state0 of
(# state1, mba #) ->
case loop mba 0 state1 of
state2 ->
case unsafeFreezeByteArray# mba state2 of
(# state3, ba #) ->
unsafePackAddressLen' len' (byteArrayContents# ba) state3
where
loop mba offset@(I# offset') stateA
| offset' ==# len = stateA
| otherwise =
case peek' input offset stateA of
(# stateB, W8# w #) ->
case writeWord8Array# mba offset' (toLowerU w) stateB of
stateC -> loop mba (offset + 1) stateC
unsafePackAddressLen' :: Int
-> Addr#
-> State# RealWorld
-> (# State# RealWorld, S.ByteString #)
unsafePackAddressLen' i a =
f
where
IO f = unsafePackAddressLen i a
peek' :: Ptr b -> Int -> State# RealWorld -> (# State# RealWorld, Word8 #)
peek' ptr offset =
f
where
IO f = peekByteOff ptr offset
toLowerU :: Word# -> Word#
toLowerU w
| isUpperU w = w `plusWord#` space
| otherwise = w
where
!(W8# space) = 0x20
isUpperU :: Word# -> Bool
isUpperU w =
(l1 `leWord#` w && w `leWord#` h1) ||
(l2 `leWord#` w && w `leWord#` h2) ||
(l3 `leWord#` w && w `leWord#` h3)
where
!(W# l1) = 0x41
!(W# h1) = 0x5a
!(W# l2) = 0xc0
!(W# h2) = 0xd6
!(W# l3) = 0xd8
!(W# h3) = 0xde
toLowerC :: Char -> Char
toLowerC w
| isUpperC w = unsafeChr $ ord w + 0x20
| otherwise = w
isUpperC :: Char -> Bool
isUpperC w =
(l1 <= w && w <= h1) ||
(l2 <= w && w <= h2) ||
(l3 <= w && w <= h3)
where
l1 = '\x41'
h1 = '\x5a'
l2 = '\xc0'
h2 = '\xd6'
l3 = '\xd8'
h3 = '\xde'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.