Skip to content

Instantly share code, notes, and snippets.

@gregorycollins
Forked from snoyberg/bench.hs
Created September 20, 2012 16:19
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gregorycollins/3756876 to your computer and use it in GitHub Desktop.
Save gregorycollins/3756876 to your computer and use it in GitHub Desktop.
Lower-casing a ByteString
{-# LANGUAGE OverloadedStrings, MagicHash, UnboxedTuples, BangPatterns #-}
import Control.Monad (replicateM_)
import Criterion.Main (defaultMain, bench, whnfIO)
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import Data.ByteString.Unsafe (unsafePackAddressLen, unsafeUseAsCStringLen)
import qualified Data.Char
import Data.Char (ord)
import qualified Data.Word8
import System.IO.Unsafe (unsafePerformIO)
import Foreign.Storable (peekByteOff)
import Foreign.Ptr (Ptr)
import Foreign.C.String (CStringLen)
import GHC.Prim
( leWord#, Word#, plusWord#, State#, RealWorld, Addr#
, writeWord8Array#, (==#), byteArrayContents#, unsafeFreezeByteArray#
, newPinnedByteArray#
)
import GHC.Base (Int (..), unsafeChr)
import GHC.Word (Word (..), Word8 (..))
import GHC.Types (IO (..))
main :: IO ()
main = do
input <- S.readFile "bench.hs"
defaultMain
[ bench "Word8" $ b (S.length . S.map Data.Word8.toLower) input
, bench "Word8-local" $ b (S.length . S.map toLower8) input
, bench "Char8 toLowerC" $ b (S.length . S8.map toLowerC) input
]
where
b f i = whnfIO $ replicateM_ 1000 $ do
!_ <- return $! f i
return $! ()
toLower8 :: Word8 -> Word8
toLower8 w
| isUpper8 w = w + 32
| otherwise = w
{-# INLINE toLower8 #-}
isUpper8 :: Word8 -> Bool
isUpper8 w = 0x41 <= w && w <= 0x5a
|| 0xc0 <= w && w <= 0xd6
|| 0xd8 <= w && w <= 0xde
{-# INLINE isUpper8 #-}
bsToLower :: S.ByteString -> S.ByteString
bsToLower bs
| S.null bs = S.empty
| otherwise = unsafePerformIO $ unsafeUseAsCStringLen bs cstrlenToLower
cstrlenToLower :: CStringLen -> IO S.ByteString
cstrlenToLower (input, !len'@(I# len)) = IO $ \state0 ->
case newPinnedByteArray# len state0 of
(# state1, mba #) ->
case loop mba 0 state1 of
state2 ->
case unsafeFreezeByteArray# mba state2 of
(# state3, ba #) ->
unsafePackAddressLen' len' (byteArrayContents# ba) state3
where
loop mba offset@(I# offset') stateA
| offset' ==# len = stateA
| otherwise =
case peek' input offset stateA of
(# stateB, W8# w #) ->
case writeWord8Array# mba offset' (toLowerU w) stateB of
stateC -> loop mba (offset + 1) stateC
unsafePackAddressLen' :: Int
-> Addr#
-> State# RealWorld
-> (# State# RealWorld, S.ByteString #)
unsafePackAddressLen' i a =
f
where
IO f = unsafePackAddressLen i a
peek' :: Ptr b -> Int -> State# RealWorld -> (# State# RealWorld, Word8 #)
peek' ptr offset =
f
where
IO f = peekByteOff ptr offset
toLowerU :: Word# -> Word#
toLowerU w
| isUpperU w = w `plusWord#` space
| otherwise = w
where
!(W8# space) = 0x20
isUpperU :: Word# -> Bool
isUpperU w =
(l1 `leWord#` w && w `leWord#` h1) ||
(l2 `leWord#` w && w `leWord#` h2) ||
(l3 `leWord#` w && w `leWord#` h3)
where
!(W# l1) = 0x41
!(W# h1) = 0x5a
!(W# l2) = 0xc0
!(W# h2) = 0xd6
!(W# l3) = 0xd8
!(W# h3) = 0xde
toLowerC :: Char -> Char
toLowerC w
| isUpperC w = unsafeChr $ ord w + 0x20
| otherwise = w
isUpperC :: Char -> Bool
isUpperC w =
(l1 <= w && w <= h1) ||
(l2 <= w && w <= h2) ||
(l3 <= w && w <= h3)
where
l1 = '\x41'
h1 = '\x5a'
l2 = '\xc0'
h2 = '\xd6'
l3 = '\xd8'
h3 = '\xde'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment