public
Last active — forked from snoyberg/bench.hs

Lower-casing a ByteString

  • Download Gist
bench.hs
Haskell
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
{-# LANGUAGE OverloadedStrings, MagicHash, UnboxedTuples, BangPatterns #-}
 
import Control.Monad (replicateM_)
import Criterion.Main (defaultMain, bench, whnfIO)
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import Data.ByteString.Unsafe (unsafePackAddressLen, unsafeUseAsCStringLen)
import qualified Data.Char
import Data.Char (ord)
import qualified Data.Word8
import System.IO.Unsafe (unsafePerformIO)
import Foreign.Storable (peekByteOff)
import Foreign.Ptr (Ptr)
import Foreign.C.String (CStringLen)
 
import GHC.Prim
( leWord#, Word#, plusWord#, State#, RealWorld, Addr#
, writeWord8Array#, (==#), byteArrayContents#, unsafeFreezeByteArray#
, newPinnedByteArray#
)
import GHC.Base (Int (..), unsafeChr)
import GHC.Word (Word (..), Word8 (..))
import GHC.Types (IO (..))
 
main :: IO ()
main = do
input <- S.readFile "bench.hs"
defaultMain
[ bench "Word8" $ b (S.length . S.map Data.Word8.toLower) input
, bench "Word8-local" $ b (S.length . S.map toLower8) input
, bench "Char8 toLowerC" $ b (S.length . S8.map toLowerC) input
]
where
b f i = whnfIO $ replicateM_ 1000 $ do
!_ <- return $! f i
return $! ()
 
 
toLower8 :: Word8 -> Word8
toLower8 w
| isUpper8 w = w + 32
| otherwise = w
{-# INLINE toLower8 #-}
 
isUpper8 :: Word8 -> Bool
isUpper8 w = 0x41 <= w && w <= 0x5a
|| 0xc0 <= w && w <= 0xd6
|| 0xd8 <= w && w <= 0xde
{-# INLINE isUpper8 #-}
 
 
bsToLower :: S.ByteString -> S.ByteString
bsToLower bs
| S.null bs = S.empty
| otherwise = unsafePerformIO $ unsafeUseAsCStringLen bs cstrlenToLower
 
cstrlenToLower :: CStringLen -> IO S.ByteString
cstrlenToLower (input, !len'@(I# len)) = IO $ \state0 ->
case newPinnedByteArray# len state0 of
(# state1, mba #) ->
case loop mba 0 state1 of
state2 ->
case unsafeFreezeByteArray# mba state2 of
(# state3, ba #) ->
unsafePackAddressLen' len' (byteArrayContents# ba) state3
where
loop mba offset@(I# offset') stateA
| offset' ==# len = stateA
| otherwise =
case peek' input offset stateA of
(# stateB, W8# w #) ->
case writeWord8Array# mba offset' (toLowerU w) stateB of
stateC -> loop mba (offset + 1) stateC
 
unsafePackAddressLen' :: Int
-> Addr#
-> State# RealWorld
-> (# State# RealWorld, S.ByteString #)
unsafePackAddressLen' i a =
f
where
IO f = unsafePackAddressLen i a
 
peek' :: Ptr b -> Int -> State# RealWorld -> (# State# RealWorld, Word8 #)
peek' ptr offset =
f
where
IO f = peekByteOff ptr offset
 
toLowerU :: Word# -> Word#
toLowerU w
| isUpperU w = w `plusWord#` space
| otherwise = w
where
!(W8# space) = 0x20
 
isUpperU :: Word# -> Bool
isUpperU w =
(l1 `leWord#` w && w `leWord#` h1) ||
(l2 `leWord#` w && w `leWord#` h2) ||
(l3 `leWord#` w && w `leWord#` h3)
where
!(W# l1) = 0x41
!(W# h1) = 0x5a
!(W# l2) = 0xc0
!(W# h2) = 0xd6
!(W# l3) = 0xd8
!(W# h3) = 0xde
 
 
toLowerC :: Char -> Char
toLowerC w
| isUpperC w = unsafeChr $ ord w + 0x20
| otherwise = w
 
isUpperC :: Char -> Bool
isUpperC w =
(l1 <= w && w <= h1) ||
(l2 <= w && w <= h2) ||
(l3 <= w && w <= h3)
where
l1 = '\x41'
h1 = '\x5a'
l2 = '\xc0'
h2 = '\xd6'
l3 = '\xd8'
h3 = '\xde'

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.