public
Created

  • Download Gist
BenchIsSpace.hs
Haskell
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
{-# OPTIONS_GHC -Wall -fwarn-tabs #-}
{-# LANGUAGE ForeignFunctionInterface #-}
----------------------------------------------------------------
-- Modified by John MacFarlane from an earlier benchmark by
-- wren ng thornton.
 
-- Normalized benchmark results:
--
-- Compiled without optimization:
-- --------------------------------------------------------------
-- Input isSpace_DataChar isSpace_Pattern isSpace_Alt
-- --------------- ---------------- --------------- -----------
-- ascii text 1.0 0.54 0.17
-- greek text 1.0 0.57 0.71
-- haskell code 1.0 0.57 0.24
-- chars 0..255 1.0 0.54 0.39
-- all space chars 1.0 0.70 0.90
-- --------------------------------------------------------------
--
-- Compiled with -O2:
-- --------------------------------------------------------------
-- Input isSpace_DataChar isSpace_Pattern isSpace_Alt
-- --------------- ---------------- --------------- -----------
-- ascii text 1.0 0.93 0.40
-- greek text 1.0 0.98 0.99
-- haskell code 1.0 1.03 0.58
-- chars 0..255 1.0 0.92 0.62
-- all space chars 1.0 0.88 0.92
-- --------------------------------------------------------------
--
--- Conclusions: We can make isSpace considerably faster just by
-- using pattern matching instead of '=='. And we can optimize
-- further for common inputs by testing for common nonspace characters.
-- (The exceptions are inputs consisting entirely of non-ascii
-- characters and inputs consisting entirely of space characters.
-- But even in these cases, isSpace_Alt does better than
-- isSpace_DataChar.)
 
 
module Main (main) where
 
import qualified Data.Char as C
import Foreign.C.Types (CInt(..))
import Criterion (bench, bgroup, nf, Benchmark)
import Criterion.Main (defaultMain)
import Data.Char (isSpace)
 
----------------------------------------------------------------
-- N.B. \x9..\xD == "\t\n\v\f\r"
 
foreign import ccall unsafe "u_iswspace"
iswspace :: CInt -> CInt
 
-- | Verbatim version of 'Data.Char.isSpace' (i.e., 'GHC.Unicode.isSpace'
-- as of base-4.2.0.2).
isSpace_DataChar :: Char -> Bool
{-# INLINE isSpace_DataChar #-}
isSpace_DataChar c =
c == ' ' ||
c == '\t' ||
c == '\n' ||
c == '\r' ||
c == '\f' ||
c == '\v' ||
c == '\xa0' ||
iswspace (fromIntegral (C.ord c)) /= 0
 
isSpace_Alt :: Char -> Bool
{-# INLINE isSpace_Alt #-}
isSpace_Alt c | c > '\x20' && c < '\xa0' = False
| c == ' ' = True
| '\t' <= c && c <= '\r' = True
| c == '\xa0' = True
| otherwise = iswspace (fromIntegral (C.ord c)) /= 0
 
isSpace_Alt2 :: Char -> Bool
{-# INLINE isSpace_Alt2 #-}
isSpace_Alt2 c | c > '\x20' && c < '\xa0' = False
| c == ' ' = True
| c <= '\r' && '\t' <= c = True
| c == '\xa0' = True
| otherwise = iswspace (fromIntegral (C.ord c)) /= 0
 
isSpace_Alt3 :: Char -> Bool
{-# INLINE isSpace_Alt3 #-}
isSpace_Alt3 ' ' = True
isSpace_Alt3 c | c <= '\r' = '\t' <= c
| c == '\xA0' = True
| c <= '\xFF' = False
| otherwise = iswspace (fromIntegral (C.ord c)) /= 0
 
isSpace_Alt4 :: Char -> Bool
{-# INLINE isSpace_Alt4 #-}
isSpace_Alt4 ' ' = True
isSpace_Alt4 c | c <= '\r' = '\t' <= c
| c == '\xA0' = True
| c < '\x1680' = False
| otherwise = iswspace (fromIntegral (C.ord c)) /= 0
-- N.B., iswspace only returns true for \x20, \xA0, and things at or above \x1680
 
isSpace_Alt5 :: Char -> Bool
{-# INLINE isSpace_Alt5 #-}
isSpace_Alt5 ' ' = True
isSpace_Alt5 c | c <= '\r' = '\t' <= c
| c < '\x1680' = c == '\x20' || c == '\xA0'
| otherwise = iswspace (fromIntegral (C.ord c)) /= 0
 
 
isSpace_Pattern :: Char -> Bool
{-# INLINE isSpace_Pattern #-}
isSpace_Pattern c | c == ' ' = True
| '\t' <= c && c <= '\r' = True
| c == '\xa0' = True
| otherwise = iswspace (fromIntegral (C.ord c)) /= 0
 
----------------------------------------------------------------
 
sophocles :: String
sophocles = "Ἰοὺ ἰού· τὰ πάντʼ ἂν ἐξήκοι σαφῆ.\nὮ φῶς, τελευταῖόν σε προσϐλέψαιμι νῦν,\nὅστις πέφασμαι φύς τʼ ἀφʼ ὧν οὐ χρῆν, ξὺν οἷς τʼ\nοὐ χρῆν ὁμιλῶν, οὕς τέ μʼ οὐκ ἔδει κτανών.\nἸοὺ ἰού· τὰ πάντʼ ἂν ἐξήκοι σαφῆ.\nὮ φῶς, τελευταῖόν σε προσϐλέψαιμι νῦν,\nὅστις πέφασμαι φύς τʼ ἀφʼ ὧν οὐ χρῆν, ξὺν οἷς τʼ\nοὐ χρῆν ὁμιλῶν, οὕς τέ μʼ οὐκ ἔδει κτανών."
 
lorem :: String
lorem = "Lorem ipsum dolor sit amet, consectetur adipisicing elit,\nsed do eiusmod tempor incididunt ut labore et\ndolore magna aliqua. Ut enim ad minim veniam,\nquis nostrud exercitation ullamco laboris nisi ut\naliquip ex ea commodo consequat. Duis aute irure dolor\nin reprehenderit in voluptate velit esse cillum dolore\neu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident,\nsunt in culpa qui officia deserunt mollit anim id est laborum.\n"
 
haskell :: String
haskell = "isSpace_DataChar :: Char -> Bool\n{-# INLINE isSpace_DataChar #-}\n isSpace_DataChar c =\n c == ' ' ||\n c == '\t' ||\n c == '\n' ||\n c == '\r' ||\n c == '\f' ||\n\tc == '\v' ||\n\tc == '\xa0' ||\n\tiswspace (fromIntegral (C.ord c)) /= 0\nisSpace_DataChar :: Char -> Bool\n{-# INLINE isSpace_DataChar #-}\n isSpace_DataChar c =\n c == ' ' ||\n c == '\t' ||\n c == '\n' ||\n c == '\r' ||\n c == '\f' ||\n\tc == '\v' ||\n\tc == '\xa0' ||\n\tiswspace (fromIntegral (C.ord c)) /= 0\n"
 
main :: IO ()
main = defaultMain
[ group "ascii text" lorem
, group "Greek text" sophocles
, group "Haskell code" haskell
, group "chars 0..255" ['\0'..'\255']
, group "all spaces" $ concat $ replicate 50 "\t\r\n\n "
]
 
group :: String -> String -> Benchmark
group name inp = bgroup name
[ bench "isSpace_DataChar" $ nf (map isSpace_DataChar) inp
-- , bench "isSpace" $ nf (map isSpace) inp
, bench "isSpace_Pattern" $ nf (map isSpace_Pattern) inp
, bench "isSpace_Alt" $ nf (map isSpace_Alt) inp
, bench "isSpace_Alt2" $ nf (map isSpace_Alt2) inp
, bench "isSpace_Alt3" $ nf (map isSpace_Alt3) inp
, bench "isSpace_Alt4" $ nf (map isSpace_Alt4) inp
, bench "isSpace_Alt5" $ nf (map isSpace_Alt5) inp
]

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.