Created
October 28, 2012 04:04
-
-
Save jgm/3967452 to your computer and use it in GitHub Desktop.
Benchmark of various reimplementations of Data.Char.isSpace
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# OPTIONS_GHC -Wall -fwarn-tabs #-} | |
{-# LANGUAGE ForeignFunctionInterface #-} | |
---------------------------------------------------------------- | |
-- Modified by John MacFarlane from an earlier benchmark by | |
-- wren ng thornton. | |
--------------------------------------------------------------- | |
module Main (main) where | |
import qualified Data.Char as C | |
import Foreign.C.Types (CInt(..)) | |
import Criterion (bench, bgroup, nf) | |
import Criterion.Main (defaultMain) | |
---------------------------------------------------------------- | |
-- N.B. the range '\x9'..'\xD' is exactly the characters "\t\n\v\f\r".
-- | Binding to the C routine @u_iswspace@.  The predicates in this file
-- treat any non-zero result as \"the code point is whitespace\".  The call
-- is imported @unsafe@.
foreign import ccall unsafe "u_iswspace" iswspace :: CInt -> CInt
-- | Baseline predicate.  Performs the same comparisons, in the same order,
-- as 'Data.Char.isSpace' (i.e. 'GHC.Unicode.isSpace' as of base-4.2.0.2,
-- per the original author's note): seven explicit character tests, then a
-- fallback to 'iswspace' for everything else.
isSpace_DataChar :: Char -> Bool
isSpace_DataChar c
  | c == ' '    = True
  | c == '\t'   = True
  | c == '\n'   = True
  | c == '\r'   = True
  | c == '\f'   = True
  | c == '\v'   = True
  | c == '\xa0' = True
    -- Nothing matched explicitly: ask the C routine.
  | otherwise   = iswspace (fromIntegral (C.ord c)) /= 0
{-# INLINE isSpace_DataChar #-}
-- | Alternative predicate that first rejects every character strictly
-- between @'\\x20'@ and @'\\xa0'@, so those never reach the later tests.
-- What remains is accepted if it is a space, lies in @'\\t'..'\\r'@, is
-- @'\\xa0'@, or is reported as whitespace by 'iswspace'.
isSpace_Alt :: Char -> Bool
isSpace_Alt c =
  -- '&&' and '||' short-circuit left to right, so the tests run in the
  -- order they are written.
  not (c > '\x20' && c < '\xa0')
    && ( c == ' '
           || ('\t' <= c && c <= '\r')
           || c == '\xa0'
           || iswspace (fromIntegral (C.ord c)) /= 0
       )
{-# INLINE isSpace_Alt #-}
-- | Like 'isSpace_Alt', with two extra early rejections: characters above
-- @'\\xa0'@ up to and including @'\\xff'@, and characters below @'\\t'@.
-- 'iswspace' is consulted only when no guard before it has decided.
isSpace_Alt' :: Char -> Bool
isSpace_Alt' c
  | betweenSpaceAndNbsp = False
  | c == ' '            = True
  | aboveNbspUpToFF     = False
  | tabThroughCR        = True
  | c < '\t'            = False
  | c == '\xa0'         = True
  | otherwise           = iswspace (fromIntegral (C.ord c)) /= 0
  where
    -- Named range tests; each is evaluated only if its guard is reached.
    betweenSpaceAndNbsp = c > '\x20' && c < '\xa0'
    aboveNbspUpToFF     = c > '\xa0' && c <= '\xff'
    tabThroughCR        = '\t' <= c && c <= '\r'
{-# INLINE isSpace_Alt' #-}
-- | Predicate that accepts a space, any character in @'\\t'..'\\r'@, or
-- @'\\xa0'@ directly, and defers to 'iswspace' for every other character.
-- The tests short-circuit in the order written.
isSpace_Pattern :: Char -> Bool
isSpace_Pattern c =
  c == ' '
    || ('\t' <= c && c <= '\r')
    || c == '\xa0'
    || iswspace (fromIntegral (C.ord c)) /= 0
{-# INLINE isSpace_Pattern #-}
---------------------------------------------------------------- | |
-- | Benchmark entry point.  Runs all four @isSpace_*@ predicates over three
-- inputs: a block of Latin prose, the characters 0..255, and the characters
-- 0..2048.  Each result list is forced with 'nf'.
main :: IO ()
main =
  defaultMain
    [ suite "lorem ipsum" loremIpsum
    , suite "chars 0..255" lowChars
    , suite "chars 0..2048" widerChars
    ]
  where
    -- One benchmark group per input.  Every predicate is named explicitly
    -- so that each 'map' is applied to a statically known function.
    suite label input =
      bgroup label
        [ bench "isSpace_DataChar" $ nf (map isSpace_DataChar) input
        , bench "isSpace_Alt" $ nf (map isSpace_Alt) input
        , bench "isSpace_Alt'" $ nf (map isSpace_Alt') input
        , bench "isSpace_Pattern" $ nf (map isSpace_Pattern) input
        ]

    loremIpsum, lowChars, widerChars :: String
    loremIpsum = "Lorem ipsum dolor sit amet, consectetur adipisicing elit,\nsed do eiusmod tempor incididunt ut labore et\ndolore magna aliqua. Ut enim ad minim veniam,\nquis nostrud exercitation ullamco laboris nisi ut\naliquip ex ea commodo consequat. Duis aute irure dolor\nin reprehenderit in voluptate velit esse cillum dolore\neu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident,\nsunt in culpa qui officia deserunt mollit anim id est laborum.\n"
    lowChars = ['\0' .. '\255']
    widerChars = ['\0' .. '\2048']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Benchmarks:
Compiled with -O2: