Created
August 28, 2012 10:16
-
-
Save adimit/3496913 to your computer and use it in GitHub Desktop.
Benchmark of several Data.Text string-escaping implementations.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
import qualified Data.Text as X | |
import Data.Text (Text) | |
import Criterion | |
import Criterion.Main | |
-- | The characters that the @escapeText*@ functions prefix with a backslash:
-- the double quote, the backslash itself, and a handful of punctuation marks.
escapedChars :: String
escapedChars = "\"\\-!@~/()*[]="
-- | Escape by mapping every character to a small 'Text' and concatenating the
-- results: special characters become a backslash followed by the character,
-- all other characters are passed through unchanged.
escapeText1 :: Text -> Text
escapeText1 = X.concatMap escapeChar
  where
    -- Expand a single character into its (possibly escaped) representation.
    escapeChar c
      | c `elem` escapedChars = X.pack ['\\', c]
      | otherwise = X.singleton c
-- | Escape by cutting the input into chunks that each begin at a special
-- character (see 'breakBy') and gluing the chunks back together with a
-- backslash in between.
--
-- 'breakBy' always places the first character of the input in the first
-- chunk, so 'X.intercalate' alone would never put a backslash in front of a
-- special character at position 0. That case is handled here by emitting the
-- leading backslash explicitly.
escapeText2 :: Text -> Text
escapeText2 t = leading `X.append` X.intercalate backslash chunks
  where
    backslash = X.singleton '\\'
    chunks = breakBy (`elem` escapedChars) t
    -- A backslash if (and only if) the very first character needs escaping.
    leading = case X.uncons t of
      Just (c, _) | c `elem` escapedChars -> backslash
      _ -> X.empty
-- | Escape by repeatedly splitting at the next special character with
-- 'X.break' and re-attaching that character behind a backslash using
-- 'X.cons'.
--
-- When no special character remains, the rest of the input is returned as
-- is. In particular no backslash is appended, so input that does not end in a
-- special character (or contains none at all) does not gain a stray trailing
-- backslash. The empty text is returned unchanged.
escapeText3 :: Text -> Text
escapeText3 t =
  case X.uncons rest of
    -- Nothing left to escape: emit the plain prefix only.
    Nothing -> plain
    Just (special, remaining) ->
      plain `X.append` ('\\' `X.cons` (special `X.cons` escapeText3 remaining))
  where
    -- 'plain' holds no special characters; 'rest' is empty or starts with one.
    (plain, rest) = X.break (`elem` escapedChars) t
-- | Escape by repeatedly splitting at the next special character with
-- 'X.break', then using 'X.take' / 'X.drop' (rather than head / tail) to peel
-- that character off and put a backslash in front of it.
--
-- The function recurses into itself so that the whole input is processed by
-- this variant. If no special character remains, the rest of the input is
-- returned without appending a backslash; the empty text is returned
-- unchanged.
escapeText4 :: Text -> Text
escapeText4 t
  -- Nothing left to escape: emit the plain prefix only.
  | X.null broken = untouched
  | otherwise =
      untouched
        `X.append` ('\\' `X.cons` X.take 1 broken)
        `X.append` escapeText4 (X.drop 1 broken)
  where
    -- 'untouched' holds no special characters; 'broken' is empty or starts
    -- with one.
    (untouched, broken) = X.break (`elem` escapedChars) t
-- | Escape by repeatedly splitting at the next special character with
-- 'X.break' and assembling the pieces of each step with a single 'X.concat'.
--
-- The function recurses into itself so that the whole input is processed by
-- this variant. If no special character remains, the rest of the input is
-- returned without appending a backslash; the empty text is returned
-- unchanged.
escapeText5 :: Text -> Text
escapeText5 t
  -- Nothing left to escape: emit the plain prefix only.
  | X.null broken = untouched
  | otherwise =
      X.concat
        [ untouched
        , X.singleton '\\'
        , X.take 1 broken
        , escapeText5 (X.drop 1 broken)
        ]
  where
    -- 'untouched' holds no special characters; 'broken' is empty or starts
    -- with one.
    (untouched, broken) = X.break (`elem` escapedChars) t
-- | Cut a text into consecutive chunks, opening a new chunk at every
-- character that satisfies the predicate.
--
-- The grouping relation handed to 'X.groupBy' ignores its first argument and
-- only asks whether the next character fails the predicate. As a consequence
-- the first character of the input always lands in the first chunk, whether
-- or not it satisfies the predicate. The empty text yields no chunks.
breakBy :: (Char -> Bool) -> Text -> [Text]
breakBy startsChunk = X.groupBy staysInChunk
  where
    staysInChunk _ next = not (startsChunk next)
-- | Benchmark input that is dense in special characters: it contains the
-- double quote, the backslash and most of the other escaped punctuation in
-- one run.
--
-- The explicit signature pins the literal to 'Text' instead of relying on
-- type inference from the use sites.
t1 :: Text
t1 = "abc\"\\-!@~/()*[]foobar"

-- | Benchmark input resembling ordinary prose, with only a few special
-- characters scattered through it.
t2 :: Text
t2 = "This is more natural text! It contains (only some) characters we *want* to escape, and it contains an email address: foo@bar.baz."
-- | Run every escaping variant against both sample inputs: first all five
-- variants on 't1', then all five on 't2'. Each result is evaluated to weak
-- head normal form.
main :: IO ()
main = defaultMain (denseInput ++ proseInput)
  where
    -- All variants applied to 't1'.
    denseInput =
      [ bench "escapeText1-t1" (whnf escapeText1 t1)
      , bench "escapeText2-t1" (whnf escapeText2 t1)
      , bench "escapeText3-t1" (whnf escapeText3 t1)
      , bench "escapeText4-t1" (whnf escapeText4 t1)
      , bench "escapeText5-t1" (whnf escapeText5 t1)
      ]
    -- All variants applied to 't2'.
    proseInput =
      [ bench "escapeText1-t2" (whnf escapeText1 t2)
      , bench "escapeText2-t2" (whnf escapeText2 t2)
      , bench "escapeText3-t2" (whnf escapeText3 t2)
      , bench "escapeText4-t2" (whnf escapeText4 t2)
      , bench "escapeText5-t2" (whnf escapeText5 t2)
      ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment