Last active
April 16, 2020 07:41
-
-
Save KillyMXI/59ab7f79cc3127fecc8ad62bc05df5e5 to your computer and use it in GitHub Desktop.
Haskell grapheme cluster examples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Text.ICU | |
import qualified Data.Text as T | |
-- | Count the grapheme clusters of a text.
--
-- The text is split with a character-boundary breaker built for the given
-- locale, and the number of resulting pieces is returned.
graphemeClustersCount :: LocaleName -> T.Text -> Int
graphemeClustersCount loc txt = length clusters
  where
    clusters = breaks (breakCharacter loc) txt
-- | Sample input for the demo: an emoji followed by two accented letters.
-- NOTE(review): the two accented letters presumably differ in Unicode
-- composition (precomposed vs. combining mark) -- confirm in an editor that
-- reveals code points.
testStr :: String
testStr = "😀éé"
-- | Print how many grapheme clusters 'testStr' contains, using the current
-- locale for boundary analysis.
main :: IO ()
main = print clusterCount
  where
    clusterCount = graphemeClustersCount Current (T.pack testStr)
-- Output:
-- 3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Text.ICU | |
import qualified Data.Text as T | |
import GHC.IO.Encoding | |
import System.Win32.Console | |
-- | Count the grapheme clusters of a text.
--
-- The text is split with a character-boundary breaker built for the given
-- locale, and the number of resulting pieces is returned.
graphemeClustersCount :: LocaleName -> T.Text -> Int
graphemeClustersCount loc txt = length clusters
  where
    clusters = breaks (breakCharacter loc) txt
-- | Split a text into its grapheme clusters, in their original order.
--
-- Each break found by the locale's character-boundary breaker contributes
-- its text ('brkBreak') as one element of the result.
splitToGraphemeClusters :: LocaleName -> T.Text -> [T.Text]
splitToGraphemeClusters loc txt =
  [brkBreak piece | piece <- breaks (breakCharacter loc) txt]
-- | Reverse a text cluster by cluster rather than code point by code point,
-- so that the code points inside each grapheme cluster keep their order.
reverseGraphemeClusters :: LocaleName -> T.Text -> T.Text
reverseGraphemeClusters loc txt = T.concat reversedClusters
  where
    reversedClusters = reverse (splitToGraphemeClusters loc txt)
-- | Lift a 'T.Text' transformation to operate on 'String'.
--
-- The input is packed into 'T.Text', transformed by @f@, and the result is
-- unpacked back into a 'String'.
procString :: (T.Text -> T.Text) -> String -> String
procString f str = T.unpack (f (T.pack str))
-- | Sample input for the demo: an emoji followed by two accented letters.
-- NOTE(review): the two accented letters presumably differ in Unicode
-- composition (precomposed vs. combining mark) -- confirm in an editor that
-- reveals code points.
testStr :: String
testStr = "😀éé"
-- | Switch output to UTF-8, then print the grapheme cluster count of
-- 'testStr' followed by 'testStr' reversed cluster by cluster.
main :: IO ()
main = do
  -- Set both the Haskell-side encoding and the console code page before
  -- anything is written; 65001 is the Windows code page identifier for UTF-8.
  setLocaleEncoding utf8
  setConsoleOutputCP 65001 -- Windows specific
  let clusterCount = graphemeClustersCount Current (T.pack testStr)
      reversed = procString (reverseGraphemeClusters Current) testStr
  print clusterCount
  putStrLn reversed
-- Output:
-- 3
-- éé😀
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Text.ICU | |
import qualified Data.Text as T | |
import GHC.IO.Encoding | |
import System.Win32.Console | |
-- | Split a text into its grapheme clusters, in their original order.
--
-- Each break found by the locale's character-boundary breaker contributes
-- its text ('brkBreak') as one element of the result.
splitToGraphemeClusters :: LocaleName -> T.Text -> [T.Text]
splitToGraphemeClusters loc = map brkBreak . breaks (breakCharacter loc)
-- | One grapheme cluster, stored as the characters (code points) it is
-- made of.
type GraphemeCluster = String

-- | A string represented as a list of grapheme clusters, so that list
-- operations such as 'length' and 'reverse' act on whole clusters.
type SwiftString = [GraphemeCluster]
-- | Convert a 'String' into a 'SwiftString' by splitting it into grapheme
-- clusters.
--
-- Boundaries are computed with the 'Root' locale; each cluster is unpacked
-- back into a list of characters.
pack :: String -> SwiftString
pack str =
  [T.unpack cluster | cluster <- splitToGraphemeClusters Root (T.pack str)]
-- | Flatten a 'SwiftString' back into a plain 'String' by joining its
-- clusters in order.
unpack :: SwiftString -> String
unpack clusters = [c | cluster <- clusters, c <- cluster]
-- | Lift a 'SwiftString' transformation to operate on 'String'.
--
-- The input is split into grapheme clusters with 'pack', transformed by
-- @f@, and flattened again with 'unpack'.
procString :: (SwiftString -> SwiftString) -> String -> String
procString f str = unpack (f (pack str))
-- | Sample input for the demo: an emoji followed by two accented letters.
-- NOTE(review): the two accented letters presumably differ in Unicode
-- composition (precomposed vs. combining mark) -- confirm in an editor that
-- reveals code points.
testStr :: String
testStr = "😀éé"
-- | Switch output to UTF-8, then print the number of grapheme clusters in
-- 'testStr' followed by 'testStr' with its clusters in reverse order.
main :: IO ()
main = do
  -- Set both the Haskell-side encoding and the console code page before
  -- anything is written; 65001 is the Windows code page identifier for UTF-8.
  setLocaleEncoding utf8
  setConsoleOutputCP 65001 -- Windows specific
  let clusters = pack testStr
  print (length clusters)
  putStrLn (procString reverse testStr)
-- Output:
-- 3
-- éé😀
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment