Last active
May 23, 2023 18:30
-
-
Save newton-migosi/99badb0c7a6eebbd55d48ecc849d6ec0 to your computer and use it in GitHub Desktop.
[Haskell-cafe] Fast number parsing with strict bytestrings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Control.Monad | |
import Data.Maybe | |
import qualified Data.ByteString.Char8 as B | |
divisibleBy :: Int -> Int -> Bool | |
a `divisibleBy` n = a `rem` n == 0 | |
main :: IO () | |
main = do | |
[n,k] <- (map int . B.split ' ') `fmap` B.getLine :: IO [Int] | |
let | |
doLine :: Int -> Int -> IO Int | |
doLine r _ = B.getLine >>= return . testDiv r | |
-- 'return' moved here ^^ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
doLine :: Int -> Int -> IO Int | |
doLine r _ = B.getLine >>= \s -> return $! testDiv r s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Maybe | |
import Data.List | |
import qualified Data.ByteString.Lazy.Char8 as L | |
main :: IO () | |
main = do | |
(l:ls) <- L.lines `fmap` L.getContents -- done with IO now. | |
let [n,k] = map int (L.split ' ' l) | |
print . foldl' (test k) 0 . map int . take n $ ls | |
test :: Int -> Int -> Int -> Int | |
test k acc n | |
| n `divisibleBy` k = acc+1 | |
| otherwise = acc | |
int :: L.ByteString -> Int | |
int = fst . fromJust . L.readInt | |
divisibleBy :: Int -> Int -> Bool | |
a `divisibleBy` n = a `rem` n == 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# OPTIONS -fbang-patterns #-} | |
import Data.Char | |
import Data.Maybe | |
import Data.ByteString.Base | |
import qualified Data.ByteString.Char8 as S | |
import qualified Data.ByteString.Lazy.Char8 as L | |
main = do | |
ss <- L.getContents -- done with IO now. | |
let (l,ls) = L.break (=='\n') ss | |
-- don't need count, we're allocating lazily | |
k = fst . fromJust . L.readInt . last . L.split ' ' $ l | |
file = L.toChunks (L.tail ls) -- a lazy list of strict cache chunks | |
print $ process k 0 file | |
divisibleBy :: Int -> Int -> Bool | |
a `divisibleBy` n = a `rem` n == 0 | |
-- --------------------------------------------------------------------- | |
-- | |
-- Optimised parsing of strict bytestrings representing \n separated numbers | |
-- | |
-- | |
-- we have the file as a list of cache chunks | |
-- align them on \n boundaries, and process each chunk separately | |
-- when the next chunk is demanded, it will be read in. | |
-- | |
process :: Int -> Int -> [S.ByteString] -> Int | |
process k i [] = i | |
process k !i (s:t:ts) | |
| S.last s /= '\n' = process k (add k i s') ts' | |
where | |
(s',r) = S.breakEnd (=='\n') s | |
ts' = (S.append r t) : ts -- join chunks on line boundaries | |
process k i (s: ss) = process k (add k i s) ss | |
-- | |
-- process a single cache-sized chunk of numbers, \n aligned | |
-- | |
add :: Int -> Int -> S.ByteString -> Int | |
add k i s | |
| S.null s = i | |
| otherwise = test k i (parse x) xs | |
where (x,xs) = uncons s | |
-- | |
-- process a single line, until \n | |
-- | |
test :: Int -> Int -> Int -> ByteString -> Int | |
test k i !n t | |
| y == '\n' = -- done reading the line, process it: | |
if n `divisibleBy` k then add k (i+1) ys | |
else add k i ys | |
| otherwise = test k i n' ys | |
where (y,ys) = uncons t | |
n' = parse y + 10 * n | |
parse c = ord c - ord '0' | |
-- fastest way to take the head of a strict bytestring | |
uncons s = (w2c (unsafeHead s), unsafeTail s) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
add :: Int -> Int -> S.ByteString -> Int | |
add k i s = if S.null s then i else test k i (parse x) xs | |
where (x,xs) = uncons s | |
{-# INLINE add #-} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment