Skip to content

Instantly share code, notes, and snippets.

@newton-migosi
Last active May 23, 2023 18:30
Show Gist options
  • Save newton-migosi/99badb0c7a6eebbd55d48ecc849d6ec0 to your computer and use it in GitHub Desktop.
Save newton-migosi/99badb0c7a6eebbd55d48ecc849d6ec0 to your computer and use it in GitHub Desktop.
[Haskell-cafe] Fast number parsing with strict bytestrings
import Control.Monad
import Data.Maybe
import qualified Data.ByteString.Char8 as B
divisibleBy :: Int -> Int -> Bool
a `divisibleBy` n = a `rem` n == 0
main :: IO ()
main = do
[n,k] <- (map int . B.split ' ') `fmap` B.getLine :: IO [Int]
let
doLine :: Int -> Int -> IO Int
doLine r _ = B.getLine >>= return . testDiv r
-- 'return' moved here ^^
doLine :: Int -> Int -> IO Int
doLine r _ = B.getLine >>= \s -> return $! testDiv r s
import Data.Maybe
import Data.List
import qualified Data.ByteString.Lazy.Char8 as L
main :: IO ()
main = do
(l:ls) <- L.lines `fmap` L.getContents -- done with IO now.
let [n,k] = map int (L.split ' ' l)
print . foldl' (test k) 0 . map int . take n $ ls
test :: Int -> Int -> Int -> Int
test k acc n
| n `divisibleBy` k = acc+1
| otherwise = acc
int :: L.ByteString -> Int
int = fst . fromJust . L.readInt
divisibleBy :: Int -> Int -> Bool
a `divisibleBy` n = a `rem` n == 0
{-# OPTIONS -fbang-patterns #-}
import Data.Char
import Data.Maybe
import Data.ByteString.Base
import qualified Data.ByteString.Char8 as S
import qualified Data.ByteString.Lazy.Char8 as L
main = do
ss <- L.getContents -- done with IO now.
let (l,ls) = L.break (=='\n') ss
-- don't need count, we're allocating lazily
k = fst . fromJust . L.readInt . last . L.split ' ' $ l
file = L.toChunks (L.tail ls) -- a lazy list of strict cache chunks
print $ process k 0 file
divisibleBy :: Int -> Int -> Bool
a `divisibleBy` n = a `rem` n == 0
-- ---------------------------------------------------------------------
--
-- Optimised parsing of strict bytestrings representing \n separated numbers
--
--
-- we have the file as a list of cache chunks
-- align them on \n boundaries, and process each chunk separately
-- when the next chunk is demanded, it will be read in.
--
process :: Int -> Int -> [S.ByteString] -> Int
process k i [] = i
process k !i (s:t:ts)
| S.last s /= '\n' = process k (add k i s') ts'
where
(s',r) = S.breakEnd (=='\n') s
ts' = (S.append r t) : ts -- join chunks on line boundaries
process k i (s: ss) = process k (add k i s) ss
--
-- process a single cache-sized chunk of numbers, \n aligned
--
add :: Int -> Int -> S.ByteString -> Int
add k i s
| S.null s = i
| otherwise = test k i (parse x) xs
where (x,xs) = uncons s
--
-- process a single line, until \n
--
test :: Int -> Int -> Int -> ByteString -> Int
test k i !n t
| y == '\n' = -- done reading the line, process it:
if n `divisibleBy` k then add k (i+1) ys
else add k i ys
| otherwise = test k i n' ys
where (y,ys) = uncons t
n' = parse y + 10 * n
parse c = ord c - ord '0'
-- fastest way to take the head of a strict bytestring
uncons s = (w2c (unsafeHead s), unsafeTail s)
add :: Int -> Int -> S.ByteString -> Int
add k i s = if S.null s then i else test k i (parse x) xs
where (x,xs) = uncons s
{-# INLINE add #-}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment