Skip to content

Instantly share code, notes, and snippets.

@aculich
Forked from ivant/Makefile
Created July 10, 2011 05:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aculich/1074302 to your computer and use it in GitHub Desktop.
Save aculich/1074302 to your computer and use it in GitHub Desktop.
wc using Data.Enumerator
{-# LANGUAGE ScopedTypeVariables, ViewPatterns, OverloadedStrings #-}
import Control.Applicative
import Control.Exception (SomeException)
import Control.Monad (when, mapM)
import Control.Monad.Trans (lift)
import Data.Char (ord)
import Data.Enumerator hiding (map, mapM, length, filter, foldl')
import Data.List (foldl')
import Data.Word (Word8, Word64)
import qualified Data.ByteString as SB
import qualified Data.ByteString.Char8 as SB8
import qualified Data.ByteString.Lazy as LB; import Data.ByteString.Lazy (ByteString)
import qualified Data.Enumerator.Binary as EB
import System.IO (stdin)
import Text.Printf (printf)
type Counter = Int -- Int is ~3% faster
type CountingIteratee m = Iteratee SB.ByteString m Counter
countCharacters :: forall m. Monad m => CountingIteratee m
countCharacters = continue (step 0)
where
step :: Monad m => Counter -> Stream SB.ByteString -> CountingIteratee m
step n _ | n `seq` False = undefined
step n EOF = return n
step n (Chunks cs) = continue (step $ n + fromIntegral (sum (map SB.length cs)))
countWords :: forall m. Monad m => CountingIteratee m
countWords = countWords' 0
where
isSpc :: Word8 -> Bool
isSpc c = c `SB.elem` " \t\r\n\f\v\xa0"
countWords' :: Monad m => Counter -> CountingIteratee m
countWords' n | n `seq` False = undefined
countWords' n = do
_ <- EB.takeWhile isSpc
cs <- EB.takeWhile (not . isSpc)
if LB.null cs then
return n
else do
countWords' (n + 1) -- TODO is that properly optimized (do we get a stack overflow?)
countLines :: forall m. Monad m => CountingIteratee m
countLines = countLines' 0
where
-- Proper line endings: \n, \r, \r\n
isEOL :: Word8 -> Bool
isEOL c = c == cr || c == lf
cr, lf :: Word8
cr = fromIntegral (ord '\r')
lf = fromIntegral (ord '\n')
countLines' :: Monad m => Counter -> CountingIteratee m
countLines' n | n `seq` False = undefined
countLines' n = do
eof <- isEOF
if eof then
return n
else do
_ <- EB.takeWhile (not . isEOL)
eol <- EB.head -- consume the first EOL character
when (eol == Just cr) $ do
c <- EB.head
case c of
Just c | c == lf -> yield () (Chunks [SB.singleton c])
_ -> return ()
countLines' (n + 1)
count :: (a -> Bool) -> [a] -> Int
count p xs = foldl' countOne 0 xs
where
countOne a x | a `seq` False = undefined
countOne a x | p x = a+1
| otherwise = a
fanout :: forall a m r. Monad m => [Iteratee a m r] -> Iteratee a m [Either SomeException r]
fanout iters = do
steps <- mapM (lift . runIteratee) iters
continue (step steps)
where
step :: Monad m
=> [Step a m r] -- ^ list of unfinished iteratees or their results/errors
-> Stream a -- ^ stream piece to process
-> Iteratee a m [Either SomeException r]
step ss stream | countContinues ss == 0 = yield (map extractResult ss) stream
| otherwise = do
ss' <- mapM (stepContinue stream) ss
case stream of
EOF -> return (map extractResult ss')
_ -> continue (step ss')
countContinues :: [Step a m r] -> Int
countContinues = count isContinue
isContinue :: Step a m r -> Bool
isContinue (Continue _) = True
isContinue _ = False
stepContinue :: Monad m => Stream a -> Step a m r -> Iteratee a m (Step a m r)
stepContinue stream (Continue k) = lift $ runIteratee (k stream)
stepContinue _ s = return s
extractResult :: Step a m r -> Either SomeException r
extractResult (Yield r _) = Right r
extractResult (Error e) = Left e
extractResult _ = error "impossible happened"
main = do
[Right l,Right w,Right c] <- run_ $ EB.enumHandle (65536*4) stdin ==<< fanout [countLines, countWords, countCharacters]
printf "%4d %4d %4d\n" l w c
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment