Skip to content

Instantly share code, notes, and snippets.

@michaelt michaelt/examples.hs
Last active Apr 27, 2018

Embed
What would you like to do?
simple shell-like programs using Data.ByteString.Streaming, following the io-streams tutorial
-- These examples are based on the tutorial module in the io-streams package
{-#LANGUAGE OverloadedStrings #-}
import Streaming
import Streaming.Prelude (yield, next, each, for, with, subst)
import qualified Streaming.Prelude as S
import qualified Data.ByteString.Char8 as B
import Data.ByteString.Streaming (ByteString)
import qualified Data.ByteString.Streaming.Char8 as Q
import System.IO (withFile, IOMode(..))
import Control.Monad
import Control.Applicative
import Data.Monoid
cat :: FilePath -> IO ()
cat file = withFile file ReadMode $ \h -> Q.stdout (Q.fromHandle h)
echo :: IO ()
echo = Q.stdout Q.stdin
-- Send the first n lines to stdout.
-- Note that this streams even if it hits a 10 terabyte "line"
head :: Int -> FilePath -> IO ()
head n file = withFile file ReadMode $ \h ->
Q.stdout -- IO () -- stream to IO.stdout
$ Q.unlines -- ByteString m () -- insert '\n' between bytestream layers
$ takes n -- Stream (ByteString m) m () -- nb. "takes" is 'functor general'
$ Q.lines -- Stream (ByteString m) m () -- divided into Stream layers
$ Q.fromHandle h -- ByteString m () -- raw bytes
yes :: IO ()
yes = Q.stdout $ Q.cycle "y\n" -- uses OverloadedStrings instance for 'ByteString m ()'
grep :: B.ByteString -> FilePath -> IO ()
grep pat file = withFile file ReadMode $ \h -> do
let raw :: ByteString IO () -- get raw bytes
raw = Q.fromHandle h
segmented :: Stream (ByteString IO) IO () -- divide on newlines
segmented = Q.lines raw
individualized :: Stream (Of B.ByteString) IO ()
individualized = mapped Q.toStrict segmented -- danger: concatenate 'real' bytestrings!
matching :: Stream (Of B.ByteString) IO () -- filter out matching bytestrings
matching = S.filter (B.isInfixOf pat) individualized
deindividualized :: Stream (ByteString IO) IO () -- restream (implicitly using
deindividualized = with matching Q.chunk -- the new chunk structure)
unsegmented :: ByteString IO () -- add newlines
unsegmented = Q.unlines deindividualized
Q.stdout unsegmented -- stream to IO.stdout
-- or, more compactly:
grep' :: B.ByteString -> FilePath -> IO ()
grep' pat file = withFile file ReadMode $ \h -> do
Q.stdout
$ Q.unlines
$ subst Q.chunk
$ S.filter (B.isInfixOf pat)
$ mapped Q.toStrict
$ Q.lines
$ Q.fromHandle h
-- >>> grep "grep" "examples.hs"
-- grep :: B.ByteString -> FilePath -> IO ()
-- grep pat file = withFile file ReadMode $ \h -> do
-- grep' pat file = withFile file ReadMode $ \h -> do
data Option = Bytes | Words | Lines
len = S.fold (\n _ -> n + 1) 0 id
wc :: Option -> FilePath -> IO ()
wc opt file = withFile file ReadMode $ \h ->
do n <- count (Q.fromHandle h)
print n
where
count is = case opt of
Bytes -> Q.length is
Words -> S.sum $ mapped blank_layer $ Q.words is
Lines -> S.sum $ mapped blank_layer $ Q.lines is
blank_layer :: Monad m => ByteString m r -> m (Of Int r)
blank_layer = liftM (1 :>) . Q.effects
-- replace each layer with (1 :> ...); here we do not accumlate strict-bs words
-- >>> wc Words "examples.hs"
-- 801 :> ()
-- exercise: write `wc` to permit a list of options, using `foldl` to combine
-- the different folds. This would require a more direct implementation
-- of what might be called `line_count :: Fold Char Int` and `word_count :: Fold Char Int`
paste file file' = withFile file ReadMode $ \h ->
withFile file' ReadMode $ \h' ->
Q.stdout
$ Q.unlines
$ let left = Q.lines (Q.fromHandle h)
right = Q.lines (Q.fromHandle h' )
center = repeats $ Q.chunk "\t"
in left <|> center <|> right
-- >>> paste "nums.txt" "numbers.txt"
-- 1.1
-- 1 2.2
-- 2
-- 3 3.3
-- 4
-- 5 4.4
-- 6 5.5 -- this program is ziplike and breaks where either does.
-- number the lines of a file
nl :: FilePath -> IO ()
nl file = withFile file ReadMode $ \h ->
Q.stdout -- IO () / \
$ Q.unlines -- ByteString IO () |
$ (\bss -> with bss Q.chunk) -- Stream (ByteString IO) IO () |
$ S.zipWith nlpad (each [1..]) -- Stream (Of B.ByteString) IO () |
$ mapped Q.toStrict -- Stream (Of B.ByteString) IO () |
$ Q.lines -- Stream (ByteString IO) IO () |
$ Q.fromHandle h -- ByteString IO () |
where
nlpad :: Int -> B.ByteString -> B.ByteString
nlpad n bs = padding <> B.pack (show n <> " ") <> bs
where
len = length (show n); diff = 9 - len
padding = if diff > 0 then B.replicate diff ' ' else B.singleton ' '
nl_streaming :: FilePath -> IO ()
nl_streaming file = withFile file ReadMode $ \h ->
Q.stdout
$ Q.unlines
$ interleaves numerals -- interleaves might instead be called 'fuseLayers' or something
$ Q.lines -- note proper streaming
$ Q.fromHandle h
where
numerals :: Monad m => Stream (ByteString m) m ()
numerals = maps trans (each [1..]) where
trans (n:>r) = Q.chunk stuff >> return r
where
len = length (show n); diff = 9 - len
padding = if diff > 0 then B.replicate diff ' ' else B.singleton ' '
stuff = padding <> B.pack (show n ++ " ")
@PierreR

This comment has been minimized.

Copy link

PierreR commented Sep 7, 2015

Out of curiosity (but I admit it has distracted me a bit ;-) why do you use Q for Data.ByteString.Streaming.Char8 ?

@michaelt

This comment has been minimized.

Copy link
Owner Author

michaelt commented Oct 13, 2015

Ha, I just noticed this. I just called it Q because I ran out of letters. T=Text, S = Streaming; P = Pipes; L = Control.Foldl; B=ByteString ... so I hit on R=Data.ByteString.Streaming and Q=Data.ByteString.Streaming.Char8. I guess it is a little strange, but I became accustomed to using L for Foldl, following the documentation, and it is very sensible once you get a little accustomed to it.

@afcady

This comment has been minimized.

Copy link

afcady commented Feb 16, 2016

How would you modify nl to only number non-blank lines?

@mayeranalytics

This comment has been minimized.

Copy link

mayeranalytics commented Apr 27, 2018

Shouldn't takes n in line 27 be Streaming.Prelude.take n?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.