gelisam/PolyConduit.hs

## PolyConduit.hs
-- Follow up to [1], praising @viercc's better solution [2].
--
-- [1] https://gist.github.com/gelisam/a8bee217410b74f030c21f782de23d11
-- [2] https://www.reddit.com/r/haskell/comments/yb9bi4/comment/itfh07z
--
-- The challenge is still to implement a function which takes in three
-- Conduits, and uses the values from the first Conduit in order to decide
-- which of the other two Conduits to sample from next. Something like this:
--
--     example bools ints strings = do
--       maybeBool <- awaitMaybe bools
--       case maybeBool of
--         Nothing -> do
--           liftIO $ putStrLn "it's over"
--         Just True -> do
--           int <- await ints
--           yield (int + 4)
--           example
--         Just False -> do
--           str <- await strings
--           liftIO $ putStrLn str
--           yield $ length str
--           example
--
-- And I still want a solution which works for n Conduits, not just three. But
-- this time, instead of switching to the notoriously-complex "machines"
-- package, I'll stick to conduit, thanks to @viercc's great tip of using
-- Conduit _transformers_.
{-# LANGUAGE LambdaCase, RankNTypes, ScopedTypeVariables #-}
module Main where

import Test.DocTest

import Control.Monad.IO.Class (MonadIO(liftIO))
import Control.Monad.Trans.Class (lift)
import Data.Conduit hiding (Source)
import Data.Functor.Identity (Identity(runIdentity))
import Data.Void (absurd)

-- $setup
-- >>> import qualified Data.Conduit.Combinators as Conduit
-- >>> import qualified Data.Conduit.List as Conduit
-- >>> :{
-- let testConduit
--       :: Show o
--       => ConduitT () o IO ()
--       -> IO ()
--     testConduit source = do
--       os <- connect source Conduit.consume
--       print os
-- :}


-- In the previous post, I used a type-level list to keep track of the
-- multiple inputs. This time, we'll represent a conduit with n inputs as a
-- stack of n 'ConduitT' transformers.

type Source  m o       = ConduitT () o m ()
type Process m a o     = ConduitT a o m ()
type Tee     m a b o   = ConduitT a o
                           (ConduitT b Void m) ()
type Tee3    m a b c o = ConduitT a o
                           (ConduitT b Void
                             (ConduitT c Void m)) ()

-- Each 'ConduitT' layer has two type arguments; one for the elements you
-- await from the single input you get when you use conduits the normal way,
-- and one for the elements you send downstream. With n layers, we can thus
-- specify n inputs, which is what we want, but we must also specify n
-- outputs, which is n-1 too many. Using 'Void' for all but one of the outputs
-- clarifies that only one of them is actually used.

-- Now that we have unexpectedly managed to represent conduits which take more
-- than one input, how can we attach those inputs? In the previous post, I
-- defined a versatile 'polyCapL' function which could attach a 'Source' to a
-- number of different machines:
--
--     -- polyCapL :: Source m a -> Process m a o     -> Source  m o
--     -- polyCapL :: Source m a -> Tee     m a b o   -> Process m b o
--     -- polyCapL :: Source m a -> Tee3    m a b c o -> Tee     m b c o
--     polyCapL
--       :: Source m a1
--       -> PolyTee m (a1 ': as) o
--       -> PolyTee m as o
--
-- I would like to construct a similar function here. As before, I want to
-- attach a single source as the first input 'a1', while leaving the remaining
-- inputs 'as' untouched. In this conduit stack representation, the first
-- input is the input of the outermost 'ConduitT' layer, while the remaining
-- inputs are specified by the rest of the layers, 'mm':
--
--     myPolyCapL
--       :: Source m a1 ()
--       -> ConduitT a1 o mm r
--       -> mm r
--
-- In order for this type to specialize to these,
--
--     myPolyCapL :: Source m a -> Process m a o     -> Source  m o
--     myPolyCapL :: Source m a -> Tee     m a b o   -> Process m b o
--     myPolyCapL :: Source m a -> Tee3    m a b c o -> Tee     m b c o
--
-- I need to somehow specify that the monad at the base of the 'mm' stack must
-- be 'm'. When 'm' is IO, this is represented using a 'MonadIO' constraint:
--
--     liftIO :: forall x. IO x -> mm x
--
-- There exists a @MonadBase m@ constraint which generalizes 'MonadIO':
--
--     liftBase :: forall x. m x -> mm x
--
-- But instead of adding an orphan @MonadBase m (ConduitT i o mm)@ instance,
-- I'll just ask for an extra @forall x. m x -> mm x@ parameter:
--
--     myPolyCapL
--       :: (forall x. m x -> mm x)
--       => Source m a1 ()
--       -> ConduitT a1 o mm r
--       -> mm r

-- Another way in which the type above isn't quite right is that the output
-- type 'o' disappears. The fix is quite simple: instead of only concretizing
-- the very outermost ConduitT layer and leaving the rest abstract, I
-- concretize the _two_ outermost ConduitT layers:
myPolyCapL
  :: forall a b o m mm r. (Monad m, Monad mm)
  => (forall x. m x -> mm x)
  -> ConduitT () a m ()
  -> ConduitT a o (ConduitT b Void mm) r
  -> ConduitT b o mm r
myPolyCapL liftM src doubleConduit
  = connect src' doubleConduit''
  where
    -- The implementation looks very different from polyCapL's, but it's the
    -- same idea. It just so happens that the conduit API is expressive enough
    -- that we can achieve our goal via several small transformations, without
    -- having to unravel the conduits into sequences of instructions.

    src' :: ConduitT () a (ConduitT b o mm) ()
    src'
      = transPipe (lift . liftM) src

    doubleConduit' :: ConduitT a o (ConduitT b o mm) r
    doubleConduit'
      = transPipe (mapOutput absurd) doubleConduit

    -- The 'doubleConduit' transformations swap the 'Void' and 'o' output
    -- types.
    -- At this point one might wonder why I chose the convention of
    -- using 'Void' for all but the _outermost_ layer. If I had chosen the
    -- innermost layer instead, the 'o' would already be in the right
    -- position, and I wouldn't need to perform any transformations on
    -- 'doubleConduit'!
    -- The reason is simply to provide a more ergonomic experience to the
    -- user: by choosing the outermost layer, the user can emit by writing
    --
    --     emit o
    --
    -- Whereas if I had chosen the innermost layer, the user would have to
    -- write this instead.
    --
    --     lift $ lift $ emit o
    doubleConduit'' :: ConduitT a Void (ConduitT b o mm) r
    doubleConduit''
      = fuseUpstream doubleConduit' outputToInner

    outputToInner :: ConduitT o Void (ConduitT b o mm) ()
    outputToInner = do
      await >>= \case
        Just o -> do
          lift $ yield o
          outputToInner
        Nothing -> do
          pure ()

-- The types line up, but how does this work? We're compiling down to a single layer, so how did we persuade conduit to magically create a ConduitT with more than one input?
--
-- The secret is, we didn't! Conduits are combined in the same way machines are: by lining up and then eliminating matching `yield` and `await` instructions. Thus, after we have attached an input and eliminated a ConduitT layer, all the `await` calls which were reading from that input have been replaced by a fragment of the code from that input, namely the code between two consecutive `yield`s. That's the magic of representing computations as a sequence of instructions, we can splice and rearrange those instructions!
--
-- In fact, machines are represented in pretty much the same way as conduits, as a sequence of instructions, they aren't stored as a tree or a graph of instructions as one might expect. Pretty much the only difference is that a machine's `awaits` instruction takes an extra argument specifying which inputs it is awaiting from.


-- Anyway, since I suspect that most conduit stacks will either have IO or Identity as
-- a base monad, here are two specializations which fill-in the 'liftM'
-- parameter.

-- polyCapIO :: Source IO a -> Process IO a o     -> Source  IO o
-- polyCapIO :: Source IO a -> Tee     IO a b o   -> Process IO b o
-- polyCapIO :: Source IO a -> Tee3    IO a b c o -> Tee     IO b c o
polyCapIO
  :: forall a b o mm r. MonadIO mm
  => Source IO a
  -> ConduitT a o (ConduitT b Void mm) r
  -> ConduitT b o mm r
polyCapIO
  = myPolyCapL liftIO

polyCap
  :: forall a b o mm r. Monad mm
  => ConduitT () a Identity ()
  -> ConduitT a o (ConduitT b Void mm) r
  -> ConduitT b o mm r
polyCap
  = myPolyCapL (pure . runIdentity)


-- In the previous post, I also implemented a 'polyCapR' function for
-- converting a fully-saturated 'PolyTee' into a 'Source', so it can be used
-- with existing machine combinators.
--
--     polyCapR
--       :: PolyTee m '[] b
--       -> Source m b
--
-- With the conduit layers representation, such a function is not needed.
-- After attaching all but one of the inputs, the result is a single ConduitT
-- layer, that is, a normal conduit which can already be used with existing
-- conduit combinators. In particular, 'fuse' can be used to attach the last
-- input, thus converting the conduit to a source.


-- We can finally implement the challenge; twice, in order to exercise both
-- the IO and Identity specializations.

-- |
-- >>> :{
-- testConduit
--   $ fuse (Conduit.yieldMany ["foo", "bar", "quux"])
--   $ polyCapIO (Conduit.yieldMany [1..])
--   $ polyCapIO (Conduit.yieldMany [True, False, False, True, False])
--   $ exampleIO
-- :}
-- foo
-- bar
-- quux
-- it's over
-- [5,3,3,6,4]
exampleIO
  :: ConduitT Bool Int
       (ConduitT Int Void
         (ConduitT String Void IO))
       ()
exampleIO = do
  maybeBool <- await
  case maybeBool of
    Nothing -> do
      liftIO $ putStrLn "it's over"
    Just True -> do
      Just int <- lift await
      yield (int + 4)
      exampleIO
    Just False -> do
      Just str <- lift $ lift await
      liftIO $ putStrLn str
      yield $ length str
      exampleIO

-- |
-- >>> :{
-- testConduit
--   $ fuse (Conduit.yieldMany ["foo", "bar", "quux"])
--   $ polyCap (Conduit.yieldMany [1..])
--   $ polyCap (Conduit.yieldMany [True, False, False, True, False])
--   $ example
-- :}
-- [5,3,3,6,4]
example
  :: forall m. MonadFail m
  => ConduitT Bool Int
       (ConduitT Int Void
         (ConduitT String Void m))
       ()
example = do
  maybeBool <- await
  case maybeBool of
    Nothing -> do
      pure ()
    Just True -> do
      Just int <- lift await
      yield (int + 4)
      example
    Just False -> do
      Just str <- lift $ lift await
      yield $ length str
      example

main :: IO ()
main = do
  putStrLn "typechecks."

test :: IO ()
test = do
  doctest ["src/Main.hs"]
	-- Follow up to [1], praising @viercc's better solution [2].
	--
	-- [1] https://gist.github.com/gelisam/a8bee217410b74f030c21f782de23d11
	-- [2] https://www.reddit.com/r/haskell/comments/yb9bi4/comment/itfh07z
	--
	-- The challenge is still to implement a function which takes in three
	-- Conduits, and uses the values from the first Conduit in order to decide
	-- which of the other two Conduits to sample from next. Something like this:
	--
	-- example bools ints strings = do
	-- maybeBool <- awaitMaybe bools
	-- case maybeBool of
	-- Nothing -> do
	-- liftIO $ putStrLn "it's over"
	-- Just True -> do
	-- int <- await ints
	-- yield (int + 4)
	-- example
	-- Just False -> do
	-- str <- await strings
	-- liftIO $ putStrLn str
	-- yield $ length str
	-- example
	--
	-- And I still want a solution which works for n Conduits, not just three. But
	-- this time, instead of switching to the notoriously-complex "machines"
	-- package, I'll stick to conduit, thanks to @viercc's great tip of using
	-- Conduit _transformers_.
	{-# LANGUAGE LambdaCase, RankNTypes, ScopedTypeVariables #-}
	module Main where

	import Test.DocTest

	import Control.Monad.IO.Class (MonadIO(liftIO))
	import Control.Monad.Trans.Class (lift)
	import Data.Conduit hiding (Source)
	import Data.Functor.Identity (Identity(runIdentity))
	import Data.Void (absurd)

	-- $setup
	-- >>> import qualified Data.Conduit.Combinators as Conduit
	-- >>> import qualified Data.Conduit.List as Conduit
	-- >>> :{
	-- let testConduit
	-- :: Show o
	-- => ConduitT () o IO ()
	-- -> IO ()
	-- testConduit source = do
	-- os <- connect source Conduit.consume
	-- print os
	-- :}


	-- In the previous post, I used a type-level list to keep track of the
	-- multiple inputs. This time, we'll represent a conduit with n inputs as a
	-- stack of n 'ConduitT' transformers.

	type Source m o = ConduitT () o m ()
	type Process m a o = ConduitT a o m ()
	type Tee m a b o = ConduitT a o
	(ConduitT b Void m) ()
	type Tee3 m a b c o = ConduitT a o
	(ConduitT b Void
	(ConduitT c Void m)) ()

	-- Each 'ConduitT' layer has two type arguments; one for the elements you
	-- await from the single input you get when you use conduits the normal way,
	-- and one for the elements you send downstream. With n layers, we can thus
	-- specify n inputs, which is what we want, but we must also specify n
	-- outputs, which is n-1 too many. Using 'Void' for all but one of the outputs
	-- clarifies that only one of them is actually used.

	-- Now that we have unexpectedly managed to represent conduits which take more
	-- than one input, how can we attach those inputs? In the previous post, I
	-- defined a versatile 'polyCapL' function which could attach a 'Source' to a
	-- number of different machines:
	--
	-- -- polyCapL :: Source m a -> Process m a o -> Source m o
	-- -- polyCapL :: Source m a -> Tee m a b o -> Process m b o
	-- -- polyCapL :: Source m a -> Tee3 m a b c o -> Tee m b c o
	-- polyCapL
	-- :: Source m a1
	-- -> PolyTee m (a1 ': as) o
	-- -> PolyTee m as o
	--
	-- I would like to construct a similar function here. As before, I want to
	-- attach a single source as the first input 'a1', while leaving the remaining
	-- inputs 'as' untouched. In this conduit stack representation, the first
	-- input is the input of the outermost 'ConduitT' layer, while the remaining
	-- inputs are specified by the rest of the layers, 'mm':
	--
	-- myPolyCapL
	-- :: Source m a1 ()
	-- -> ConduitT a1 o mm r
	-- -> mm r
	--
	-- In order for this type to specialize to these,
	--
	-- myPolyCapL :: Source m a -> Process m a o -> Source m o
	-- myPolyCapL :: Source m a -> Tee m a b o -> Process m b o
	-- myPolyCapL :: Source m a -> Tee3 m a b c o -> Tee m b c o
	--
	-- I need to somehow specify that the monad at the base of the 'mm' stack must
	-- be 'm'. When 'm' is IO, this is represented using a 'MonadIO' constraint:
	--
	-- liftIO :: forall x. IO x -> mm x
	--
	-- There exists a @MonadBase m@ constraint which generalizes 'MonadIO':
	--
	-- liftBase :: forall x. m x -> mm x
	--
	-- But instead of adding an orphan @MonadBase m (ConduitT i o mm)@ instance,
	-- I'll just ask for an extra @forall x. m x -> mm x@ parameter:
	--
	-- myPolyCapL
	-- :: (forall x. m x -> mm x)
	-- => Source m a1 ()
	-- -> ConduitT a1 o mm r
	-- -> mm r

	-- Another way in which the type above isn't quite right is that the output
	-- type 'o' disappears. The fix is quite simple: instead of only concretizing
	-- the very outermost ConduitT layer and leaving the rest abstract, I
	-- concretize the _two_ outermost ConduitT layers:
	myPolyCapL
	:: forall a b o m mm r. (Monad m, Monad mm)
	=> (forall x. m x -> mm x)
	-> ConduitT () a m ()
	-> ConduitT a o (ConduitT b Void mm) r
	-> ConduitT b o mm r
	myPolyCapL liftM src doubleConduit
	= connect src' doubleConduit''
	where
	-- The implementation looks very different from polyCapL's, but it's the
	-- same idea. It just so happens that the conduit API is expressive enough
	-- that we can achieve our goal via several small transformations, without
	-- having to unravel the conduits into sequences of instructions.

	src' :: ConduitT () a (ConduitT b o mm) ()
	src'
	= transPipe (lift . liftM) src

	doubleConduit' :: ConduitT a o (ConduitT b o mm) r
	doubleConduit'
	= transPipe (mapOutput absurd) doubleConduit

	-- The 'doubleConduit' transformations swap the 'Void' and 'o' output
	-- types.
	-- At this point one might wonder why I chose the convention of
	-- using 'Void' for all but the _outermost_ layer. If I had chosen the
	-- innermost layer instead, the 'o' would already be in the right
	-- position, and I wouldn't need to perform any transformations on
	-- 'doubleConduit'!
	-- The reason is simply to provide a more ergonomic experience to the
	-- user: by choosing the outermost layer, the user can emit by writing
	--
	-- emit o
	--
	-- Whereas if I had chosen the innermost layer, the user would have to
	-- write this instead.
	--
	-- lift $ lift $ emit o
	doubleConduit'' :: ConduitT a Void (ConduitT b o mm) r
	doubleConduit''
	= fuseUpstream doubleConduit' outputToInner

	outputToInner :: ConduitT o Void (ConduitT b o mm) ()
	outputToInner = do
	await >>= \case
	Just o -> do
	lift $ yield o
	outputToInner
	Nothing -> do
	pure ()

	-- The types line up, but how does this work? We're compiling down to a single layer, so how did we persuade conduit to magically create a ConduitT with more than one input?
	--
	-- The secret is, we didn't! Conduits are combined in the same way machines are: by lining up and then eliminating matching `yield` and `await` instructions. Thus, after we have attached an input and eliminated a ConduitT layer, all the `await` calls which were reading from that input have been replaced by a fragment of the code from that input, namely the code between two consecutive `yield`s. That's the magic of representing computations as a sequence of instructions, we can splice and rearrange those instructions!
	--
	-- In fact, machines are represented in pretty much the same way as conduits, as a sequence of instructions, they aren't stored as a tree or a graph of instructions as one might expect. Pretty much the only difference is that a machine's `awaits` instruction takes an extra argument specifying which inputs it is awaiting from.


	-- Anyway, since I suspect that most conduit stacks will either have IO or Identity as
	-- a base monad, here are two specializations which fill-in the 'liftM'
	-- parameter.

	-- polyCapIO :: Source IO a -> Process IO a o -> Source IO o
	-- polyCapIO :: Source IO a -> Tee IO a b o -> Process IO b o
	-- polyCapIO :: Source IO a -> Tee3 IO a b c o -> Tee IO b c o
	polyCapIO
	:: forall a b o mm r. MonadIO mm
	=> Source IO a
	-> ConduitT a o (ConduitT b Void mm) r
	-> ConduitT b o mm r
	polyCapIO
	= myPolyCapL liftIO

	polyCap
	:: forall a b o mm r. Monad mm
	=> ConduitT () a Identity ()
	-> ConduitT a o (ConduitT b Void mm) r
	-> ConduitT b o mm r
	polyCap
	= myPolyCapL (pure . runIdentity)


	-- In the previous post, I also implemented a 'polyCapR' function for
	-- converting a fully-saturated 'PolyTee' into a 'Source', so it can be used
	-- with existing machine combinators.
	--
	-- polyCapR
	-- :: PolyTee m '[] b
	-- -> Source m b
	--
	-- With the conduit layers representation, such a function is not needed.
	-- After attaching all but one of the inputs, the result is a single ConduitT
	-- layer, that is, a normal conduit which can already be used with existing
	-- conduit combinators. In particular, 'fuse' can be used to attach the last
	-- input, thus converting the conduit to a source.


	-- We can finally implement the challenge; twice, in order to exercise both
	-- the IO and Identity specializations.

	-- \|
	-- >>> :{
	-- testConduit
	-- $ fuse (Conduit.yieldMany ["foo", "bar", "quux"])
	-- $ polyCapIO (Conduit.yieldMany [1..])
	-- $ polyCapIO (Conduit.yieldMany [True, False, False, True, False])
	-- $ exampleIO
	-- :}
	-- foo
	-- bar
	-- quux
	-- it's over
	-- [5,3,3,6,4]
	exampleIO
	:: ConduitT Bool Int
	(ConduitT Int Void
	(ConduitT String Void IO))
	()
	exampleIO = do
	maybeBool <- await
	case maybeBool of
	Nothing -> do
	liftIO $ putStrLn "it's over"
	Just True -> do
	Just int <- lift await
	yield (int + 4)
	exampleIO
	Just False -> do
	Just str <- lift $ lift await
	liftIO $ putStrLn str
	yield $ length str
	exampleIO

	-- \|
	-- >>> :{
	-- testConduit
	-- $ fuse (Conduit.yieldMany ["foo", "bar", "quux"])
	-- $ polyCap (Conduit.yieldMany [1..])
	-- $ polyCap (Conduit.yieldMany [True, False, False, True, False])
	-- $ example
	-- :}
	-- [5,3,3,6,4]
	example
	:: forall m. MonadFail m
	=> ConduitT Bool Int
	(ConduitT Int Void
	(ConduitT String Void m))
	()
	example = do
	maybeBool <- await
	case maybeBool of
	Nothing -> do
	pure ()
	Just True -> do
	Just int <- lift await
	yield (int + 4)
	example
	Just False -> do
	Just str <- lift $ lift await
	yield $ length str
	example

	main :: IO ()
	main = do
	putStrLn "typechecks."

	test :: IO ()
	test = do
	doctest ["src/Main.hs"]