Skip to content

Instantly share code, notes, and snippets.

@paolino
Last active March 16, 2024 00:52
Show Gist options
  • Save paolino/fd05c46d9c8910f1670adb735db1836b to your computer and use it in GitHub Desktop.
Save paolino/fd05c46d9c8910f1670adb735db1836b to your computer and use it in GitHub Desktop.
Haskell with the expression problems
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
module TT where
import Data.Functor.Compose (Compose(..))
-- Box ------------------------------------------
data Box x = One x | Box [Box x]
deriving (Show)
-- let's say we have 2 goods with similarly structured information
-- in some sense we could think that the information could be zipped via Either
-- Water (first good)----------------------------------------
data WaterContainers
= Bottle Int
| Jug Int
| BagInBox Int
| Tank Int
deriving (Show)
data WaterSource
= River String
| Lake String
| Rain
deriving (Show)
data WaterInfo = WaterInfo
{ waterName :: String
, waterQuality :: Int
, waterSource :: WaterSource
, waterContainer :: Box WaterContainers
}
deriving (Show)
renderWater :: WaterSource -> Box WaterContainers -> String
renderWater s c = "WATER! " ++ show s ++ " " ++ show c
waterInfoExample :: WaterInfo
waterInfoExample =
WaterInfo
{ waterName = "Acqua Panna"
, waterQuality = 10
, waterSource = River "Arno"
, waterContainer = Box [One (Bottle 1), Box [One (Jug 2), One (BagInBox 3)]]
}
-- Bananas (second good) --------------------------------------
data BananasContainers
= Bunch Int
| Crate Int
deriving (Show)
data Coo = Coo Int Int
deriving (Show)
data BananasSource
= MyTree
| Plantation String Coo
| Distributor String
deriving (Show)
data Color = Yellow | Green | Brown | Black
deriving (Show)
data BananasInfo = BananasInfo
{ bananasName :: String
, bananasQuality :: String
, bananasColor :: Color
, bananasSource :: BananasSource
, bananasContainer :: Box BananasContainers
}
deriving (Show)
renderBananas :: BananasSource -> Box BananasContainers -> String
renderBananas s c = "BANANAS! " ++ show s ++ " " ++ show c
bananasInfoExample :: BananasInfo
bananasInfoExample =
BananasInfo
{ bananasName = "Cavendish"
, bananasQuality = "A"
, bananasColor = Yellow
, bananasSource = Plantation "Ecuador" (Coo 0 0)
, bananasContainer = Box [One (Bunch 1), Box [One (Crate 2), One (Bunch 3)]]
}
-- Problem --------------------------------------
-- write a program that render the output of any good given its info
-- Obvious solution
obvious :: Either WaterInfo BananasInfo -> String
obvious (Left w) = renderWater (waterSource w) (waterContainer w)
obvious (Right b) = renderBananas (bananasSource b) (bananasContainer b)
-- great, we got the goal. But this is not optimal in terms of code complexity
-- In fact, in that obvious function, there are unexploited commonalities between
-- the two cases, so we are creating spaghetti code
-- we have 3 functions composed in the same way that we would like to abstract
-- 1. render
-- 2. source
-- 3. container
-- we would like to write
-- render :: Source i -> Container i -> String
-- container :: Info i -> Container i
-- source :: Info i -> Source i
-- program :: AGood Info -> String
-- program (AGood info) = render (source info) (container info)
-- for some definition of all of them .....
-- then we can reuse them
-- given a new function
-- volume :: Container i -> Volume
-- we can compose it with the existing ones
-- program' :: AGood Info -> Volume
-- program' (AGood info) = volume (container info)
-- We present some approaches that have a common parts
-- GoodT index the good at the type level (DataKinds)
-- this is common to all the approaches
data GoodT = WaterT | BananasT
-- Type families applies to first and second approach
-- InfoT, SourceT, ContainerT are type families that index the type of the values
-- via the GoodT type
-- We also have wrap into a newtype to help the compiler in a way I cannot explain yet
type family InfoT (i :: GoodT)
newtype Info i = Info (InfoT i)
type family SourceT (i :: GoodT)
newtype Source i = Source (SourceT i)
type family ContainerT (i :: GoodT)
newtype Container i = Container (ContainerT i)
type instance InfoT WaterT = WaterInfo
type instance SourceT WaterT = WaterSource
type instance ContainerT WaterT = Box WaterContainers
type instance InfoT BananasT = BananasInfo
type instance SourceT BananasT = BananasSource
type instance ContainerT BananasT = Box BananasContainers
-- every time we need to handle values that have a different type depending on the good
-- we have to add a type family with all the instances for the goods
-- this has the same cost with all approaches , type families or data families
----------------- multiple classes solution ---------------------
-- The naive approach is to use one type class for every feature
class RenderC i where
renderC :: Source i -> Container i -> String
instance RenderC WaterT where
renderC (Source s) (Container c) = renderWater s c
instance RenderC BananasT where
renderC (Source s) (Container c) = renderBananas s c
class SourceC i where
sourceC :: Info i -> Source i
instance SourceC WaterT where
sourceC (Info w) = Source $ waterSource w
instance SourceC BananasT where
sourceC (Info b) = Source $ bananasSource b
class ContainerC i where
containerC :: Info i -> Container i
instance ContainerC WaterT where
containerC (Info w) = Container $ waterContainer w
instance ContainerC BananasT where
containerC (Info b) = Container $ bananasContainer b
classy' :: forall i. (RenderC i, SourceC i, ContainerC i) => Info i -> String
classy' info = renderC (sourceC info) (containerC info)
data AGoodC f
= forall i.
(RenderC i, SourceC i, ContainerC i) =>
AGoodC (f i)
classy :: AGoodC Info -> String
classy (AGoodC info) = classy' info
-- we have to judge the approach in terms of scalability so we can disregrad the
-- setup cost. But we have to consider what happens when we add a new good and when
-- we add a new function.
-- 1. Adding a new good
-- we have to add a type instance for every value (Info, Source, Container ...) (common part)
-- we have to add a class instance for every feature (RenderC, SourceC, ContainerC ...)
-- 2. Adding a new function using known values (Source, Container, Info ...)
-- we have to add a class
-- we have to add a class instance for every good
-- we have to handle the diffusion of the new constraint wherever we will use the new function!
-- last one is pretty much a killer, the definition of AGood has to change but any
-- function that uses the new function will have to depend on the new class constraint
-- but at we can do better in some respect if we use GADTs
----------------- GADTs TF solution ----------------
-- Use one GADTs and only one class to reify the good from the type level and
-- pattern match on the reification to branch the code for the different goods
-- A GADT that represents the good type at the value level
data Good i where
Water :: Good WaterT
Bananas :: Good BananasT
-- A type class that reifies the good type
class GoodIndex i where
goodOf :: Good i
-- 1 only instance for every new good
instance GoodIndex WaterT where
goodOf = Water
instance GoodIndex BananasT where
goodOf = Bananas
-- a universal existential type good for any good present or future as long as
-- there is a GoodIndex instance for it
data AGood f = forall i. (GoodIndex i) => AGood (f i)
-- functions! Now can use direct pattern matching on the good type
container :: forall i. (GoodIndex i) => Info i -> Container i
container = case goodOf @i of
Water -> f waterContainer
Bananas -> f bananasContainer
where
f g (Info i) = Container $ g i
source :: forall i. (GoodIndex i) => Info i -> Source i
source = case goodOf @i of
Water -> f waterSource
Bananas -> f bananasSource
where
f g (Info i) = Source $ g i
render :: forall i. (GoodIndex i) => Source i -> Container i -> String
render (Source s) (Container c) = case goodOf @i of
Water -> renderWater s c
Bananas -> renderBananas s c
program :: AGood Info -> String
program (AGood info) = render (source info) (container info)
-- with this approach the cost of expanding the code becomes
-- 1. Adding a new good
-- we have to add a type instance for every value (Info, Source, Container ...) (common part)
-- we have to write a pattern clause for every feature (render, source, container ...)
-- (not much better, but no instances)
-- 2. Adding a new function using known values (Source, Container, Info ...)
-- we have to write the function using pattern matching on the good type
-- (GREAT SUCCESS, no class , no instances, no diffusion of constraints!
-- A note on cost of pattern matching
-- The compiler is happy to REMOVE the pattern matching if the feature functions
-- are inlineable. So (to be verified) we can expect the cost of pattern matching
-- to be zero even in linked code if we add {-# INLINABLE #-} to the feature functions
-------------- GADTs data family variation ----------------
-- here we drop the newtype and use data families to define the types
data family InfoF (i :: GoodT)
data family SourceF (i :: GoodT)
data family ContainerF (i :: GoodT)
-- the cost is in the invention of constructors for the data families
data instance InfoF WaterT = InfoW WaterInfo
data instance ContainerF WaterT = ContainerW (Box WaterContainers)
data instance SourceF WaterT = SourceW WaterSource
data instance InfoF BananasT = InfoB BananasInfo
data instance SourceF BananasT = SourceB BananasSource
data instance ContainerF BananasT = ContainerB (Box BananasContainers)
renderF :: forall i. (GoodIndex i) => SourceF i -> ContainerF i -> String
renderF = case goodOf @i of
Water -> \(SourceW s) (ContainerW c) -> renderWater s c
Bananas -> \(SourceB s) (ContainerB c) -> renderBananas s c
containerF :: forall i. (GoodIndex i) => InfoF i -> ContainerF i
containerF = case goodOf @i of
Water -> \(InfoW i) -> ContainerW $ waterContainer i
Bananas -> \(InfoB i) -> ContainerB $ bananasContainer i
sourceF :: forall i. (GoodIndex i) => InfoF i -> SourceF i
sourceF = case goodOf @i of
Water -> \(InfoW i) -> SourceW $ waterSource i
Bananas -> \(InfoB i) -> SourceB $ bananasSource i
programF :: AGood InfoF -> String
programF (AGood info) = renderF (sourceF info) (containerF info)
---------------- GADTs data family with invertion ----------------
--- finally a radical approach which invert the code dependency stating that the
-- common functionalities were known before data definition
-- and are the unique index of the goods data
-- now we can avoid the data constructor as well
data family InfoFF (i :: GoodT)
data family SourceFF (i :: GoodT)
data family ContainerFF (i :: GoodT)
data instance InfoFF WaterT = WaterInfo'
{ waterName' :: String
, waterQuality' :: Int
, waterSource' :: SourceFF WaterT
, waterContainer' :: Box (ContainerFF WaterT)
}
deriving (Show)
data instance ContainerFF WaterT
= Bottle' Int
| Jug' Int
| BagInBox' Int
| Tank' Int
deriving (Show)
data instance SourceFF WaterT = River' String | Lake' String | Rain'
deriving (Show)
data instance InfoFF BananasT = BananasInfo'
{ bananasName' :: String
, bananasQuality' :: String
, bananasColor' :: Color
, bananasSource' :: SourceFF BananasT
, bananasContainer' :: Box (ContainerFF BananasT)
}
deriving (Show)
data instance ContainerFF BananasT
= Bunch' Int
| Crate' Int
deriving (Show)
data instance SourceFF BananasT
= MyTree'
| Plantation' String Coo
| Distributor' String
deriving (Show)
renderWater' :: SourceFF WaterT -> Box (ContainerFF WaterT) -> String
renderWater' s c = "WATER! " ++ show s ++ " " ++ show c
renderBananas' :: SourceFF BananasT -> Box (ContainerFF BananasT) -> String
renderBananas' s c = "BANANAS! " ++ show s ++ " " ++ show c
renderFF :: forall i. (GoodIndex i) => SourceFF i -> (Compose Box ContainerFF) i -> String
renderFF = case goodOf @i of
Water -> \s c -> renderWater' s $ getCompose c
Bananas -> \s c -> renderBananas' s $ getCompose c
containerFF :: forall i. (GoodIndex i) => InfoFF i -> (Compose Box ContainerFF) i
containerFF = case goodOf @i of
Water -> Compose . waterContainer'
Bananas -> Compose . bananasContainer'
sourceFF :: forall i. (GoodIndex i) => InfoFF i -> SourceFF i
sourceFF = case goodOf @i of
Water -> waterSource'
Bananas -> bananasSource'
programFF :: AGood InfoFF -> String
programFF (AGood info) = renderFF (sourceFF info) (containerFF info)
-- One particular thing I do not like about the data family full approach is
-- that data is indexed by only one kind of types (GoodT) which means that if
-- we spot another angle of commonality we cannot reuse the data families and we
-- have to resort to type familes again. So which angle of commonality is the best
-- to index the data families? This looks arbitrary to me.
-- Moreover data families becomes a dependency of the data, so that implementing
-- data becomes dedicated to the data families. Reusing that data in other contexts
-- becomes awkward because the types now always mention the data families.
-- In fact most of the time we want to use this machinary on datatypes that already exists
-- so we are out of luck.
-- Finally the Compose stuff is annoying and probably doesn't scale well.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment