Last active
March 16, 2024 00:52
-
-
Save paolino/fd05c46d9c8910f1670adb735db1836b to your computer and use it in GitHub Desktop.
Haskell with the expression problems
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE AllowAmbiguousTypes #-} | |
{-# LANGUAGE DataKinds #-} | |
{-# LANGUAGE GADTs #-} | |
{-# LANGUAGE KindSignatures #-} | |
{-# LANGUAGE RankNTypes #-} | |
{-# LANGUAGE ScopedTypeVariables #-} | |
{-# LANGUAGE TypeApplications #-} | |
{-# LANGUAGE TypeFamilies #-} | |
{-# LANGUAGE TypeOperators #-} | |
module TT where | |
import Data.Functor.Compose (Compose(..)) | |
-- Box ------------------------------------------ | |
data Box x = One x | Box [Box x] | |
deriving (Show) | |
-- let's say we have 2 goods with similarly structured information | |
-- in some sense we could think that the information could be zipped via Either | |
-- Water (first good)---------------------------------------- | |
data WaterContainers | |
= Bottle Int | |
| Jug Int | |
| BagInBox Int | |
| Tank Int | |
deriving (Show) | |
data WaterSource | |
= River String | |
| Lake String | |
| Rain | |
deriving (Show) | |
data WaterInfo = WaterInfo | |
{ waterName :: String | |
, waterQuality :: Int | |
, waterSource :: WaterSource | |
, waterContainer :: Box WaterContainers | |
} | |
deriving (Show) | |
renderWater :: WaterSource -> Box WaterContainers -> String | |
renderWater s c = "WATER! " ++ show s ++ " " ++ show c | |
waterInfoExample :: WaterInfo | |
waterInfoExample = | |
WaterInfo | |
{ waterName = "Acqua Panna" | |
, waterQuality = 10 | |
, waterSource = River "Arno" | |
, waterContainer = Box [One (Bottle 1), Box [One (Jug 2), One (BagInBox 3)]] | |
} | |
-- Bananas (second good) -------------------------------------- | |
data BananasContainers | |
= Bunch Int | |
| Crate Int | |
deriving (Show) | |
data Coo = Coo Int Int | |
deriving (Show) | |
data BananasSource | |
= MyTree | |
| Plantation String Coo | |
| Distributor String | |
deriving (Show) | |
data Color = Yellow | Green | Brown | Black | |
deriving (Show) | |
data BananasInfo = BananasInfo | |
{ bananasName :: String | |
, bananasQuality :: String | |
, bananasColor :: Color | |
, bananasSource :: BananasSource | |
, bananasContainer :: Box BananasContainers | |
} | |
deriving (Show) | |
renderBananas :: BananasSource -> Box BananasContainers -> String | |
renderBananas s c = "BANANAS! " ++ show s ++ " " ++ show c | |
bananasInfoExample :: BananasInfo | |
bananasInfoExample = | |
BananasInfo | |
{ bananasName = "Cavendish" | |
, bananasQuality = "A" | |
, bananasColor = Yellow | |
, bananasSource = Plantation "Ecuador" (Coo 0 0) | |
, bananasContainer = Box [One (Bunch 1), Box [One (Crate 2), One (Bunch 3)]] | |
} | |
-- Problem -------------------------------------- | |
-- write a program that render the output of any good given its info | |
-- Obvious solution | |
obvious :: Either WaterInfo BananasInfo -> String | |
obvious (Left w) = renderWater (waterSource w) (waterContainer w) | |
obvious (Right b) = renderBananas (bananasSource b) (bananasContainer b) | |
-- great, we got the goal. But this is not optimal in terms of code complexity | |
-- In fact, in that obvious function, there are unexploited commonalities between | |
-- the two cases, so we are creating spaghetti code | |
-- we have 3 functions composed in the same way that we would like to abstract | |
-- 1. render | |
-- 2. source | |
-- 3. container | |
-- we would like to write | |
-- render :: Source i -> Container i -> String | |
-- container :: Info i -> Container i | |
-- source :: Info i -> Source i | |
-- program :: AGood Info -> String | |
-- program (AGood info) = render (source info) (container info) | |
-- for some definition of all of them ..... | |
-- then we can reuse them | |
-- given a new function | |
-- volume :: Container i -> Volume | |
-- we can compose it with the existing ones | |
-- program' :: AGood Info -> Volume | |
-- program' (AGood info) = volume (container info) | |
-- We present some approaches that have a common parts | |
-- GoodT index the good at the type level (DataKinds) | |
-- this is common to all the approaches | |
data GoodT = WaterT | BananasT | |
-- Type families applies to first and second approach | |
-- InfoT, SourceT, ContainerT are type families that index the type of the values | |
-- via the GoodT type | |
-- We also have wrap into a newtype to help the compiler in a way I cannot explain yet | |
type family InfoT (i :: GoodT) | |
newtype Info i = Info (InfoT i) | |
type family SourceT (i :: GoodT) | |
newtype Source i = Source (SourceT i) | |
type family ContainerT (i :: GoodT) | |
newtype Container i = Container (ContainerT i) | |
type instance InfoT WaterT = WaterInfo | |
type instance SourceT WaterT = WaterSource | |
type instance ContainerT WaterT = Box WaterContainers | |
type instance InfoT BananasT = BananasInfo | |
type instance SourceT BananasT = BananasSource | |
type instance ContainerT BananasT = Box BananasContainers | |
-- every time we need to handle values that have a different type depending on the good | |
-- we have to add a type family with all the instances for the goods | |
-- this has the same cost with all approaches , type families or data families | |
----------------- multiple classes solution --------------------- | |
-- The naive approach is to use one type class for every feature | |
class RenderC i where | |
renderC :: Source i -> Container i -> String | |
instance RenderC WaterT where | |
renderC (Source s) (Container c) = renderWater s c | |
instance RenderC BananasT where | |
renderC (Source s) (Container c) = renderBananas s c | |
class SourceC i where | |
sourceC :: Info i -> Source i | |
instance SourceC WaterT where | |
sourceC (Info w) = Source $ waterSource w | |
instance SourceC BananasT where | |
sourceC (Info b) = Source $ bananasSource b | |
class ContainerC i where | |
containerC :: Info i -> Container i | |
instance ContainerC WaterT where | |
containerC (Info w) = Container $ waterContainer w | |
instance ContainerC BananasT where | |
containerC (Info b) = Container $ bananasContainer b | |
classy' :: forall i. (RenderC i, SourceC i, ContainerC i) => Info i -> String | |
classy' info = renderC (sourceC info) (containerC info) | |
data AGoodC f | |
= forall i. | |
(RenderC i, SourceC i, ContainerC i) => | |
AGoodC (f i) | |
classy :: AGoodC Info -> String | |
classy (AGoodC info) = classy' info | |
-- we have to judge the approach in terms of scalability so we can disregrad the | |
-- setup cost. But we have to consider what happens when we add a new good and when | |
-- we add a new function. | |
-- 1. Adding a new good | |
-- we have to add a type instance for every value (Info, Source, Container ...) (common part) | |
-- we have to add a class instance for every feature (RenderC, SourceC, ContainerC ...) | |
-- 2. Adding a new function using known values (Source, Container, Info ...) | |
-- we have to add a class | |
-- we have to add a class instance for every good | |
-- we have to handle the diffusion of the new constraint wherever we will use the new function! | |
-- last one is pretty much a killer, the definition of AGood has to change but any | |
-- function that uses the new function will have to depend on the new class constraint | |
-- but at we can do better in some respect if we use GADTs | |
----------------- GADTs TF solution ---------------- | |
-- Use one GADTs and only one class to reify the good from the type level and | |
-- pattern match on the reification to branch the code for the different goods | |
-- A GADT that represents the good type at the value level | |
data Good i where | |
Water :: Good WaterT | |
Bananas :: Good BananasT | |
-- A type class that reifies the good type | |
class GoodIndex i where | |
goodOf :: Good i | |
-- 1 only instance for every new good | |
instance GoodIndex WaterT where | |
goodOf = Water | |
instance GoodIndex BananasT where | |
goodOf = Bananas | |
-- a universal existential type good for any good present or future as long as | |
-- there is a GoodIndex instance for it | |
data AGood f = forall i. (GoodIndex i) => AGood (f i) | |
-- functions! Now can use direct pattern matching on the good type | |
container :: forall i. (GoodIndex i) => Info i -> Container i | |
container = case goodOf @i of | |
Water -> f waterContainer | |
Bananas -> f bananasContainer | |
where | |
f g (Info i) = Container $ g i | |
source :: forall i. (GoodIndex i) => Info i -> Source i | |
source = case goodOf @i of | |
Water -> f waterSource | |
Bananas -> f bananasSource | |
where | |
f g (Info i) = Source $ g i | |
render :: forall i. (GoodIndex i) => Source i -> Container i -> String | |
render (Source s) (Container c) = case goodOf @i of | |
Water -> renderWater s c | |
Bananas -> renderBananas s c | |
program :: AGood Info -> String | |
program (AGood info) = render (source info) (container info) | |
-- with this approach the cost of expanding the code becomes | |
-- 1. Adding a new good | |
-- we have to add a type instance for every value (Info, Source, Container ...) (common part) | |
-- we have to write a pattern clause for every feature (render, source, container ...) | |
-- (not much better, but no instances) | |
-- 2. Adding a new function using known values (Source, Container, Info ...) | |
-- we have to write the function using pattern matching on the good type | |
-- (GREAT SUCCESS, no class , no instances, no diffusion of constraints! | |
-- A note on cost of pattern matching | |
-- The compiler is happy to REMOVE the pattern matching if the feature functions | |
-- are inlineable. So (to be verified) we can expect the cost of pattern matching | |
-- to be zero even in linked code if we add {-# INLINABLE #-} to the feature functions | |
-------------- GADTs data family variation ---------------- | |
-- here we drop the newtype and use data families to define the types | |
data family InfoF (i :: GoodT) | |
data family SourceF (i :: GoodT) | |
data family ContainerF (i :: GoodT) | |
-- the cost is in the invention of constructors for the data families | |
data instance InfoF WaterT = InfoW WaterInfo | |
data instance ContainerF WaterT = ContainerW (Box WaterContainers) | |
data instance SourceF WaterT = SourceW WaterSource | |
data instance InfoF BananasT = InfoB BananasInfo | |
data instance SourceF BananasT = SourceB BananasSource | |
data instance ContainerF BananasT = ContainerB (Box BananasContainers) | |
renderF :: forall i. (GoodIndex i) => SourceF i -> ContainerF i -> String | |
renderF = case goodOf @i of | |
Water -> \(SourceW s) (ContainerW c) -> renderWater s c | |
Bananas -> \(SourceB s) (ContainerB c) -> renderBananas s c | |
containerF :: forall i. (GoodIndex i) => InfoF i -> ContainerF i | |
containerF = case goodOf @i of | |
Water -> \(InfoW i) -> ContainerW $ waterContainer i | |
Bananas -> \(InfoB i) -> ContainerB $ bananasContainer i | |
sourceF :: forall i. (GoodIndex i) => InfoF i -> SourceF i | |
sourceF = case goodOf @i of | |
Water -> \(InfoW i) -> SourceW $ waterSource i | |
Bananas -> \(InfoB i) -> SourceB $ bananasSource i | |
programF :: AGood InfoF -> String | |
programF (AGood info) = renderF (sourceF info) (containerF info) | |
---------------- GADTs data family with invertion ---------------- | |
--- finally a radical approach which invert the code dependency stating that the | |
-- common functionalities were known before data definition | |
-- and are the unique index of the goods data | |
-- now we can avoid the data constructor as well | |
data family InfoFF (i :: GoodT) | |
data family SourceFF (i :: GoodT) | |
data family ContainerFF (i :: GoodT) | |
data instance InfoFF WaterT = WaterInfo' | |
{ waterName' :: String | |
, waterQuality' :: Int | |
, waterSource' :: SourceFF WaterT | |
, waterContainer' :: Box (ContainerFF WaterT) | |
} | |
deriving (Show) | |
data instance ContainerFF WaterT | |
= Bottle' Int | |
| Jug' Int | |
| BagInBox' Int | |
| Tank' Int | |
deriving (Show) | |
data instance SourceFF WaterT = River' String | Lake' String | Rain' | |
deriving (Show) | |
data instance InfoFF BananasT = BananasInfo' | |
{ bananasName' :: String | |
, bananasQuality' :: String | |
, bananasColor' :: Color | |
, bananasSource' :: SourceFF BananasT | |
, bananasContainer' :: Box (ContainerFF BananasT) | |
} | |
deriving (Show) | |
data instance ContainerFF BananasT | |
= Bunch' Int | |
| Crate' Int | |
deriving (Show) | |
data instance SourceFF BananasT | |
= MyTree' | |
| Plantation' String Coo | |
| Distributor' String | |
deriving (Show) | |
renderWater' :: SourceFF WaterT -> Box (ContainerFF WaterT) -> String | |
renderWater' s c = "WATER! " ++ show s ++ " " ++ show c | |
renderBananas' :: SourceFF BananasT -> Box (ContainerFF BananasT) -> String | |
renderBananas' s c = "BANANAS! " ++ show s ++ " " ++ show c | |
renderFF :: forall i. (GoodIndex i) => SourceFF i -> (Compose Box ContainerFF) i -> String | |
renderFF = case goodOf @i of | |
Water -> \s c -> renderWater' s $ getCompose c | |
Bananas -> \s c -> renderBananas' s $ getCompose c | |
containerFF :: forall i. (GoodIndex i) => InfoFF i -> (Compose Box ContainerFF) i | |
containerFF = case goodOf @i of | |
Water -> Compose . waterContainer' | |
Bananas -> Compose . bananasContainer' | |
sourceFF :: forall i. (GoodIndex i) => InfoFF i -> SourceFF i | |
sourceFF = case goodOf @i of | |
Water -> waterSource' | |
Bananas -> bananasSource' | |
programFF :: AGood InfoFF -> String | |
programFF (AGood info) = renderFF (sourceFF info) (containerFF info) | |
-- One particular thing I do not like about the data family full approach is | |
-- that data is indexed by only one kind of types (GoodT) which means that if | |
-- we spot another angle of commonality we cannot reuse the data families and we | |
-- have to resort to type familes again. So which angle of commonality is the best | |
-- to index the data families? This looks arbitrary to me. | |
-- Moreover data families becomes a dependency of the data, so that implementing | |
-- data becomes dedicated to the data families. Reusing that data in other contexts | |
-- becomes awkward because the types now always mention the data families. | |
-- In fact most of the time we want to use this machinary on datatypes that already exists | |
-- so we are out of luck. | |
-- Finally the Compose stuff is annoying and probably doesn't scale well. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment