This file is my attempt to clarify and formalise my own thoughts about dependency resolution problem.
{-# LANGUAGE DeriveFoldable #-} | |
{-# LANGUAGE FlexibleContexts #-} | |
{-# LANGUAGE FlexibleInstances #-} | |
{-# LANGUAGE FunctionalDependencies #-} | |
{-# LANGUAGE MultiParamTypeClasses #-} | |
{-# LANGUAGE RankNTypes #-} | |
{-# LANGUAGE ScopedTypeVariables #-} | |
{-# LANGUAGE UndecidableInstances #-} | |
-- | This file is my attempt to clarify and formalise my own thoughts about | |
-- dependency resolution problem. | |
module DepProb where | |
-- Imports | |
import Data.Foldable (Foldable (foldMap), all) | |
import Data.List (find) | |
import Data.Map (Map) | |
import qualified Data.Map as Map | |
import Data.Maybe (fromMaybe) | |
import Data.Monoid (All (..), Any (..), Monoid (..), Sum (..)) | |
import Data.Set (Set) | |
import qualified Data.Set as Set | |
import Prelude hiding (all) | |
-- | To describe the dependency solving problem, we start with its solution. | |
-- | |
-- A solution to dependency resolution problem is a set of packages, identified | |
-- by name @n@ ('Distribution.Package.PackageName'), with particular version | |
-- @v@ ('Data.Version.Version') and some additional configuration @a@ (more | |
-- about this later). | |
-- | |
-- /Note:/ With dependent type, we'd like to have @Solution n v (a : n -> v -> | |
-- *) = ...@, i.e. configuration variants can be different per package version. | |
-- | |
-- /Note:/ We could also have per-component solutions (libraries, executables, | |
-- test-suites and benchmarks could be treaten as separate "packages"). | |
newtype Solution n v a = S { getSolution :: Map n (v, a) } | |
-- | The problem itself is identified by /constraints/ that we can have on | |
-- the 'Solution' | |
class Constraint n v a c | c -> n v a where | |
holdsOn :: c -> Solution n v a -> Bool | |
-- | One of the inputs to the problem, is a package database: | |
newtype DB n v a c = DB { getDB :: Map n (Map (v, a) c) } | |
-- | With initial constraint (constraining packages already installed, and | |
-- packages we'd like to install), we can write a type of decision function. | |
type Solve n v a c = Constraint n v a c => c -> DB n v a c -> Maybe (Solution n v a) | |
-- | Now it's possible to talk about whether 'Solution' is valid. We omit | |
-- a notion of installability (i.e. preventing cycles), but it's easy to add. | |
validIn | |
:: forall n v a c. (Constraint n v a c, Ord n, Ord v, Ord a) | |
=> Solution n v a -> DB n v a c -> Bool | |
validIn solution db = getAll $ Map.foldMapWithKey f (getSolution solution) | |
where | |
f :: n -> (v, a) -> All | |
f n va = All $ fromMaybe False $ do | |
m <- Map.lookup n (getDB db) | |
c <- Map.lookup va m | |
return $ c `holdsOn` solution | |
------------------------------------------------------------------------------- | |
-- Solution quality | |
------------------------------------------------------------------------------- | |
-- | Solution quality is difficult topic. Minimal solutions, i.e. plans with | |
-- less packages considered better. This is easy metric to define. Yet when | |
-- splitting packages (e.g. network and network-uri), this metric lies. | |
solutionSizeMetric :: Solution n v a -> Int | |
solutionSizeMetric = Map.size . getSolution | |
-- | There are usually various solutions differing by the versions of packages | |
-- present, we'd prefer plans with recent versions of packages. Yet pointwise | |
-- comparison of solutions isn't total order, as there could be valid solutions | |
-- @foo-1, bar-2@ and @foo-2, bar-1@, but @foo-2, bar-2@ is invalid. | |
-- | |
-- Still it's useful to define partial order. /TODO/ | |
solutionVerLeq :: Solution n v a -> Solution n v a -> Bool | |
solutionVerLeq = undefined | |
-- Thoughts about SMT-solvers: they could generate initial solutions quite fast. | |
-- the disjunction of those few first ones could be used as a lower constraints | |
-- for second iterations. That would be nice to experiment with. | |
-- | |
-- Also as Cabal-test suite has DSL for specifying package indexes, | |
-- one could play with it. | |
------------------------------------------------------------------------------- | |
-- Concrete implementations | |
------------------------------------------------------------------------------- | |
-- | The simplest type which can be 'Constraint'. | |
newtype G n v a = G { gHoldsOn :: Solution n v a -> Bool } | |
instance Constraint n v a (G n v a) where | |
holdsOn = gHoldsOn | |
-- 'G' doesn't expose any underlying structue, so the best decision procedure | |
-- we could have is to go thru all possible solutions and pick first one. | |
-- | |
-- /Nitpick:/ this could be better, as solution procedure could learn about | |
-- conflicting package version, and try to pick the next solution without them. | |
-- It would require slightly different type-signature though. | |
solveG | |
:: forall n v a. (Ord n, Ord v, Ord a) | |
=> [Solution n v a] | |
-> Solve n v a (G n v a) | |
solveG solutions = solve | |
where | |
solve :: Solve n v a (G n v a) | |
solve initialConstraint db = find pred solutions | |
where | |
pred s = initialConstraint `holdsOn` s && validIn s db | |
------------------------------------------------------------------------------- | |
-- Per-package constraints | |
------------------------------------------------------------------------------- | |
-- | Let's assume that each package has a fixed set of dependencies, | |
-- with constraints per package only. | |
-- | |
-- To return to additional configuration: let there be none, i.e. @a ~ ()@. | |
-- This still models manual flags, as their effect can be applied on 'DB' before | |
-- giving it to solving procedure. | |
-- | |
-- I'll call this type 'Old', because this is situation as per @Cabal <1.18@. | |
-- After that 'Cabal' learned about automatic flags. | |
newtype Old n v = Old { getOld :: Map n (v -> Bool) } | |
instance Ord n => Constraint n v () (Old n v) where | |
old `holdsOn` solution = getAll $ Map.foldMapWithKey f (getOld old) | |
where | |
f :: n -> (v -> Bool) -> All | |
f n pred = All $ fromMaybe False $ do | |
(v, _) <- Map.lookup n (getSolution solution) | |
return $ pred v | |
------------------------------------------------------------------------------- | |
-- Disjunctions | |
------------------------------------------------------------------------------- | |
-- | 'Old' constraint formulation reprsents more information, to be exploited | |
-- by the solver. OTOH now we have limited what kind of dependencies we can pose. | |
-- | |
-- Feature most needed, is ability to express disjunctions of different packages. | |
-- For example: | |
-- | |
-- @ | |
-- build-depends: network >= 2.6 && network-uri >= 2.6 || network < 2.6 | |
-- @ | |
-- | |
-- Currently Cabal solves this by having /automatic flags/, which solving | |
-- procedure is free to choose. This can be formulated by small adjustment to | |
-- the 'Old'. Instead of giving single `Map`, we can many. Then the @a@ - | |
-- extra argument would be an index into that list (here we'd like to have | |
-- dependent types, to make indexing safe). | |
-- | |
-- /See:/ <https://github.com/haskell/cabal/issues/2033> | |
-- | |
-- /Note:/ Cabal solver is smart, and considers the intersection of | |
-- dependencies (i.e. dependencies which are same for each configuration). The | |
-- intersection is possible to extract from this formulation as well. | |
newtype Dis n v = Dis { getDis :: [Map n (v -> Bool)] } | |
instance Ord n => Constraint n v Int (Dis n v) where | |
dis `holdsOn` solution = | |
getAny . foldMap (\m -> Any . getAll $ Map.foldMapWithKey f m) $ getDis dis | |
where | |
f :: n -> (v -> Bool) -> All | |
f n pred = All $ fromMaybe False $ do | |
-- here we don't care which index was picked for each package | |
-- it's there to be used by 'validIn'. | |
(v, _) <- Map.lookup n (getSolution solution) | |
return $ pred v | |
------------------------------------------------------------------------------- | |
-- Depends | |
------------------------------------------------------------------------------- | |
-- | Depends proposal is an alternative proposal for disjunctions: | |
-- | |
-- @ | |
-- if depend(network >= 2.6) | |
-- build-depends: network-uri >= 2.6 | |
-- @ | |
-- | |
-- It can be interpreted as an additional implication of constraints. We can apply | |
-- it on any 'Constraint' | |
-- | |
-- /See:/ <https://github.com/haskell/cabal/issues/2033#issuecomment-231772073> | |
-- | |
-- /Note:/ there could be less powerful versions of 'Depends', where premise of | |
-- depends is less powerful than full @c@ (i.e. if we have automatic flags, how | |
-- this features interact). | |
data Depends c = Depends | |
{ depC :: c | |
, depI :: [(c, c)] -- ^ List of possible implications | |
} | |
instance Constraint n v a c => Constraint n v a (Depends c) where | |
dep `holdsOn` solution = depC dep `holdsOn` solution && impl | |
where | |
impl = getAll . foldMap (All . pred) $ depI dep | |
pred (prem, conl) = prem `holdsOn` solution ==> conl `holdsOn` solution | |
------------------------------------------------------------------------------- | |
-- Provides | |
------------------------------------------------------------------------------- | |
-- | Sometimes we want to have mutually exclusive packages. One proposal to solve | |
-- this is 'provides' mechanism: | |
-- | |
-- @ | |
-- provides: orphans-base-foldable-either | |
-- @ | |
-- | |
-- With the idea, that any provided identifier could exist at most once | |
-- in the 'Solution'. | |
-- | |
-- See <https://github.com/haskell/cabal/issues/3061> | |
data Provides i c = Provides | |
{ providesC :: c | |
, providesI :: Set i -- ^ Set of provided identifiers | |
} | |
instance (Ord i, Constraint n v a c) => Constraint n v a (Provides i c) where | |
pro `holdsOn` solution = providesC pro `holdsOn` solution | |
-- | There are different ways to formulate exclusivity of @i@. | |
-- Clearenst way is to do the check on the whole 'Solution'. | |
validInWithProvides | |
:: forall n v a c i. (Constraint n v a c, Ord n, Ord v, Ord a, Ord i) | |
=> Solution n v a -> DB n v a (Provides i c) -> Bool | |
validInWithProvides solution db = validIn solution db && providesCheck | |
where | |
providesCheck = all (<= 1) . Map.foldMapWithKey f $ getSolution solution | |
f n va = fromMaybe mempty $ do | |
m <- Map.lookup n (getDB db) | |
c <- Map.lookup va m | |
return $ MapSum $ setToMap (Sum 1 :: Sum Integer) (providesI c) | |
-- If we also make possible to depend on @provides@ idenifiers, it would | |
-- be possible to force automatic flag selection ('Dis'). This could be | |
-- useful feature for packages providing instances, so they could depend truly | |
-- optionally on large set of dependencies. | |
-- | |
-- @ | |
-- if flag(profunctors) | |
-- -- This identifiers could be scoped by package with some syntax | |
-- provide: aeson-profunctors | |
-- @ | |
-- | |
-- @ | |
-- build-depends: aeson >= 0.11 | |
-- provide-depends: aeson-profunctors | |
-- @ | |
-- | |
-- /TODO/ write 'Constraint' instance | |
data ProDep i c = ProDep | |
{ proDepC :: c | |
, proDepI :: Set i | |
, proDepD :: Set i -- ^ we require this identifiers to present in final 'Solution' | |
} | |
------------------------------------------------------------------------------- | |
-- Constraint proposal | |
------------------------------------------------------------------------------- | |
-- | Alternative to the /Provides/ -proposal is a constraint proposal. | |
-- | |
-- In this framework it's much simpler than 'Provides', as one can see from its | |
-- 'Constraint' implementation. | |
-- | |
-- It also highlights one implicit property of 'Old' and 'Dis', if package name | |
-- is in the map, it should also be in the 'Solution'. | |
-- Packages in 'conCon' doesn't need to be. | |
-- | |
-- See <https://github.com/haskell/cabal/issues/3061> | |
data Con n v c = Con | |
{ conC :: c -- ^ original constraint | |
, conCon :: Map n (v -> Bool) -- ^ additional constraints in 'Old' style. We could want 'Dis' style too, but it's trivial to adopt. | |
} | |
instance (Ord n, Constraint n v a c) => Constraint n v a (Con n v c) where | |
con `holdsOn` solution = conC con `holdsOn` solution && conHolds | |
where | |
conHolds = getAll $ Map.foldMapWithKey f (conCon con) | |
-- | Here we don't require that packages listed in 'conCon' are in the 'Solution'. | |
-- Yet if they are there, constraint must hold. | |
f :: n -> (v -> Bool) -> All | |
f n pred = All $ fromMaybe True $ do | |
(v, _) <- Map.lookup n (getSolution solution) | |
return $ pred v | |
------------------------------------------------------------------------------- | |
-- Conflicts proposaal | |
------------------------------------------------------------------------------- | |
-- | Conflicts proposal is even simpler then 'constraint', yet powerful as it | |
-- introduces negation to the constraint language. | |
-- | |
-- See <https://github.com/haskell/cabal/issues/3061> | |
data Conflict c = Conflict | |
{ conflictPos :: c | |
, conflictNeg :: c | |
} | |
instance Constraint n v a c => Constraint n v a (Conflict c) where | |
c `holdsOn` solution = | |
conflictPos c `holdsOn` solution && | |
not (conflictNeg c `holdsOn` solution) | |
------------------------------------------------------------------------------- | |
-- Utilities | |
------------------------------------------------------------------------------- | |
infix 4 ==> | |
(==>) :: Bool -> Bool -> Bool | |
False ==> _ = True | |
True ==> b = b | |
setToMap :: Ord a => b -> Set a -> Map a b | |
setToMap x = Map.fromList . map f . Set.toList | |
where | |
f k = (k, x) | |
newtype MapSum n i = MapSum { getMapSum :: Map n (Sum i) } | |
instance Foldable (MapSum n) where | |
foldMap f (MapSum m) = foldMap (f . getSum) m | |
instance (Ord n, Num i) => Monoid (MapSum n i) where | |
mempty = MapSum Map.empty | |
mappend (MapSum a) (MapSum b) = MapSum (Map.unionWith mappend a b) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment