gelisam/ValueBasedDebugging.hs

## ValueBasedDebugging.hs
-- A proof of concept for a value-based debugging technique: instead of
-- debugging by imperatively stepping through the evaluation, we instead
-- produce a value representing the evaluation tree.
--
-- This value can then be manipulated like any other value, e.g. instead of adding
-- a breakpoint and stepping until the breakpoint is hit, you can write an
-- ordinary function which recurs into the evaluation tree until it finds a node
-- of interest. This way, more complex conditions can be easily expressed, e.g.
-- "find the smallest subtree in which a call of 'double' does not result in an
-- output which is the double of its input".

{-# LANGUAGE PatternGuards #-}
module ValueBasedDebugging where

import Data.Foldable (minimumBy)
import Data.Ord (comparing)


---------------------
-- Lambda calculus --
---------------------

-- For this proof of concept, we use a simple lambda calculus with integers and
-- addition. Everything in this section is standard.

-- | Abstract syntax of the toy lambda calculus.
data Term
  = Var String
    -- ^ variable reference; resolved by name in the environment
  | Lam String Term
    -- ^ lambda abstraction: parameter name and body
  | App Term Term
    -- ^ application: the function term, then the argument term
  | IntLit Int
    -- ^ integer literal
  | Add Term Term
    -- ^ addition of two sub-terms
  deriving (Eq, Show)

-- | The results which evaluation can produce.
data Value
  = Closure Env String Term
    -- ^ a function: the environment captured where the lambda was evaluated,
    -- the parameter name, and the body
  | IntVal Int
    -- ^ an integer
  deriving (Eq, Show)

-- | Variable bindings, as an association list. Variables are resolved with
-- 'lookup', so a binding consed onto the front shadows any older binding of
-- the same name.
type Env = [(String, Value)]

-- | The language has no primitive 'let' construct, so we encode
-- @let x = xDefinition in body@ as the immediately-applied lambda
-- @(\\x -> body) xDefinition@.
let_ :: String -> Term -> Term -> Term
let_ x xDefinition body = Lam x body `App` xDefinition

-- | An example program with an intentional bug. The goal is to compute
-- @2*2*2*2 = 16@ by applying @double@ three times to @2@, but @double x@ was
-- accidentally defined as @x + 2@ instead of @x + x@, so the program
-- produces @2+2+2+2 = 8@.
example :: Term
example = let_ "double" buggyDouble tripleApplication
  where
    -- bug! The second operand should have been @Var "x"@.
    buggyDouble = Lam "x" (Var "x" `Add` IntLit 2)

    -- double (double (double 2))
    tripleApplication = foldr ($) (IntLit 2) (replicate 3 callDouble)

    callDouble = App (Var "double")

-- |
-- Evaluate a term in the given environment. Evaluation can either succeed
-- with a value ('Right'), or fail with an error message ('Left'). Every
-- failure, including an attempt to add functions, is reported as a 'Left'
-- rather than as an exception, so that callers such as the debugger can
-- inspect and print the outcome of any sub-term safely.
--
-- In an application or an addition, both sub-terms are evaluated (left one
-- first) before the resulting values are checked, so an error inside a
-- sub-term takes precedence over a type error on the values.
--
-- >>> eval $ Add (IntLit 2) (IntLit 3)
-- Right (IntVal 5)
-- >>> eval example
-- Right (IntVal 8)
-- >>> eval $ Add (IntLit 2) (Var "x")
-- Left "Variable \"x\" not in scope"
-- >>> eval $ App (IntLit 2) (IntLit 3)
-- Left "Integer 2 is not a function"
-- >>> eval $ Add (Lam "x" (Var "x")) (IntLit 3)
-- Left "Functions cannot be added"
evalWith
  :: Env
  -> Term
  -> Either String Value
evalWith env (Var x) = case lookup x env of
  Just v -> pure v
  Nothing -> Left $ "Variable " ++ show x ++ " not in scope"
evalWith env (Lam x body) =
  -- Capture the current environment so the body can still see it when the
  -- closure is applied later.
  pure $ Closure env x body
evalWith env (App f e) = do
  fv <- evalWith env f
  ev <- evalWith env e
  case fv of
    Closure capturedEnv x body ->
      -- The body runs in the closure's captured environment, not the
      -- caller's, extended with the argument binding.
      evalWith ((x, ev) : capturedEnv) body
    IntVal n ->
      Left $ "Integer " ++ show n ++ " is not a function"
evalWith _ (IntLit n) =
  pure $ IntVal n
evalWith env (Add e1 e2) = do
  v1 <- evalWith env e1
  v2 <- evalWith env e2
  case (v1, v2) of
    (IntVal n1, IntVal n2) ->
      pure $ IntVal (n1 + n2)
    _ ->
      -- At least one operand is a closure. Report it like every other
      -- evaluation failure instead of throwing with 'error'.
      Left "Functions cannot be added"

-- | Evaluate a closed term, i.e. starting from the empty environment.
eval :: Term -> Either String Value
eval term = evalWith [] term


--------------------------
-- Value-based debugger --
--------------------------

-- At each node of the evaluation tree, an input term is evaluated to an output
-- value in an environment. It is possible to look at the output right away, or
-- to dive down the 'details' field to look at the first evaluation step towards
-- that output.
data EvalTree = EvalTree
  { environment
      :: Env
      -- ^ the bindings in scope while 'input' is evaluated
  , input
      :: Term
      -- ^ the term being evaluated at this node
  , output
      :: Either String Value
      -- ^ the result of evaluating 'input' in 'environment': a value, or
      -- an error message
  , details
      :: EvalStep
      -- ^ the sub-evaluations, if any, which contribute to 'output'
  }
  deriving Show

-- Some input terms are already values, and thus do not require any further
-- evaluation. Other terms require the evaluation of multiple sub-terms, which
-- is why the evaluation tree is a tree.
--
-- While 'eval' chooses to evaluate those sub-terms in a specific order, for
-- debugging it is more convenient to allow the user to choose which sub-term to
-- explore next.
data EvalStep
  = EvalVar
    -- ^ the input was a variable; there are no sub-evaluations
  | EvalLam
    -- ^ the input was a lambda; there are no sub-evaluations
  | EvalApp AppDetails
    -- ^ the input was an application; see 'AppDetails'
  | EvalIntLit
    -- ^ the input was an integer literal; there are no sub-evaluations
  | EvalAdd EvalTree EvalTree
    -- ^ the input was an addition; the evaluation trees of the left and
    -- right operands
  deriving Show

-- Ideally, 'appFun' evaluates to (the closure representation of) @Lam "x" body@,
-- in which case 'appArgName' is @Just "x"@ and 'appResult' is the evaluation
-- tree for @body@ in an extended environment in which @x@ is bound to the output
-- of 'appArg'. However, another possibility is that the call is about to fail
-- with a type error, because 'appFun' does not evaluate to a function. In that
-- case, 'appArgName' and 'appResult' are both 'Nothing'.
data AppDetails = AppDetails
  { appFun
      :: EvalTree
      -- ^ evaluation tree of the term in function position
  , appArgName
      :: Maybe String
      -- ^ the closure's parameter name, or 'Nothing' if 'appFun' did not
      -- evaluate to a closure
  , appArg
      :: EvalTree
      -- ^ evaluation tree of the argument term
  , appResult
      :: Maybe EvalTree
      -- ^ evaluation tree of the closure's body; 'Nothing' unless 'appFun'
      -- evaluated to a closure and 'appArg' evaluated to a value
  }
  deriving Show

-- |
-- Evaluate an input term into an output value, while recording the evaluation
-- tree. Note that the output value is computed using the 'evalWith' function
-- from the previous section, _not_ by calculating the entire evaluation tree
-- and then throwing away the debugging information. This allows the user to
-- take advantage of laziness, by only generating debugging information for the
-- parts of the evaluation tree that are actually needed.
--
-- For example, here is a computation consisting of two sub-expressions. We will
-- generate debugging information for the @3 + 4@ sub-expression but not for the
-- @1 + 2@ sub-expression.
--
-- >>> node1234 = debug $ (IntLit 1 `Add` IntLit 2) `Add` (IntLit 3 `Add` IntLit 4)
--
-- Step into the overall expression:
-- >>> EvalAdd node12 node34 = details node1234
--
-- Step over the @1 + 2@ sub-expression:
-- >>> output node12
-- Right (IntVal 3)
--
-- Step into the @3 + 4@ sub-expression:
-- >>> EvalAdd node3 node4 = details node34
--
-- Evaluate the leaves:
-- >>> output node3
-- Right (IntVal 3)
-- >>> output node4
-- Right (IntVal 4)
--
-- Step out of the @3 + 4@ sub-expression:
-- >>> output node34
-- Right (IntVal 7)
--
-- Step out of the overall expression:
-- >>> output node1234
-- Right (IntVal 10)
debugWith
  :: Env
  -> Term
  -> EvalTree
debugWith env term = EvalTree env term (evalWith env term) step
  where
    -- Direct sub-terms are evaluated in the same environment as their parent.
    subTree :: Term -> EvalTree
    subTree = debugWith env

    -- The sub-evaluations of this node. Nothing here is computed unless the
    -- 'details' field is actually inspected.
    step :: EvalStep
    step = case term of
      Var {} -> EvalVar
      Lam {} -> EvalLam
      IntLit {} -> EvalIntLit
      Add e1 e2 -> EvalAdd (subTree e1) (subTree e2)
      App funcTerm argTerm ->
        EvalApp (application (subTree funcTerm) (subTree argTerm))

    application :: EvalTree -> EvalTree -> AppDetails
    application funcNode argNode = AppDetails
      { appFun = funcNode
      , appArgName = case output funcNode of
          Right (Closure _ argName _) -> Just argName
          _ -> Nothing
      , appArg = argNode
        -- The body can only be explored when the function position yields a
        -- closure and the argument yields a value; the function position is
        -- checked first.
      , appResult = case output funcNode of
          Right (Closure capturedEnv argName body) ->
            case output argNode of
              Right argValue ->
                Just (debugWith ((argName, argValue) : capturedEnv) body)
              Left _ ->
                Nothing
          _ -> Nothing
      }

-- | Build the evaluation tree of a closed term, i.e. starting from the empty
-- environment.
debug :: Term -> EvalTree
debug term = debugWith [] term


-----------------
-- Breakpoints --
-----------------

-- | The evaluation tree of the buggy 'example' program, evaluated in the
-- empty environment.
exampleEvalTree :: EvalTree
exampleEvalTree = debugWith [] example

-- We've seen what the usual "step into", "step over", and "step out" commands
-- of a debugger translate to in the value-based debugging world. What about
-- breakpoints? In an imperative debugger, running up to a breakpoint means to
-- automatically step into and out of sub-calls until a particular line is
-- encountered. Or, for data breakpoints, until a particular variable is set to
-- a value which matches a particular predicate. In a value-based debugger, we
-- generalize the concept: a breakpoint is a predicate on the evaluation tree.
-- The predicate can look at the line number (or it could if our term
-- representation had line numbers), at the values of the variables in the
-- environment, or at anything else which can be computed from the evaluation
-- tree.

-- |
-- Flatten an evaluation tree into a list, in pre-order: the node itself,
-- followed by all the nodes of each of its sub-evaluations. For an
-- application, the sub-evaluations are the function, the argument, and (when
-- present) the function's body, in that order.
--
-- >>> length $ allNodes $ debug $ Add (IntLit 2) (IntLit 3)
-- 3
allNodes :: EvalTree -> [EvalTree]
allNodes node = node : concatMap allNodes (children (details node))
  where
    -- The immediate sub-evaluations of a step.
    children :: EvalStep -> [EvalTree]
    children step = case step of
      EvalApp app -> [appFun app, appArg app] ++ maybe [] (: []) (appResult app)
      EvalAdd lhs rhs -> [lhs, rhs]
      EvalVar -> []
      EvalLam -> []
      EvalIntLit -> []

-- |
-- A breakpoint on data: holds for the nodes whose environment binds the
-- given variable to a value satisfying the predicate. Nodes in which the
-- variable is not in scope never match.
--
-- >>> nodes = allNodes exampleEvalTree
-- >>> matches = filter (dataBreakpoint "x" (== IntVal 4)) nodes
-- >>> mapM_ (putStrLn . prettyEval) matches
-- (x + 2) --> 6
-- x --> 4
-- 2 --> 2
dataBreakpoint :: String -> (Value -> Bool) -> EvalTree -> Bool
dataBreakpoint varName predicate node =
  maybe False predicate (lookup varName (environment node))


------------------------
-- Sub-program finder --
------------------------

-- Being able to write an _arbitrary_ predicate, instead of being limited to
-- line numbers and data breakpoints, unlocks a new world of debugging queries.
--
-- For example, our example program is producing the wrong answer, but let's
-- pretend that the code is so large that we can't understand why just by
-- looking at the code. Let's use the evaluation tree to simplify the problem,
-- by finding the smallest sub-program which also produces the wrong answer.
--
-- To do this, we will use an oracle which always produces the right answer. At
-- first glance, it might sound silly to assume that we have such an oracle,
-- because if we had one, then we wouldn't need to debug our program, we could
-- just trash our program and use our oracle instead. But oracles are actually a
-- common technique for testing refactorings and optimizations: the oracle is
-- simply the program before the refactoring, or the program without the
-- optimization.

-- |
-- Whether the term is an application whose function position is a direct
-- reference to the variable with the given name.
--
-- >>> isCallTo "double" $ App (Var "double") (IntLit 2)
-- True
-- >>> isCallTo "double" $ App (Var "triple") (IntLit 2)
-- False
isCallTo :: String -> Term -> Bool
isCallTo expectedFuncName term = case term of
  App (Var actualFuncName) _ -> actualFuncName == expectedFuncName
  _ -> False

-- |
-- Every node of 'exampleEvalTree' whose input is a call to @double@.
--
-- >>> mapM_ (putStrLn . prettyEval) doubleCalls
-- (double (double (double 2))) --> 8
-- (double (double 2)) --> 6
-- (double 2) --> 4
doubleCalls :: [EvalTree]
doubleCalls =
  [ node
  | node <- allNodes exampleEvalTree
  , isCallTo "double" (input node)
  ]

-- |
-- Whether an application node agrees with the oracle: its argument and its
-- output must both be integers, and the output must equal the oracle applied
-- to the argument. Any other node (not an application, a failed evaluation,
-- or a non-integer value) does not match.
--
-- >>> mismatches = filter (not . matchesOracle (* 2)) doubleCalls
-- >>> mapM_ (putStrLn . prettyEval) mismatches
-- (double (double (double 2))) --> 8
-- (double (double 2)) --> 6
matchesOracle :: (Int -> Int) -> EvalTree -> Bool
matchesOracle oracle node = case details node of
  EvalApp app ->
    -- The argument's output is inspected before the node's own output.
    case (output (appArg app), output node) of
      (Right (IntVal argInt), Right (IntVal actualOutput)) ->
        actualOutput == oracle argInt
      _ ->
        False
  _ ->
    False

-- |
-- The number of constructors in a term: one for the term itself, plus the
-- sizes of its sub-terms.
--
-- >>> size $ Add (IntLit 2) (IntLit 3)
-- 3
-- >>> map (size . input) doubleCalls
-- [7,5,3]
size :: Term -> Int
size term = 1 + sum (map size (subTerms term))
  where
    subTerms :: Term -> [Term]
    subTerms t = case t of
      Var {} -> []
      IntLit {} -> []
      Lam _ body -> [body]
      App f e -> [f, e]
      Add e1 e2 -> [e1, e2]

-- |
-- Among the calls to @double@ whose result disagrees with the @(* 2)@ oracle,
-- the one whose input term is the smallest according to 'size'.
--
-- >>> putStrLn $ prettyEval smallestSubprogramDemonstratingTheBug
-- (double (double 2)) --> 6
smallestSubprogramDemonstratingTheBug :: EvalTree
smallestSubprogramDemonstratingTheBug =
  minimumBy (comparing programSize) buggyCalls
  where
    programSize :: EvalTree -> Int
    programSize = size . input

    buggyCalls :: [EvalTree]
    buggyCalls = [ call | call <- doubleCalls, not (matchesOracle (* 2) call) ]

-- Tada! We've found the smallest sub-program which demonstrates the bug. Isn't
-- value-based debugging amazing?


------------------------
-- Minimal repro case --
------------------------

-- But we are not done yet! We didn't perform a series of actions on an
-- ephemeral instance of our program, we wrote a _function_, and functions are
-- reusable. This means we can use this same function to debug the next program.
-- Or a variant of the current program. That's right, since the evaluation tree
-- is an ordinary value, this means we are not limited to debugging the program
-- as it is, we can also generate alternative implementations on the fly and
-- examine what would have happened if we had written the program differently.
--
-- For example, let's say that the sub-program we found is still too complex.
-- We can make systematic simplifications to the program and test whether it
-- still exhibits the buggy behaviour. If any further simplification causes the
-- buggy behaviour to go away, then we have found a minimal repro case.

-- |
-- All the terms which can be obtained by applying a single simplification
-- somewhere in the given term. The empty list means that no further
-- simplification is possible.
--
-- The available simplifications are: moving an integer literal one step
-- closer to zero, replacing an addition by one of its operands, and replacing
-- a call of @double@ on an integer literal by the oracle's correct result.
--
-- >>> simplerTerms = simplify $ Add (IntLit 2) (IntLit 3)
-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
-- 2
-- 3
-- (1 + 3)
-- (2 + 2)
--
-- >>> simplerTerms = simplify example
-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
-- ((\double -> (double (double 4))) (\x -> (x + 2)))
-- ((\double -> (double (double (double 1)))) (\x -> (x + 2)))
-- ((\double -> (double (double (double 2)))) (\x -> x))
-- ((\double -> (double (double (double 2)))) (\x -> 2))
-- ((\double -> (double (double (double 2)))) (\x -> (x + 1)))
--
-- >>> simplerTerms = simplify $ input smallestSubprogramDemonstratingTheBug
-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
-- (double 4)
-- (double (double 1))
simplify :: Term -> [Term]
simplify term = case term of
  Var {} ->
    []
  Lam x body ->
    map (Lam x) (simplify body)
  App f e ->
    oracleShortcut f e
      ++ map (`App` e) (simplify f)
      ++ map (App f) (simplify e)
  IntLit n
    | n == 0 -> []
    | otherwise -> [IntLit (n - signum n)]  -- one step closer to zero
  Add e1 e2 ->
    e1 : e2 : (map (`Add` e2) (simplify e1) ++ map (Add e1) (simplify e2))
  where
    -- Replace a sub-call with the oracle's correct result.
    oracleShortcut :: Term -> Term -> [Term]
    oracleShortcut (Var "double") (IntLit n) = [IntLit (2 * n)]
    oracleShortcut _ _ = []

-- |
-- Repeatedly replace the term with the first of its one-step simplifications
-- (see 'simplify') which still satisfies the predicate, and stop once none
-- does. The search is greedy: the result is a term which cannot be
-- simplified any further without losing the buggy behaviour.
--
-- The first argument returns True if the program exhibits the bug; it must
-- return True for the original term given as the second argument.
--
-- >>> env = environment smallestSubprogramDemonstratingTheBug
-- >>> term = input smallestSubprogramDemonstratingTheBug
-- >>> incorrect = not . matchesOracle (* 2) . debugWith env
-- >>> predicate term = isCallTo "double" term && incorrect term
-- >>> reproCase = smallestReproCase predicate term
-- >>> putStrLn $ prettyEval $ debugWith env reproCase
-- (double 3) --> 5
smallestReproCase :: (Term -> Bool) -> Term -> Term
smallestReproCase predicate = shrink
  where
    shrink :: Term -> Term
    shrink current =
      -- Skip the simplifications which make the bug disappear.
      case dropWhile (not . predicate) (simplify current) of
        [] -> current
        stillBuggy : _ -> shrink stillBuggy

-- Tada! We've found the smallest repro case. Isn't value-based debugging
-- amazing? Let's implement something like this for a non-toy language!


---------------------
-- Pretty printing --
---------------------

-- Nothing interesting in the remainder of this file, just pretty printing
-- functions to make the output of the examples more readable.

-- |
-- Render a term using Haskell-like syntax. Lambdas, applications and
-- additions are always parenthesized.
--
-- >>> putStrLn $ prettyTerm $ Add (IntLit 2) (IntLit 3)
-- (2 + 3)
-- >>> putStrLn $ prettyTerm example
-- ((\double -> (double (double (double 2)))) (\x -> (x + 2)))
prettyTerm :: Term -> String
prettyTerm term = case term of
  Var x -> x
  IntLit n -> show n
  Lam x body -> concat ["(\\", x, " -> ", prettyTerm body, ")"]
  App f e -> concat ["(", prettyTerm f, " ", prettyTerm e, ")"]
  Add e1 e2 -> concat ["(", prettyTerm e1, " + ", prettyTerm e2, ")"]

-- |
-- Render a value. A closure is shown as the lambda it came from; its
-- captured environment is not displayed.
--
-- >>> putStrLn $ prettyValue $ IntVal 2
-- 2
-- >>> putStrLn $ prettyValue $ Closure [] "x" $ Add (Var "x") (IntLit 2)
-- (\x -> (x + 2))
prettyValue :: Value -> String
prettyValue value = case value of
  IntVal n -> show n
  Closure _ x body -> concat ["(\\", x, " -> ", prettyTerm body, ")"]

-- |
-- Render the outcome of an evaluation: the value on success, or the error
-- message prefixed with @Error: @ on failure.
--
-- >>> putStrLn $ prettyResult $ eval $ Add (IntLit 2) (IntLit 3)
-- 5
-- >>> putStrLn $ prettyResult $ eval example
-- 8
prettyResult :: Either String Value -> String
prettyResult = either ("Error: " ++) prettyValue

-- |
-- Render a node of the evaluation tree as its input term, an arrow, and its
-- output.
--
-- >>> putStrLn $ prettyEval $ debug $ Add (IntLit 2) (IntLit 3)
-- (2 + 3) --> 5
-- >>> putStrLn $ prettyEval $ debug example
-- ((\double -> (double (double (double 2)))) (\x -> (x + 2))) --> 8
prettyEval :: EvalTree -> String
prettyEval node = concat [renderedInput, " --> ", renderedOutput]
  where
    renderedInput = prettyTerm (input node)
    renderedOutput = prettyResult (output node)


-- | Prints a fixed confirmation message; the examples themselves live in the
-- doctest comments above.
main :: IO ()
main = putStrLn "typechecks."
	-- A proof of concept for a value-based debugging technique: instead of
	-- debugging by imperatively stepping through the evaluation, we instead
	-- produce a value representing the evaluation tree.
	--
	-- This value can then be manipulated like any other value, e.g. instead adding
	-- a breakpoint and stepping until the breakpoint is hit, you can write an
	-- ordinary function which recurs into the evaluation tree until it finds a node
	-- of interest. This way, more complex conditions can be easily expressed, e.g.
	-- "find the smallest subtree in which a call of 'double' does not result in an
	-- output which is the double of its input".

	{-# LANGUAGE PatternGuards #-}
	module ValueBasedDebugging where

	import Data.Foldable (minimumBy)
	import Data.Ord (comparing)


	---------------------
	-- Lambda calculus --
	---------------------

	-- For this proof of concept, we use a simple lambda calculus with integers and
	-- addition. Everything in this section is standard.

	data Term
	= Var String
	\| Lam String Term
	\| App Term Term
	\| IntLit Int
	\| Add Term Term
	deriving (Eq, Show)

	data Value
	= Closure Env String Term
	\| IntVal Int
	deriving (Eq, Show)

	type Env = [(String, Value)]

	-- Our language does not have a 'let' construct, but it is easy to define one in
	-- terms of the constructs it does have.
	let_
	:: String
	-> Term
	-> Term
	-> Term
	let_ x xDefinition body
	= App (Lam x body) xDefinition

	-- An example program with an intentional bug. We are trying to calculate
	-- @2*2*2*2 = 16@, but we accidentally defined @double x = x + 2@ instead of
	-- @double x = x + x@, so the result is @2+2+2+2 = 8@.
	example
	:: Term
	example
	= let_ "double" (Lam "x" (Add (Var "x")
	(IntLit 2))) -- bug!
	$ App (Var "double")
	$ App (Var "double")
	$ App (Var "double")
	$ IntLit 2

	-- \|
	-- Evaluation can either succeed with a value, or fail with an error message.
	--
	-- >>> eval $ Add (IntLit 2) (IntLit 3)
	-- Right (IntVal 5)
	-- >>> eval example
	-- Right (IntVal 8)
	-- >>> eval $ Add (IntLit 2) (Var "x")
	-- Left "Variable \"x\" not in scope"
	evalWith
	:: Env
	-> Term
	-> Either String Value
	evalWith env (Var x) = case lookup x env of
	Just v -> do
	pure v
	Nothing -> do
	Left $ "Variable " ++ show x ++ " not in scope"
	evalWith env (Lam x body) = do
	pure $ Closure env x body
	evalWith env (App f e) = do
	fv <- evalWith env f
	ev <- evalWith env e
	case fv of
	Closure capturedEnv x body -> do
	evalWith ((x,ev) : capturedEnv) body
	IntVal n -> do
	Left $ "Integer " ++ show n ++ " is not a function"
	evalWith _ (IntLit n) = do
	pure $ IntVal n
	evalWith env (Add e1 e2) = do
	v1 <- evalWith env e1
	v2 <- evalWith env e2
	case v1 of
	IntVal n1 -> case v2 of
	IntVal n2 -> do
	pure $ IntVal (n1 + n2)
	Closure {} -> do
	error "Functions cannot be added"
	Closure {} -> do
	error "Functions cannot be added"

	eval
	:: Term
	-> Either String Value
	eval
	= evalWith []


	--------------------------
	-- Value-based debugger --
	--------------------------

	-- At each node of the evaluation tree, an input term is evaluated to an output
	-- value in an environment. It is possible to look at the output right away, or
	-- to dive down the 'details' field to look at the first evaluation step towards
	-- that output.
	data EvalTree = EvalTree
	{ environment
	:: Env
	, input
	:: Term
	, output
	:: Either String Value
	, details
	:: EvalStep
	}
	deriving Show

	-- Some input terms are already values, and thus do not require any further
	-- evaluation. Other terms require the evaluation of multiple sub-terms, which
	-- is why the evaluation tree is a tree.
	--
	-- While 'eval' chooses to evaluate those sub-terms in a specific order, for
	-- debugging it is more convenient to allow the user to choose which sub-term to
	-- explore next.
	data EvalStep
	= EvalVar
	\| EvalLam
	\| EvalApp AppDetails
	\| EvalIntLit
	\| EvalAdd EvalTree EvalTree
	deriving Show

	-- Ideally, 'appFun' evaluates to (the closure representation of) @Lam "x" body@,
	-- in which case 'appArgName' is @Just "x"@ and 'appResult' is the evaluation
	-- tree for @body@ in a extended environment in which @x@ is bound to the output
	-- of 'appArg'. However, another possibility is that the call is about to fail
	-- with a type error, because 'appFun' does not evaluate to a function. In that
	-- case, 'appArgName' and 'appResult' are both 'Nothing'.
	data AppDetails = AppDetails
	{ appFun
	:: EvalTree
	, appArgName
	:: Maybe String
	, appArg
	:: EvalTree
	, appResult
	:: Maybe EvalTree
	}
	deriving Show

	-- \|
	-- Evaluate an input term into an output value, while recording the evaluation
	-- tree. Note that the output value is computed using the 'evalWith' function
	-- from the previous section, _not_ by calculating the entire evaluation tree
	-- and then throwing away the debugging information. This allows the user to
	-- take advantage of laziness, by only generating debugging information for the
	-- parts of the evaluation tree that are actually needed.
	--
	-- For example, here is a computation consisting of two sub-expressions. We will
	-- generate debugging information for the @3 + 4@ sub-expression but not for the
	-- @1 + 2@ sub-expression.
	--
	-- >>> node1234 = debug $ (IntLit 1 `Add` IntLit 2) `Add` (IntLit 3 `Add` IntLit 4)
	--
	-- Step into the overall expression:
	-- >>> EvalAdd node12 node34 = details node1234
	--
	-- Step over the @1 + 2@ sub-expression:
	-- >>> output node12
	-- Right (IntVal 3)
	--
	-- Step into the @3 + 4@ sub-expression:
	-- >>> EvalAdd node3 node4 = details node34
	--
	-- Evaluate the leaves:
	-- >>> output node3
	-- Right (IntVal 3)
	-- >>> output node4
	-- Right (IntVal 4)
	--
	-- Step out of the @3 + 4@ sub-expression:
	-- >>> output node34
	-- Right (IntVal 7)
	--
	-- Step out of the overall expression:
	-- >>> output node1234
	-- Right (IntVal 10)
	debugWith
	:: Env
	-> Term
	-> EvalTree
	debugWith env term = EvalTree
	{ environment
	= env
	, input
	= term
	, output
	= evalWith env term
	, details
	= go term
	}
	where
	go
	:: Term
	-> EvalStep
	go (Var {})
	= EvalVar
	go (Lam {})
	= EvalLam
	go (App funcTerm argTerm)
	= let funcNode = debugWith env funcTerm
	argNode = debugWith env argTerm
	in EvalApp $ AppDetails
	{ appFun
	= funcNode
	, appArgName
	= do
	Right (Closure _ argName _) <- pure $ output funcNode
	pure argName
	, appArg
	= argNode
	, appResult
	= do
	Right (Closure capturedEnv argName body) <- pure $ output funcNode
	Right argValue <- pure $ output argNode
	pure $ debugWith ((argName, argValue) : capturedEnv) body
	}
	go (IntLit {})
	= EvalIntLit
	go (Add e1 e2)
	= let node1 = debugWith env e1
	node2 = debugWith env e2
	in EvalAdd node1 node2

	debug
	:: Term
	-> EvalTree
	debug = debugWith []


	-----------------
	-- Breakpoints --
	-----------------

	exampleEvalTree
	:: EvalTree
	exampleEvalTree
	= debug example

	-- We've seen what the usual "step into", "step over", and "step out" commands
	-- of a debugger translate to in the value-based debugging world. What about
	-- breakpoints? In an imperative debugger, running up to a breakpoint means to
	-- automatically step into and out of sub-calls until a particular line is
	-- encountered. Or, for data breakpoints, until a particular variable is set to
	-- a value which matches a particular predicate. In a value-based debugger, we
	-- generalize the concept: a breakpoint is a predicate on the evaluation tree.
	-- The predicate can look at the line number (or it could if our term
	-- representation had line numbers), at the values of the variables in the
	-- environment, or at anything else which can be computed from the evaluation
	-- tree.

	-- \|
	-- >>> length $ allNodes $ debug $ Add (IntLit 2) (IntLit 3)
	-- 3
	allNodes
	:: EvalTree
	-> [EvalTree]
	allNodes node
	= node
	: case details node of
	EvalVar
	-> []
	EvalLam
	-> []
	EvalApp (AppDetails f _ arg result)
	-> allNodes f ++ allNodes arg ++ maybe [] allNodes result
	EvalIntLit
	-> []
	EvalAdd e1 e2
	-> allNodes e1 ++ allNodes e2

	-- \|
	-- Look for points in the evaluation where a particular variable is set to a
	-- value of interest.
	--
	-- >>> nodes = allNodes exampleEvalTree
	-- >>> matches = filter (dataBreakpoint "x" (== IntVal 4)) nodes
	-- >>> mapM_ (putStrLn . prettyEval) matches
	-- (x + 2) --> 6
	-- x --> 4
	-- 2 --> 2
	dataBreakpoint
	:: String
	-> (Value -> Bool)
	-> EvalTree -> Bool
	dataBreakpoint varName predicate node
	\| Just value <- lookup varName (environment node)
	= predicate value
	\| otherwise
	= False


	------------------------
	-- Sub-program finder --
	------------------------

	-- Being able to write an _arbitrary_ predicate, instead of being limited to
	-- line numbers and data breakpoints, unlocks a new world of debugging queries.
	--
	-- For example, our example program is producing the wrong answer, but let's
	-- pretend that the code is so large that we can't understand why just by
	-- looking at the code. Let's use the evaluation tree to simplify the problem,
	-- by finding the smaller sub-program which also produces the wrong answer.
	--
	-- To do this, we will use an oracle which always produces the right answer. At
	-- first glance, it might sound silly to assume that we have such an oracle,
	-- because if we had one, then we wouldn't need to debug our program, we could
	-- just trash our program and use our oracle instead. But oracles are actually a
	-- common technique for testing refactorings and optimizations: the oracle is
	-- simply the program before the refactoring, or the program without the
	-- optimization.

	-- \|
	-- >>> isCallTo "double" $ App (Var "double") (IntLit 2)
	-- True
	-- >>> isCallTo "double" $ App (Var "triple") (IntLit 2)
	-- False
	isCallTo
	:: String
	-> Term -> Bool
	isCallTo expectedFuncName (App (Var actualFuncName) _)
	= actualFuncName == expectedFuncName
	isCallTo _ _
	= False

	-- \|
	-- >>> mapM_ (putStrLn . prettyEval) doubleCalls
	-- (double (double (double 2))) --> 8
	-- (double (double 2)) --> 6
	-- (double 2) --> 4
	doubleCalls
	:: [EvalTree]
	doubleCalls
	= filter (isCallTo "double" . input)
	$ allNodes exampleEvalTree

	-- \|
	-- >>> mismatches = filter (not . matchesOracle (* 2)) doubleCalls
	-- >>> mapM_ (putStrLn . prettyEval) mismatches
	-- (double (double (double 2))) --> 8
	-- (double (double 2)) --> 6
	matchesOracle
	:: (Int -> Int)
	-> EvalTree -> Bool
	matchesOracle oracle node
	\| EvalApp (AppDetails _ _ argNode _) <- details node
	, Right (IntVal argInt) <- output argNode
	, Right (IntVal actualOutput) <- output node
	= actualOutput == oracle argInt
	\| otherwise
	= False

	-- \|
	-- >>> size $ Add (IntLit 2) (IntLit 3)
	-- 3
	-- >>> map (size . input) doubleCalls
	-- [7,5,3]
	size
	:: Term
	-> Int
	size (Var {})
	= 1
	size (Lam _ body)
	= 1 + size body
	size (App f e)
	= 1 + size f + size e
	size (IntLit {})
	= 1
	size (Add e1 e2)
	= 1 + size e1 + size e2

	-- \|
	-- >>> putStrLn $ prettyEval smallestSubprogramDemonstratingTheBug
	-- (double (double 2)) --> 6
	smallestSubprogramDemonstratingTheBug
	:: EvalTree
	smallestSubprogramDemonstratingTheBug
	= minimumBy (comparing (size . input))
	$ filter (not . matchesOracle (* 2))
	$ doubleCalls

	-- Tada! We've found the smallest sub-program which demonstrates the bug. Isn't
	-- value-based debugging amazing?


	------------------------
	-- Minimal repro case --
	------------------------

	-- But we are not done yet! We didn't perform a series of actions on an
	-- ephemeral instance of our program, we wrote a _function_, and functions are
	-- reusable. This means we can use this same function to debug the next program.
	-- Or a variant of the current program. That's right, since the evaluation tree
	-- is an ordinary value, this means we are not limited to debugging the program
	-- as it is, we can also generate alternative implementations on the fly and
	-- examine what would have been if we had written the program differently.
	--
	-- For example, let's say that the sub-program we found is still too complex.
	-- We can make systematic simplifications to the program and test whether it
	-- still exhibits the buggy behaviour. If any further simplification causes the
	-- buggy behaviour to go away, then we have found a minimal repro case.

	-- \|
	-- Each result makes a single simplification to the code.
	-- Returns the empty list if no further simplifications are possible.
	--
	-- >>> simplerTerms = simplify $ Add (IntLit 2) (IntLit 3)
	-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
	-- 2
	-- 3
	-- (1 + 3)
	-- (2 + 2)
	--
	-- >>> simplerTerms = simplify example
	-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
	-- ((\double -> (double (double 4))) (\x -> (x + 2)))
	-- ((\double -> (double (double (double 1)))) (\x -> (x + 2)))
	-- ((\double -> (double (double (double 2)))) (\x -> x))
	-- ((\double -> (double (double (double 2)))) (\x -> 2))
	-- ((\double -> (double (double (double 2)))) (\x -> (x + 1)))
	--
	-- >>> simplerTerms = simplify $ input smallestSubprogramDemonstratingTheBug
	-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
	-- (double 4)
	-- (double (double 1))
	simplify
	:: Term
	-> [Term]
	simplify (Var {})
	= []
	simplify (Lam x body)
	= [Lam x body' \| body' <- simplify body]
	simplify (App f@(Var "double") e@(IntLit n))
	= [IntLit (2 * n)] -- replace a sub-call with the oracle's correct result
	++ [ App f' e \| f' <- simplify f ]
	++ [ App f e' \| e' <- simplify e ]
	simplify (App f e)
	= [ App f' e \| f' <- simplify f ]
	++ [ App f e' \| e' <- simplify e ]
	simplify (IntLit n)
	\| n > 0
	= [IntLit (n-1)]
	\| n < 0
	= [IntLit (n+1)]
	\| otherwise
	= []
	simplify (Add e1 e2)
	= [e1, e2]
	++ [ Add e1' e2 \| e1' <- simplify e1 ]
	++ [ Add e1 e2' \| e2' <- simplify e2 ]

	-- \|
	-- >>> env = environment smallestSubprogramDemonstratingTheBug
	-- >>> term = input smallestSubprogramDemonstratingTheBug
	-- >>> incorrect = not . matchesOracle (* 2) . debugWith env
	-- >>> predicate term = isCallTo "double" term && incorrect term
	-- >>> reproCase = smallestReproCase predicate term
	-- >>> putStrLn $ prettyEval $ debugWith env reproCase
	-- (double 3) --> 5
	smallestReproCase
	:: (Term -> Bool) -- True if the program exhibits the bug
	-> Term -- the predicate must return True for this original term
	-> Term
	smallestReproCase predicate originalTerm
	= case filter predicate $ simplify originalTerm of
	simplerTerm : _
	-> smallestReproCase predicate simplerTerm
	[]
	-> originalTerm

	-- Tada! We've found the smallest repro case. Isn't value-based debugging
	-- amazing? Let's implement something like this for a non-toy language!


	---------------------
	-- Pretty printing --
	---------------------

	-- Nothing interesting in the remainder of this file, just pretty printing
	-- functions to make the output of the examples more readable.

	-- \|
	-- >>> putStrLn $ prettyTerm $ Add (IntLit 2) (IntLit 3)
	-- (2 + 3)
	-- >>> putStrLn $ prettyTerm example
	-- ((\double -> (double (double (double 2)))) (\x -> (x + 2)))
	prettyTerm
	:: Term
	-> String
	prettyTerm (Var x)
	= x
	prettyTerm (Lam x body)
	= "(\\" ++ x ++ " -> " ++ prettyTerm body ++ ")"
	prettyTerm (App f e)
	= "(" ++ prettyTerm f ++ " " ++ prettyTerm e ++ ")"
	prettyTerm (IntLit n)
	= show n
	prettyTerm (Add e1 e2)
	= "(" ++ prettyTerm e1 ++ " + " ++ prettyTerm e2 ++ ")"

	-- \|
	-- >>> putStrLn $ prettyValue $ IntVal 2
	-- 2
	-- >>> putStrLn $ prettyValue $ Closure [] "x" $ Add (Var "x") (IntLit 2)
	-- (\x -> (x + 2))
	prettyValue
	:: Value
	-> String
	prettyValue (Closure _ x body)
	= "(\\" ++ x ++ " -> " ++ prettyTerm body ++ ")"
	prettyValue (IntVal n)
	= show n

	-- \|
	-- >>> putStrLn $ prettyResult $ eval $ Add (IntLit 2) (IntLit 3)
	-- 5
	-- >>> putStrLn $ prettyResult $ eval example
	-- 8
	prettyResult
	:: Either String Value
	-> String
	prettyResult (Right v)
	= prettyValue v
	prettyResult (Left err)
	= "Error: " ++ err

	-- \|
	-- >>> putStrLn $ prettyEval $ debug $ Add (IntLit 2) (IntLit 3)
	-- (2 + 3) --> 5
	-- >>> putStrLn $ prettyEval $ debug example
	-- ((\double -> (double (double (double 2)))) (\x -> (x + 2))) --> 8
	prettyEval
	:: EvalTree
	-> String
	prettyEval node
	= prettyTerm (input node) ++ " --> " ++ prettyResult (output node)


	main
	:: IO ()
	main = do
	putStrLn "typechecks."