Skip to content

Instantly share code, notes, and snippets.

@gelisam
Last active July 29, 2023 07:23
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gelisam/820dae569c6b0747098a001dc6ae7eaf to your computer and use it in GitHub Desktop.
Save gelisam/820dae569c6b0747098a001dc6ae7eaf to your computer and use it in GitHub Desktop.
A proof of concept for a value-based debugging technique
-- A proof of concept for a value-based debugging technique: instead of
-- debugging by imperatively stepping through the evaluation, we instead
-- produce a value representing the evaluation tree.
--
-- This value can then be manipulated like any other value, e.g. instead adding
-- a breakpoint and stepping until the breakpoint is hit, you can write an
-- ordinary function which recurs into the evaluation tree until it finds a node
-- of interest. This way, more complex conditions can be easily expressed, e.g.
-- "find the smallest subtree in which a call of 'double' does not result in an
-- output which is the double of its input".
{-# LANGUAGE PatternGuards #-}
module ValueBasedDebugging where
import Data.Foldable (minimumBy)
import Data.Ord (comparing)
---------------------
-- Lambda calculus --
---------------------
-- For this proof of concept, we use a simple lambda calculus with integers and
-- addition. Everything in this section is standard.
data Term
= Var String
| Lam String Term
| App Term Term
| IntLit Int
| Add Term Term
deriving (Eq, Show)
data Value
= Closure Env String Term
| IntVal Int
deriving (Eq, Show)
type Env = [(String, Value)]
-- Our language does not have a 'let' construct, but it is easy to define one in
-- terms of the constructs it does have.
let_
:: String
-> Term
-> Term
-> Term
let_ x xDefinition body
= App (Lam x body) xDefinition
-- An example program with an intentional bug. We are trying to calculate
-- @2*2*2*2 = 16@, but we accidentally defined @double x = x + 2@ instead of
-- @double x = x + x@, so the result is @2+2+2+2 = 8@.
example
:: Term
example
= let_ "double" (Lam "x" (Add (Var "x")
(IntLit 2))) -- bug!
$ App (Var "double")
$ App (Var "double")
$ App (Var "double")
$ IntLit 2
-- |
-- Evaluation can either succeed with a value, or fail with an error message.
--
-- >>> eval $ Add (IntLit 2) (IntLit 3)
-- Right (IntVal 5)
-- >>> eval example
-- Right (IntVal 8)
-- >>> eval $ Add (IntLit 2) (Var "x")
-- Left "Variable \"x\" not in scope"
evalWith
:: Env
-> Term
-> Either String Value
evalWith env (Var x) = case lookup x env of
Just v -> do
pure v
Nothing -> do
Left $ "Variable " ++ show x ++ " not in scope"
evalWith env (Lam x body) = do
pure $ Closure env x body
evalWith env (App f e) = do
fv <- evalWith env f
ev <- evalWith env e
case fv of
Closure capturedEnv x body -> do
evalWith ((x,ev) : capturedEnv) body
IntVal n -> do
Left $ "Integer " ++ show n ++ " is not a function"
evalWith _ (IntLit n) = do
pure $ IntVal n
evalWith env (Add e1 e2) = do
v1 <- evalWith env e1
v2 <- evalWith env e2
case v1 of
IntVal n1 -> case v2 of
IntVal n2 -> do
pure $ IntVal (n1 + n2)
Closure {} -> do
error "Functions cannot be added"
Closure {} -> do
error "Functions cannot be added"
eval
:: Term
-> Either String Value
eval
= evalWith []
--------------------------
-- Value-based debugger --
--------------------------
-- At each node of the evaluation tree, an input term is evaluated to an output
-- value in an environment. It is possible to look at the output right away, or
-- to dive down the 'details' field to look at the first evaluation step towards
-- that output.
data EvalTree = EvalTree
{ environment
:: Env
, input
:: Term
, output
:: Either String Value
, details
:: EvalStep
}
deriving Show
-- Some input terms are already values, and thus do not require any further
-- evaluation. Other terms require the evaluation of multiple sub-terms, which
-- is why the evaluation tree is a tree.
--
-- While 'eval' chooses to evaluate those sub-terms in a specific order, for
-- debugging it is more convenient to allow the user to choose which sub-term to
-- explore next.
data EvalStep
= EvalVar
| EvalLam
| EvalApp AppDetails
| EvalIntLit
| EvalAdd EvalTree EvalTree
deriving Show
-- Ideally, 'appFun' evaluates to (the closure representation of) @Lam "x" body@,
-- in which case 'appArgName' is @Just "x"@ and 'appResult' is the evaluation
-- tree for @body@ in a extended environment in which @x@ is bound to the output
-- of 'appArg'. However, another possibility is that the call is about to fail
-- with a type error, because 'appFun' does not evaluate to a function. In that
-- case, 'appArgName' and 'appResult' are both 'Nothing'.
data AppDetails = AppDetails
{ appFun
:: EvalTree
, appArgName
:: Maybe String
, appArg
:: EvalTree
, appResult
:: Maybe EvalTree
}
deriving Show
-- |
-- Evaluate an input term into an output value, while recording the evaluation
-- tree. Note that the output value is computed using the 'evalWith' function
-- from the previous section, _not_ by calculating the entire evaluation tree
-- and then throwing away the debugging information. This allows the user to
-- take advantage of laziness, by only generating debugging information for the
-- parts of the evaluation tree that are actually needed.
--
-- For example, here is a computation consisting of two sub-expressions. We will
-- generate debugging information for the @3 + 4@ sub-expression but not for the
-- @1 + 2@ sub-expression.
--
-- >>> node1234 = debug $ (IntLit 1 `Add` IntLit 2) `Add` (IntLit 3 `Add` IntLit 4)
--
-- Step into the overall expression:
-- >>> EvalAdd node12 node34 = details node1234
--
-- Step over the @1 + 2@ sub-expression:
-- >>> output node12
-- Right (IntVal 3)
--
-- Step into the @3 + 4@ sub-expression:
-- >>> EvalAdd node3 node4 = details node34
--
-- Evaluate the leaves:
-- >>> output node3
-- Right (IntVal 3)
-- >>> output node4
-- Right (IntVal 4)
--
-- Step out of the @3 + 4@ sub-expression:
-- >>> output node34
-- Right (IntVal 7)
-- Step out of the overall expression:
-- >>> output node1234
-- Right (IntVal 10)
debugWith
:: Env
-> Term
-> EvalTree
debugWith env term = EvalTree
{ environment
= env
, input
= term
, output
= evalWith env term
, details
= go term
}
where
go
:: Term
-> EvalStep
go (Var {})
= EvalVar
go (Lam {})
= EvalLam
go (App funcTerm argTerm)
= let funcNode = debugWith env funcTerm
argNode = debugWith env argTerm
in EvalApp $ AppDetails
{ appFun
= funcNode
, appArgName
= do
Right (Closure _ argName _) <- pure $ output funcNode
pure argName
, appArg
= argNode
, appResult
= do
Right (Closure capturedEnv argName body) <- pure $ output funcNode
Right argValue <- pure $ output argNode
pure $ debugWith ((argName, argValue) : capturedEnv) body
}
go (IntLit {})
= EvalIntLit
go (Add e1 e2)
= let node1 = debugWith env e1
node2 = debugWith env e2
in EvalAdd node1 node2
debug
:: Term
-> EvalTree
debug = debugWith []
-----------------
-- Breakpoints --
-----------------
exampleEvalTree
:: EvalTree
exampleEvalTree
= debug example
-- We've seen what the usual "step into", "step over", and "step out" commands
-- of a debugger translate to in the value-based debugging world. What about
-- breakpoints? In an imperative debugger, running up to a breakpoint means to
-- automatically step into and out of sub-calls until a particular line is
-- encountered. Or, for data breakpoints, until a particular variable is set to
-- a value which matches a particular predicate. In a value-based debugger, we
-- generalize the concept: a breakpoint is a predicate on the evaluation tree.
-- The predicate can look at the line number (or it could if our term
-- representation had line numbers), at the values of the variables in the
-- environment, or at anything else which can be computed from the evaluation
-- tree.
-- |
-- >>> length $ allNodes $ debug $ Add (IntLit 2) (IntLit 3)
-- 3
allNodes
:: EvalTree
-> [EvalTree]
allNodes node
= node
: case details node of
EvalVar
-> []
EvalLam
-> []
EvalApp (AppDetails f _ arg result)
-> allNodes f ++ allNodes arg ++ maybe [] allNodes result
EvalIntLit
-> []
EvalAdd e1 e2
-> allNodes e1 ++ allNodes e2
-- |
-- Look for points in the evaluation where a particular variable is set to a
-- value of interest.
--
-- >>> nodes = allNodes exampleEvalTree
-- >>> matches = filter (dataBreakpoint "x" (== IntVal 4)) nodes
-- >>> mapM_ (putStrLn . prettyEval) matches
-- (x + 2) --> 6
-- x --> 4
-- 2 --> 2
dataBreakpoint
:: String
-> (Value -> Bool)
-> EvalTree -> Bool
dataBreakpoint varName predicate node
| Just value <- lookup varName (environment node)
= predicate value
| otherwise
= False
------------------------
-- Sub-program finder --
------------------------
-- Being able to write an _arbitrary_ predicate, instead of being limited to
-- line numbers and data breakpoints, unlocks a new world of debugging queries.
--
-- For example, our example program is producing the wrong answer, but let's
-- pretend that the code is so large that we can't understand why just by
-- looking at the code. Let's use the evaluation tree to simplify the problem,
-- by finding the smaller sub-program which also produces the wrong answer.
--
-- To do this, we will use an oracle which always produces the right answer. At
-- first glance, it might sound silly to assume that we have such an oracle,
-- because if we had one, then we wouldn't need to debug our program, we could
-- just trash our program and use our oracle instead. But oracles are actually a
-- common technique for testing refactorings and optimizations: the oracle is
-- simply the program before the refactoring, or the program without the
-- optimization.
-- |
-- >>> isCallTo "double" $ App (Var "double") (IntLit 2)
-- True
-- >>> isCallTo "double" $ App (Var "triple") (IntLit 2)
-- False
isCallTo
:: String
-> Term -> Bool
isCallTo expectedFuncName (App (Var actualFuncName) _)
= actualFuncName == expectedFuncName
isCallTo _ _
= False
-- |
-- >>> mapM_ (putStrLn . prettyEval) doubleCalls
-- (double (double (double 2))) --> 8
-- (double (double 2)) --> 6
-- (double 2) --> 4
doubleCalls
:: [EvalTree]
doubleCalls
= filter (isCallTo "double" . input)
$ allNodes exampleEvalTree
-- |
-- >>> mismatches = filter (not . matchesOracle (* 2)) doubleCalls
-- >>> mapM_ (putStrLn . prettyEval) mismatches
-- (double (double (double 2))) --> 8
-- (double (double 2)) --> 6
matchesOracle
:: (Int -> Int)
-> EvalTree -> Bool
matchesOracle oracle node
| EvalApp (AppDetails _ _ argNode _) <- details node
, Right (IntVal argInt) <- output argNode
, Right (IntVal actualOutput) <- output node
= actualOutput == oracle argInt
| otherwise
= False
-- |
-- >>> size $ Add (IntLit 2) (IntLit 3)
-- 3
-- >>> map (size . input) doubleCalls
-- [7,5,3]
size
:: Term
-> Int
size (Var {})
= 1
size (Lam _ body)
= 1 + size body
size (App f e)
= 1 + size f + size e
size (IntLit {})
= 1
size (Add e1 e2)
= 1 + size e1 + size e2
-- |
-- >>> putStrLn $ prettyEval smallestSubprogramDemonstratingTheBug
-- (double (double 2)) --> 6
smallestSubprogramDemonstratingTheBug
:: EvalTree
smallestSubprogramDemonstratingTheBug
= minimumBy (comparing (size . input))
$ filter (not . matchesOracle (* 2))
$ doubleCalls
-- Tada! We've found the smallest sub-program which demonstrates the bug. Isn't
-- value-based debugging amazing?
------------------------
-- Minimal repro case --
------------------------
-- But we are not done yet! We didn't perform a series of actions on an
-- ephemeral instance of our program, we wrote a _function_, and functions are
-- reusable. This means we can use this same function to debug the next program.
-- Or a variant of the current program. That's right, since the evaluation tree
-- is an ordinary value, this means we are not limited to debugging the program
-- as it is, we can also generate alternative implementations on the fly and
-- examine what would have been if we had written the program differently.
--
-- For example, let's say that the sub-program we found is still too complex.
-- We can make systematic simplifications to the program and test whether it
-- still exhibits the buggy behaviour. If any further simplification causes the
-- buggy behaviour to go away, then we have found a minimal repro case.
-- |
-- Each result makes a single simplification to the code.
-- Returns the empty list if no further simplifications are possible.
--
-- >>> simplerTerms = simplify $ Add (IntLit 2) (IntLit 3)
-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
-- 2
-- 3
-- (1 + 3)
-- (2 + 2)
--
-- >>> simplerTerms = simplify example
-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
-- ((\double -> (double (double 4))) (\x -> (x + 2)))
-- ((\double -> (double (double (double 1)))) (\x -> (x + 2)))
-- ((\double -> (double (double (double 2)))) (\x -> x))
-- ((\double -> (double (double (double 2)))) (\x -> 2))
-- ((\double -> (double (double (double 2)))) (\x -> (x + 1)))
--
-- >>> simplerTerms = simplify $ input smallestSubprogramDemonstratingTheBug
-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
-- (double 4)
-- (double (double 1))
simplify
:: Term
-> [Term]
simplify (Var {})
= []
simplify (Lam x body)
= [Lam x body' | body' <- simplify body]
simplify (App f@(Var "double") e@(IntLit n))
= [IntLit (2 * n)] -- replace a sub-call with the oracle's correct result
++ [ App f' e | f' <- simplify f ]
++ [ App f e' | e' <- simplify e ]
simplify (App f e)
= [ App f' e | f' <- simplify f ]
++ [ App f e' | e' <- simplify e ]
simplify (IntLit n)
| n > 0
= [IntLit (n-1)]
| n < 0
= [IntLit (n+1)]
| otherwise
= []
simplify (Add e1 e2)
= [e1, e2]
++ [ Add e1' e2 | e1' <- simplify e1 ]
++ [ Add e1 e2' | e2' <- simplify e2 ]
-- |
-- >>> env = environment smallestSubprogramDemonstratingTheBug
-- >>> term = input smallestSubprogramDemonstratingTheBug
-- >>> incorrect = not . matchesOracle (* 2) . debugWith env
-- >>> predicate term = isCallTo "double" term && incorrect term
-- >>> reproCase = smallestReproCase predicate term
-- >>> putStrLn $ prettyEval $ debugWith env reproCase
-- (double 3) --> 5
smallestReproCase
:: (Term -> Bool) -- True if the program exhibits the bug
-> Term -- the predicate must return True for this original term
-> Term
smallestReproCase predicate originalTerm
= case filter predicate $ simplify originalTerm of
simplerTerm : _
-> smallestReproCase predicate simplerTerm
[]
-> originalTerm
-- Tada! We've found the smallest repro case. Isn't value-based debugging
-- amazing? Let's implement something like this for a non-toy language!
---------------------
-- Pretty printing --
---------------------
-- Nothing interesting in the remainder of this file, just pretty printing
-- functions to make the output of the examples more readable.
-- |
-- >>> putStrLn $ prettyTerm $ Add (IntLit 2) (IntLit 3)
-- (2 + 3)
-- >>> putStrLn $ prettyTerm example
-- ((\double -> (double (double (double 2)))) (\x -> (x + 2)))
prettyTerm
:: Term
-> String
prettyTerm (Var x)
= x
prettyTerm (Lam x body)
= "(\\" ++ x ++ " -> " ++ prettyTerm body ++ ")"
prettyTerm (App f e)
= "(" ++ prettyTerm f ++ " " ++ prettyTerm e ++ ")"
prettyTerm (IntLit n)
= show n
prettyTerm (Add e1 e2)
= "(" ++ prettyTerm e1 ++ " + " ++ prettyTerm e2 ++ ")"
-- |
-- >>> putStrLn $ prettyValue $ IntVal 2
-- 2
-- >>> putStrLn $ prettyValue $ Closure [] "x" $ Add (Var "x") (IntLit 2)
-- (\x -> (x + 2))
prettyValue
:: Value
-> String
prettyValue (Closure _ x body)
= "(\\" ++ x ++ " -> " ++ prettyTerm body ++ ")"
prettyValue (IntVal n)
= show n
-- |
-- >>> putStrLn $ prettyResult $ eval $ Add (IntLit 2) (IntLit 3)
-- 5
-- >>> putStrLn $ prettyResult $ eval example
-- 8
prettyResult
:: Either String Value
-> String
prettyResult (Right v)
= prettyValue v
prettyResult (Left err)
= "Error: " ++ err
-- |
-- >>> putStrLn $ prettyEval $ debug $ Add (IntLit 2) (IntLit 3)
-- (2 + 3) --> 5
-- >>> putStrLn $ prettyEval $ debug example
-- ((\double -> (double (double (double 2)))) (\x -> (x + 2))) --> 8
prettyEval
:: EvalTree
-> String
prettyEval node
= prettyTerm (input node) ++ " --> " ++ prettyResult (output node)
main
:: IO ()
main = do
putStrLn "typechecks."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment