Skip to content

Instantly share code, notes, and snippets.

@kanak
Created March 24, 2011 21:20
Show Gist options
  • Save kanak/885923 to your computer and use it in GitHub Desktop.
Save kanak/885923 to your computer and use it in GitHub Desktop.
Discrete Mathematics Using a Computer Chapter 12: AVL Tree Notes and Solutions
{- Discrete Mathematics Using a Computer
Chapter 12: AVL Trees
AVL Trees: Self-balancing binary trees.
-}
module AVL where
data SearchTree k d = Leaf
| Node k d (SearchTree k d) (SearchTree k d)
deriving (Show)
-- A search tree is parametrized by two types:
-- k : the type of a key. k needs to derive Ord
-- d : the type of value. can be any type.
instance (Eq k, Eq d) => Eq (SearchTree k d) where
Leaf == Leaf = True
Leaf == _ = False
_ == Leaf = False
(Node xkey xval xleft xright) == (Node ykey yval yleft yright) =
(xkey == ykey) && (xval == yval) && (xleft == yleft) && (xright == yright)
subtree :: (Eq k, Eq d) => SearchTree k d -> SearchTree k d -> Bool
subtree Leaf _ = True
subtree _ Leaf = False
subtree x y@(Node _yk _yv yl yr) = x == y || x `subtree` yl || x `subtree` yr
properSubtree :: (Eq k, Eq d) => SearchTree k d -> SearchTree k d -> Bool
properSubtree _ Leaf = False
properSubtree x (Node _yk _yv yl yr) = x `subtree` yl || x `subtree` yr
-- book's member returns Bool
-- has a different dataElems function that returns the element
member' :: (Eq k) => k -> SearchTree k d -> Maybe d
member' _ Leaf = Nothing
member' k (Node yk yv yl yr)
| k == yk = Just yv
| otherwise = case member' k yl of
Nothing -> member' k yr
x -> x
-- I'm calling the above member' because it doesn't take advantage of searchtree properties
-- check if the whole Tree has a certain property
allTree :: (k -> d -> Bool) -> SearchTree k d -> Bool
allTree p Leaf = True
allTree p (Node yk yv yl yr) = p yk yv && allTree p yl && allTree p yr
ordered :: (Ord k) => SearchTree k d -> Bool
ordered Leaf = True
ordered (Node k _v l r) = allTree (keyLess k) l && allTree (keyMore k) r && ordered l && ordered r
where keyLess x y _ = y < x
keyMore x y _ = y > x
-- this is very wasteful. A node is visited d times where d is its depth
-- If the tree is balanced and has n nodes, the n/2 leaf nodes are at depth log n
-- So this algorithm is (n/2) * log n + (n/4) * (log n - 1) + ...
-- OMEGA n log n. Should be able to do this in linear time just by converting it to list
-- via inorder traversal and checking sortedness.
--------------------------------------------------------------------------------
{- THEOREM 81: Unique keys part 1
Forall S, ordered(S) and k in S => length (member k S) == 1.
Proof: Induction on the structure of the tree.
Base Case: Leaf
Leaf nodes have no keys. So k in S is False. So vacuously True
Inductive Case:
Each node can hold a single element.
If the tree is ordered, then every node in the left subtree has keys strictly less than
the key of the node.
Similarly, every node in the right subtree has keys strictly greater than key of the node.
So, if the node's key matches the input key, the input key cannot be found anywhere else.
Thus, if the key exists in the tree, exactly one node has that key. So length (member...) is 1
-}
{- THEOREM 82: Unique keys part 2
Forall s, ordered (S) and k not in S => member' k S = Nothing
Proof is by induction on the structure of the tree.
Base case:
A Leaf is ordered but contains no keys.
So, (ordered(S) and k not in S) is (True and True) = True
In this case, we have returned Nothing.
Inductive Hypothesis:
member' k S' where S' is a subtree returns Nothing if they don't have the keys.
Inductive case:
Suppose k is not in the node S. This means that it's not in either this node or in any of the subtrees.
Since the node's key is not equal to k, we would check if it is in the left subtree.
Left subtree will return Nothing, so we return value of right subtree.
Not in right subtree either, so we return Nothing.
-}
--------------------------------------------------------------------------------
-- Retrieving Items faster
member :: Ord k => k -> SearchTree k d -> Maybe d
member _ Leaf = Nothing
member k (Node k' v l r)
| k == k' = Just v
| k < k' = member k l
| otherwise = member k r
{- THEOREM 83: Correctness of member
forall S, ordered(S) and k in S => member k S = Just d where d is the value of the
node containing the key.
Proof: Induction on structure of the tree.
Base case: Leaf
Vacuously true since k in S is False, and False implies anything is True.
Inductive Hypothesis: if it is in either the left or right subtree, the member on the
subtree will return correct answer.
Inductive case: Node k' d' l r
There are three cases: Either this k' == k, k' > k, k' < k
Case: k' == k
Since the tree is ordered, no other node has the same key.
Thus, d' == d. So we are correct in returning Just d'
Case k' < k
The node key was smaller than the input key.
Since the tree is ordered, everything in the left subtree is smaller than k'.
So, everything in left subtree is smaller than k'
So, can't find k' in left subtree.
So, running member on the right subtree guaranteed to give the answer by induction hypothesis.
Case k' > k
Same as k' < k case, except looking in left subtree guaranteed to work.
-}
--------------------------------------------------------------------------------
-- Height and other stuff
height :: SearchTree k d -> Integer
height Leaf = 0
height (Node _k _v l r) = 1 + max (height l) (height r)
{- THEOREM: forall s, height(s) >= 0.
Proof is by induction on the structure of s.
Base case: Leaf
height Leaf = 0, 0 >= 0
Inductive case
Inductive hypothesis: height on subtree returns non-negative number
height (Node _k _v l r) = 1 + max (height l) (height r)
both height l and height r >= 0
so max of the two >= 0
so adding 1 to that, still >= 0
-}
retrievalSteps :: SearchTree k d -> Integer
retrievalSteps Leaf = 1
retrievalSteps (Node _k _v l r) = maximum [2, retrievalSteps l + 3, retrievalSteps r + 4]
-- For leaf, value is 1 because we pattern match immediately.
-- For node, if equal keys, we did 2 work: 1. match failure for leaf, 2. checking equality
-- less, we did above + checking less than, so 3 work
-- more, we did above + "otherwise", so 4 work
{- THEOREM: forall s, retrievalSteps s <= 4 * (height(s) + 1)
Proof: Induction on structure of tree.
Base case: Leaf
1 <= 4 * (0 + 1) i.e. 1 <= 4 is True.
Inductive case: Node
Induction Hypothesis: retrievalSteps subtree <= 4 * (height(subtree) + 1)
maximum [2, retrievalSteps l + 3, retrievalSteps r + 4]
By induction hypothesis:
<= maximum [2, 4 * (height(l) + 1) + 3, 4 * (height(r) + 1) + 4]
= maximum [2, 4 * height(l) + 7, 4 * height(r) + 8]
Since height is non-negative, the 2nd and 3rd terms are guaranteed to be more than 2.
= maximum [4 * height(l) + 7, 4 * height(r) + 8]
= 4 * maximum [height(l) + 7/4, height(r) + 2]
For simplicity, replace 7/4 with an overestimate: 2
= 4 * maximum [height(l) + 2, height(r) + 2]
= 4 * (maximum [height(l) + 1, height(r) + 1] + 1)
= 4 * (height(s) + 1)
QED.
This is important because when the trees are balanced, height(s) = log n.
So we can perform retrieval in logarithmic time.
-}
-- =============================================================================
-- 12.6 Balanced Trees
balanced :: SearchTree k d -> Bool
balanced Leaf = True
balanced (Node _k _v l r) = balanced l && balanced r && 1 >= (abs $ heightLeft - heightRight)
where heightLeft = height l
heightRight = height r
-- Again, this is as inefficient as ordered.
-- Going from the leaf up would probably be much faster
-- Or, while doing balanced subtree, produce the height as well. ("Fusion" as Bird-Wadler call it)
{- We want an insertion operator with the following properties:
1. If the tree is ordered, inserting a new item shouldn't make it unordered.
i.e. forall s,k,d. ordered(s) -> ordered((k,d) `insert` s)
2. If the tree is balanced, inserting a new item shouldn't make it unbalanced.
i.e. forall s,k,d. balanced(s) -> balanced((k,d) `insert` s)
3. Shouldn't delete any item from the old tree.
i.e. forall s,k,d,x. x in S -> x in ((k,d) `insert` s)
4. The new item should be inserted.
i.e. forall s,k,d. k in ((k,d) `insert` s)
5. No other keys are inserted.
i.e. forall s,k,d, x. ((x not in s) and (x != k)) -> (x not in ((k,d) `insert` s)
-}
-- The way we'll do this is to insert a node in the appropriate location in the tree
-- and "rebalance" the tree by moving some subtrees around to maintain balance.
-- We'll need height extensively for the rebalancing operations
-- So we'll augment tree to store height as well.
data AVLTree k d= AVLLeaf
| AVLNode Integer k d (AVLTree k d) (AVLTree k d)
deriving (Eq, Show)
--------------------------------------------------------------------------------
-- Case 1: Easy Right
{- Scenario:
Z (ht n + 3)
/ \
(ht n + 2) x zR (ht n)
/ \
xL xR
(ht n+1) (ht n)
Assuming that it's "outside left heavy" i.e. left of left is the one that is
problematic.
Then, we want to rotate it so that it looks like:
x
/ \
xL z
/ \
xR zR
Does this rotation satisfy the properties we talked about?
THEOREM: after easyRight, the tree is ordered.
From the original tree, we see that:
xL < x < xR < Z < zR
(where A < B, A and B are sets means everything in A is smaller than everything in B)
In the rotated tree,
xL is at the left of x. so that part is balanced
z is the right subtree of x. so x < z is retained.
xR is left subtree of z, so xr < z is retained.
zR is right subtree of z, so z < zr is also retained.
So, the tree still has the same order as before.
THEOREM: after easyRight, the tree is balanced.
xR had height n, zR had height n
=> this part balanced
=> height of z is n + 1
xL had height n + 2, z has height n + 1
=> balanced
=> height of x is n + 3
THEOREM: No keys lost
If a key was in the original tree, it was in one of: x, xL, z, xR, zR
Each of those is still present in the new tree.
So can't have lost keys.
THEOREM: No keys added
Nothing other than x, xL, z, xR, zR is in the new tree.
So, no new keys have snuck in.
-}
easyRight :: AVLTree k d -> AVLTree k d
easyRight (AVLNode _hz zk zv (AVLNode _hx xk xv xl xr) zr) = AVLNode hx' xk xv xl newRight
where newRight = AVLNode hz' zk zv xr zr
hx' = parentHeight xl newRight
hz' = parentHeight xr zr
avlHeight :: AVLTree k d -> Integer
avlHeight AVLLeaf = 0
avlHeight (AVLNode h _k _v _l _r) = h
parentHeight :: AVLTree k d -> AVLTree k d -> Integer
parentHeight left right = 1 + max (avlHeight left) (avlHeight right)
{- easyLeft is to be used in the outside right heavy case
i.e. right subtree of the right subtree has the largest height
Z x
/ \ / \
zL x => Z xR
/ \ / \
xL xR zL xL
-}
easyLeft :: AVLTree k d -> AVLTree k d
easyLeft (AVLNode _zh zk zv zL (AVLNode _xh xk xv xL xR)) = AVLNode xh' xk xv newLeft xR
where newLeft = AVLNode zh' zk zv zL xL
xh' = parentHeight newLeft xR
zh' = parentHeight zL xL
--------------------------------------------------------------------------------
-- Rebalancing the hard cases
{- Hard cases are when the tallest part of the tree is on the inside.
Inside Right heavy case:
z (n + 3) z x
/ \ Easy / \ Easy / \
(n) zL y (n + 2) => zL x => z y
/ \ Right / \ Left / \ / \
(n+1) x yR (n) on y xL y on z zL xL xR yR
/ \ / \
xL xR xR yR
n-1 n
(Still Unbalanced: 1)
* any one of xL and xR could be the larger one. So, one of the heights is n
1. height y : 1 + max (n, n) = n + 1
height x = 1 + max (n - 1, n + 1) = n + 2 NOT BALNCED!
height z = 1 + max (n, n + 2) = n + 3 NOT BALANCED!
2. In the final tree, heights are:
height y: 1 + max (ht xr, ht yr) = 1 + max (n, n) = n + 1
height z: 1 + max (ht zl, ht xl) = 1 + max (n, n -1) = 1 + n
height x: 1 + max (ht z, ht y) = 1 + max (n +1, n + 1) = n + 2
Balanced!
Also, no new keys were added and no keys were thrown away.
so all properties still hold.
-}
hardRight :: AVLTree k d -> AVLTree k d
hardRight z@(AVLNode _zh zk zv x@(AVLNode _xh _xk _xv xL xR) zR) =
if avlHeight xL < avlHeight xR
then easyRight $ AVLNode zh' zk zv newLeft zR
else easyRight z
where zh' = parentHeight newLeft zR
newLeft = easyLeft x
hardLeft :: AVLTree k d -> AVLTree k d
hardLeft z@(AVLNode _zh zk zv zL y@(AVLNode _yh _yk _yv yL yR)) =
if avlHeight yR < avlHeight yL
then easyLeft $ AVLNode zh' zk zv zL newRight
else easyLeft z
where zh' = parentHeight zL newRight
newRight = easyRight y
{- Proving that these rotations conserve keys and maintain balance is
easy because we know that easyLeft and Right work.
-}
insert :: Ord k => k -> d -> AVLTree k d -> AVLTree k d
insert k d AVLLeaf = AVLNode 1 k d AVLLeaf AVLLeaf
insert k d (AVLNode xh xk xv xL xR) =
if k < xk -- Add to left node, balance if necessary
then if avlHeight newLeft > avlHeight xR + 1
then hardRight addedLeft
else addedLeft
else if k > xk -- Add to balance if necessary
then if avlHeight newRight > avlHeight xL + 1
then hardLeft addedRight
else addedRight
else (AVLNode xh xk d xL xR) -- Update existing Value
where addedLeft = AVLNode heightA xk xv newLeft xR
newLeft = insert k d xL
heightA = parentHeight newLeft xR
addedRight = AVLNode heightB xk xv xL newRight
newRight = insert k d xR
heightB = parentHeight xL newRight
-- Let's make tree with numbers 1 .. 16 inserted in order.
-- If the insert works as promised, we should have a nice balanced tree.
-- If not, we get a chain.
makeTree :: Integer -> AVLTree Integer Integer
makeTree n = foldr avlInsert AVLLeaf [1..n]
where avlInsert new tree = insert new new tree
tinyTree, smallTree, normalTree, largeTree, hugeTree :: AVLTree Integer Integer
tinyTree = makeTree 16
smallTree = makeTree 256
normalTree = makeTree 1024
largeTree = makeTree 10000
hugeTree = makeTree 25000
{- Let's see the heights of these trees:
*AVL> avlHeight tinyTree
5
*AVL> avlHeight smallTree
9
*AVL> avlHeight normalTree
11
*AVL> avlHeight largeTree
14
*AVL> avlHeight hugeTree
15
-- JOY TO THE WORLD!
-}
avlMember :: Ord k => k -> AVLTree k d -> Maybe d
avlMember _ AVLLeaf = Nothing
avlMember k (AVLNode _h k' v l r)
| k == k' = Just v
| k < k' = avlMember k l
| otherwise = avlMember k r
{- Tiny correctness check
*AVL> avlMember 0 hugeTree
Nothing
*AVL> avlMember 100 hugeTree
Just 100
*AVL> avlMember 24999 hugeTree
Just 24999
*AVL> avlMember 25000 hugeTree
Just 25000
-- Hallelujah!
-}
--------------------------------------------------------------------------------
{- Deleting keys while maintaining order
Easy Case: Want to remove a node whose left child is a leaf i.e.
Z
\
X <- want to delete
\
Xr
In this case, just replace X with Xr
Harder case: Want to remove a node whose left child is a tree
X
/ \
Xl Xr
Strategy: Find the maximum of the Xl Subtree and replace X with it
Correct because that elt will be larger than every other in Xl, but still smaller than Xr
To do this, we need to go along the right child of Xl until we find one whose right child is a leaf
Then, it's parent's right child becomes the left child of the max
Move the max to the top.
(Sounds confusing but the code will clarify the idea):
-}
-- we'll return a 3-tuple containing key, value of max and the new tree with the max removed
shrink :: AVLTree k d -> (k, d, AVLTree k d)
shrink (AVLNode _h k d l AVLLeaf) = (k, d, l)
shrink (AVLNode _h k d l r) = (ans_key, ans_val, ans_shrunken)
where (ans_key, ans_val, shr) = shrink r
rot_shr = if avlHeight l > avlHeight shr + 1 then hardRight shr else shr -- <- balance the shrunken if necessary
ans_shrunken = AVLNode (parentHeight l rot_shr) k d l rot_shr -- <- attach shrunken to this node.
{- Proofs of correctness
1. Shrink preserves Balance
Let nodex be an AVL Node.
If balanced nodex and (k, d, shr) = shrink nodex then shr is balanced.
Proof: By structural induction on the right child of nodex
Base Case: Right child is a leaf
Then, shr = left child of nodex. Since nodex is balanced, left child is balanced as well.
Inductive case: Right child is a AVLNode (call it "r")
Inductive hypothesis: shrink r is balanced.
Since nodex is balanced before, height of l = 1 +/- height of r
When we do shrink, we remove a node. So, the height of l could be 2 more than r, 1 more than r or equal to r
Only the first scenario is troublesome.
We know that if the left subtree is heavier than the right, we can do a hardRight rotation to balance the tree.
So, the left and right subtrees of nodex are now balanced (by correctness of hardRight)
so, balancedness is not lost when we shrink
2. Shrink preserves order
i.e. ordered(nodex) and (k, d, shr) = shrink nodex => ordered(shr) and (forall k' in shr, k > shr)
Proof by induction.
Base case: Right node is a leaf
Then, (k, d, shr) = (k, d, l) where l is the left child
Since nodex was ordered, k > k' for all k' in its left child, and the left child must have been ordered as well.
So, both conditions hold
Inductive case: Right node is a AVLNode, call it "r"
Inductive hypothesis: if (k, d, shr) is returned by the node below, we know that everything below the node itself is balanced
and that k is larger than every key below.
Since the "k" came from the right child of this node, it is larger than this node, and everything in the left subtree.
Since the tree was originally ordered, everything in the shrunken subtree is larger than this node (and everything in the left).
So, order is still preserved.
3. Shrink doesn't add any keys.
i.e. If k not in nodex before means k not in the shr returned by shrink nodex.
Base case: If not in nodex, then not in nodex's left child as well. So, returning left child doesn't break correctness.
Inductive case:
The left child of the new node is the same as the previous left. So can't have added a new key.
The right child is the right child returned from the level below. This doesn't add key by induction hypothesis.
The node's key is the same as before, so still correct.
4. Shrink doesn't remove any keys.
i.e. if (k,d, shr) was returned by shrink nodex, then all the {k} U keys(shr) = keys(nodex)
Base case:
Right child is a leaf (no keys), return (k, d, left) where k is the root's key.
So, haven't lost any keys.
Inductive case:
Hypothesis: The tuple received from the level below has all the keys from that subtree.
Again, no keys lost from the left side because we join it unchanged
We make a new tree with this node at the root and the shr from below as the right child.
At this state, we have accounted for all keys except the one in the tuple that was passed.
That key is passed in the tuple by this node as well.
So, haven't lost any keys.
-}
deleteRoot :: AVLTree k d -> AVLTree k d
deleteRoot (AVLNode _h _k _v AVLLeaf r) = r
deleteRoot (AVLNode _h _k _v l@(AVLNode _hl _kl _vl _ll rl) r) = rotNew
where rotNew = if avlHeight r > avlHeight shr + 1 then hardLeft new else new
new = AVLNode (parentHeight shr rl) xk xv shr r -- Book has a typo here: it tries to add rl instead of r
(xk, xv, shr) = shrink l
-- now we can write delete
delete :: (Ord k) => k -> AVLTree k d -> AVLTree k d
delete _key AVLLeaf = AVLLeaf
delete key n@(AVLNode _h k d l r)
| key == k = deleteRoot n
| key < k = if avlHeight r > avlHeight newL + 1
then hardLeft removedLeft
else removedLeft
| otherwise = if avlHeight l > avlHeight newR + 1
then hardRight removedRight
else removedRight
where newL = key `delete` l
newR = key `delete` r
removedLeft = AVLNode (parentHeight newL r) k d newL r
removedRight = AVLNode (parentHeight l newR) k d l newR
{- *AVL> avlMember 1 (makeTree 10)
Just 1
*AVL> avlMember 1 (1 `delete` (makeTree 10))
Nothing
-}
-- =============================================================================
-- Conclusion
-- Proofs of many theorems were left out, many proofs were "less than formal"
-- Problem: would require thousands of logic formulas to complete such proofs
-- have to use theorem provers such as ACL2.
-- ACL2 can not only verify proofs, but also construct many inductive proofs
-- "Our hope is that you can use proof assistants"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment