This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(*| | |
A data structure that I've been more and more interested in recently is the disjoint set datastructure or union-find. | |
https://en.wikipedia.org/wiki/Disjoint-set_data_structure It's used in egraphs, unification, prolog, and graph connectivity. | |
I realized a cute representation that is easy to use and prover stuff about, although very inefficient compared to the usual version of disjoint set. It uses a simple functional representation based off the observation that the preimages of a function form disjoint sets. This representation is somewhat analogous to using functions to represent Maps in Coq like in Software Foundations. https://softwarefoundations.cis.upenn.edu/lf-current/Maps.html | |
The nice thing about this is that it really avoids any termination stickiness. Termination of the `find_root` operation of a union find in an explicit array or tree is not at all obvious and requires side proof or a refined type. | |
The termination ultimately comes from the fact that you used a finite number of unions to construct the disjoint sets. | |
|*) | |
Require Import Arith. | |
(*| For convenience we define disjoint sets of nats. See comments at end of post for some other options |*) | |
Definition ds := nat -> nat. | |
(*| The completely disjoint set is the identity function |*) | |
Definition init_ds : ds := fun x => x. | |
(*| `find_root` is just application |*) | |
Definition find_root (g : ds) x := g x. | |
(*| `in_same_set` is just `nat` equality checking |*) | |
Definition in_same_set (g : ds) x y := | |
Nat.eq_dec (g x) (g y). | |
(*| And finally the only interesting operation is `union`. Some comments: It is useful to lift the `find_root` operations out of the body of the returned function to compute them eagerly and share the result. This version unfortunately means that the cost of a `find_root` operation becomes proportional to the number of `union` operations used to construct the `ds` | |
|*) | |
Definition union (g : ds) x y : ds := | |
let px := find_root g x in | |
let py := find_root g y in | |
fun z => | |
let pz := find_root g z in | |
if Nat.eq_dec px pz | |
then py | |
else pz. | |
(*| | |
------------- | |
Some proofs | |
------------- | |
I couldn't find this useful lemma in the standard library. Maybe I just missed it? | |
|*) | |
Lemma nat_eq_refl : forall x, exists p, Nat.eq_dec x x = left p. | |
intros x. | |
destruct (Nat.eq_dec x x). | |
eexists. reflexivity. | |
unfold "<>" in n. exfalso. apply n. reflexivity. | |
Qed. | |
(*| | |
A useful definition is `In_Same_Set`, which states that two nats `x` and `y` are in the same set if the `in_same_set` function returns a `left` |*) | |
Definition In_Same_Set g x y := exists p, | |
in_same_set g x y = left p. | |
(*| | |
An element is always in the same set with itself | |
|*) | |
Theorem disjoint_refl : forall g x, | |
In_Same_Set g x x. | |
intros g x. unfold In_Same_Set, in_same_set. | |
destruct (Nat.eq_dec (g x) (g x)). | |
- eexists. reflexivity. | |
- exfalso. apply n. reflexivity. | |
Qed. | |
(*| | |
The `In_Same_Set g` relation is symmettric. | |
|*) | |
Theorem disjoint_sym : forall g x y, | |
In_Same_Set g x y -> | |
In_Same_Set g y x. | |
unfold In_Same_Set, in_same_set, find_root. | |
intros g x y H. | |
destruct H. | |
destruct (Nat.eq_dec (g y) (g x)) eqn:E. | |
- eexists. reflexivity. | |
- exfalso. apply n. rewrite x0. reflexivity. | |
Qed. | |
(*| | |
The `In_Same_Set g` relation is transtive. | |
|*) | |
Theorem disjoint_trans : forall g x y z, | |
In_Same_Set g x y -> | |
In_Same_Set g y z -> | |
In_Same_Set g x z. | |
unfold In_Same_Set, in_same_set, find_root. | |
intros g x y z H H0. | |
destruct H. | |
destruct H0. | |
destruct (Nat.eq_dec (g x) (g z)) eqn:E. | |
- eexists. reflexivity. | |
- exfalso. apply n. rewrite x0. rewrite x1. reflexivity. | |
Qed. | |
(*| | |
The initial disjoint set only has equal elements in the same set | |
|*) | |
Theorem disjoint_init : forall x y, x <> y -> | |
exists p, in_same_set init_ds x y = right p. | |
intros. | |
destruct in_same_set. | |
exfalso. | |
apply H. | |
unfold init_ds in e. | |
apply e. | |
eexists. reflexivity. Qed. | |
(*| | |
After unioning x and y, they are now in the same set. This proof could be tightened up and automated more. | |
|*) | |
Theorem union_works : forall g x y, | |
In_Same_Set (union g x y) x y. | |
intros. | |
unfold In_Same_Set, in_same_set, union, find_root. | |
destruct (Nat.eq_dec (g x) (g y)) eqn:E1; | |
destruct (Nat.eq_dec (g x) (g x)) eqn:E2; | |
destruct (Nat.eq_dec (g y) (g y)) eqn:E3. | |
- eexists. reflexivity. | |
- exfalso. auto. | |
- firstorder. | |
- firstorder. | |
- eexists. reflexivity. | |
- exfalso. auto. | |
- exfalso. auto. | |
- exfalso. auto. | |
Qed. | |
(*|Things remain in the same set after unioning any other elements|*) | |
Theorem union_mono : forall g x y z w, | |
In_Same_Set g x y -> | |
In_Same_Set (union g z w) x y. | |
intros g x y z w H. | |
destruct H. | |
unfold union, In_Same_Set. | |
unfold in_same_set in H. | |
unfold in_same_set. | |
unfold find_root. | |
rewrite x0. | |
apply nat_eq_refl. Qed. | |
(*| | |
Here's a small enhancement. We can define a new version of union that checks if the two elements being unioned are already in the same set. | |
|*) | |
Definition union2 (g : ds) x y : ds := | |
let px := find_root g x in | |
let py := find_root g y in | |
if Nat.eq_dec px py then | |
g | |
else | |
fun z => | |
let pz := find_root g z in | |
if Nat.eq_dec px pz | |
then py | |
else pz. | |
Check in_same_set. | |
Theorem union2_correct : forall g x y z, | |
(union2 g x y) z = (union g x y) z. | |
intros. unfold union, union2, find_root. | |
destruct (Nat.eq_dec (g x) (g y)) eqn:E2; | |
destruct (Nat.eq_dec (g x) (g z)) eqn:E1; | |
congruence. | |
Qed. | |
(*| | |
--------------------- | |
Possible Refinements | |
--------------------- | |
You could abstract over the things that you're forming disjoint sets over | |
This means you need an enumeration to define init_ds | |
|*) | |
Definition ds' (a : Type) := a -> nat. | |
(*| | |
Or you can use a self map. This requires decidable equality in for checking canonical elements in the codomain | |
*) | |
Definition ds'' (a : Type) := a -> a. | |
(*| | |
Or you could do a fun proof relevant version for equivalence relations `R`. Now to perform a union you'll need to supply a proof `R x y` | |
|*) | |
Definition ds_pf (a : Type) (R : a -> a -> Prop) := | |
forall x : a, {y : a | R x y}. | |
(*| | |
Another possibility is to use the fairly new feature of Coq, persistent arrays | |
* https://coq.inria.fr/refman/language/core/primitive.html#primitive-arrays | |
* https://coq.github.io/doc/master/stdlib/Coq.Array.PArray.html | |
This may indeed why these were added. They do require dealing with termination though. Here's one suggestion of an encoding. I dunno if this will bite you in the butt. | |
|*) | |
Require Import PArray. | |
Open Scope array_scope. | |
Fixpoint find_root' gas (a : array (sum nat Int63.int)) | |
(i : Int63.int) := | |
match gas with | |
| O => None | |
| S gas' => match a .[i] with | |
| inl x => Some i | |
| inr j => find_root' gas' a j | |
end | |
end. | |
Record disjoint_set := { | |
gas : nat; (* unions, or height *) | |
parents : array (sum nat Int63.int) ; | |
term_pf : forall i, exists n, | |
(find_root' gas parents i = Some n) | |
}. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment