Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
(*|
A data structure that I've been more and more interested in recently is the disjoint set datastructure or union-find.
https://en.wikipedia.org/wiki/Disjoint-set_data_structure It's used in egraphs, unification, prolog, and graph connectivity.
I realized a cute representation that is easy to use and prover stuff about, although very inefficient compared to the usual version of disjoint set. It uses a simple functional representation based off the observation that the preimages of a function form disjoint sets. This representation is somewhat analogous to using functions to represent Maps in Coq like in Software Foundations. https://softwarefoundations.cis.upenn.edu/lf-current/Maps.html
The nice thing about this is that it really avoids any termination stickiness. Termination of the `find_root` operation of a union find in an explicit array or tree is not at all obvious and requires side proof or a refined type.
The termination ultimately comes from the fact that you used a finite number of unions to construct the disjoint sets.
|*)
Require Import Arith.
(*| For convenience we define disjoint sets of nats. See comments at end of post for some other options |*)
Definition ds := nat -> nat.
(*| The completely disjoint set is the identity function |*)
Definition init_ds : ds := fun x => x.
(*| `find_root` is just application |*)
Definition find_root (g : ds) x := g x.
(*| `in_same_set` is just `nat` equality checking |*)
Definition in_same_set (g : ds) x y :=
Nat.eq_dec (g x) (g y).
(*| And finally the only interesting operation is `union`. Some comments: It is useful to lift the `find_root` operations out of the body of the returned function to compute them eagerly and share the result. This version unfortunately means that the cost of a `find_root` operation becomes proportional to the number of `union` operations used to construct the `ds`
|*)
Definition union (g : ds) x y : ds :=
let px := find_root g x in
let py := find_root g y in
fun z =>
let pz := find_root g z in
if Nat.eq_dec px pz
then py
else pz.
(*|
-------------
Some proofs
-------------
I couldn't find this useful lemma in the standard library. Maybe I just missed it?
|*)
Lemma nat_eq_refl : forall x, exists p, Nat.eq_dec x x = left p.
intros x.
destruct (Nat.eq_dec x x).
eexists. reflexivity.
unfold "<>" in n. exfalso. apply n. reflexivity.
Qed.
(*|
A useful definition is `In_Same_Set`, which states that two nats `x` and `y` are in the same set if the `in_same_set` function returns a `left` |*)
Definition In_Same_Set g x y := exists p,
in_same_set g x y = left p.
(*|
An element is always in the same set with itself
|*)
Theorem disjoint_refl : forall g x,
In_Same_Set g x x.
intros g x. unfold In_Same_Set, in_same_set.
destruct (Nat.eq_dec (g x) (g x)).
- eexists. reflexivity.
- exfalso. apply n. reflexivity.
Qed.
(*|
The `In_Same_Set g` relation is symmettric.
|*)
Theorem disjoint_sym : forall g x y,
In_Same_Set g x y ->
In_Same_Set g y x.
unfold In_Same_Set, in_same_set, find_root.
intros g x y H.
destruct H.
destruct (Nat.eq_dec (g y) (g x)) eqn:E.
- eexists. reflexivity.
- exfalso. apply n. rewrite x0. reflexivity.
Qed.
(*|
The `In_Same_Set g` relation is transtive.
|*)
Theorem disjoint_trans : forall g x y z,
In_Same_Set g x y ->
In_Same_Set g y z ->
In_Same_Set g x z.
unfold In_Same_Set, in_same_set, find_root.
intros g x y z H H0.
destruct H.
destruct H0.
destruct (Nat.eq_dec (g x) (g z)) eqn:E.
- eexists. reflexivity.
- exfalso. apply n. rewrite x0. rewrite x1. reflexivity.
Qed.
(*|
The initial disjoint set only has equal elements in the same set
|*)
Theorem disjoint_init : forall x y, x <> y ->
exists p, in_same_set init_ds x y = right p.
intros.
destruct in_same_set.
exfalso.
apply H.
unfold init_ds in e.
apply e.
eexists. reflexivity. Qed.
(*|
After unioning x and y, they are now in the same set. This proof could be tightened up and automated more.
|*)
Theorem union_works : forall g x y,
In_Same_Set (union g x y) x y.
intros.
unfold In_Same_Set, in_same_set, union, find_root.
destruct (Nat.eq_dec (g x) (g y)) eqn:E1;
destruct (Nat.eq_dec (g x) (g x)) eqn:E2;
destruct (Nat.eq_dec (g y) (g y)) eqn:E3.
- eexists. reflexivity.
- exfalso. auto.
- firstorder.
- firstorder.
- eexists. reflexivity.
- exfalso. auto.
- exfalso. auto.
- exfalso. auto.
Qed.
(*|Things remain in the same set after unioning any other elements|*)
Theorem union_mono : forall g x y z w,
In_Same_Set g x y ->
In_Same_Set (union g z w) x y.
intros g x y z w H.
destruct H.
unfold union, In_Same_Set.
unfold in_same_set in H.
unfold in_same_set.
unfold find_root.
rewrite x0.
apply nat_eq_refl. Qed.
(*|
Here's a small enhancement. We can define a new version of union that checks if the two elements being unioned are already in the same set.
|*)
Definition union2 (g : ds) x y : ds :=
let px := find_root g x in
let py := find_root g y in
if Nat.eq_dec px py then
g
else
fun z =>
let pz := find_root g z in
if Nat.eq_dec px pz
then py
else pz.
Check in_same_set.
Theorem union2_correct : forall g x y z,
(union2 g x y) z = (union g x y) z.
intros. unfold union, union2, find_root.
destruct (Nat.eq_dec (g x) (g y)) eqn:E2;
destruct (Nat.eq_dec (g x) (g z)) eqn:E1;
congruence.
Qed.
(*|
---------------------
Possible Refinements
---------------------
You could abstract over the things that you're forming disjoint sets over
This means you need an enumeration to define init_ds
|*)
Definition ds' (a : Type) := a -> nat.
(*|
Or you can use a self map. This requires decidable equality in for checking canonical elements in the codomain
*)
Definition ds'' (a : Type) := a -> a.
(*|
Or you could do a fun proof relevant version for equivalence relations `R`. Now to perform a union you'll need to supply a proof `R x y`
|*)
Definition ds_pf (a : Type) (R : a -> a -> Prop) :=
forall x : a, {y : a | R x y}.
(*|
Another possibility is to use the fairly new feature of Coq, persistent arrays
* https://coq.inria.fr/refman/language/core/primitive.html#primitive-arrays
* https://coq.github.io/doc/master/stdlib/Coq.Array.PArray.html
This may indeed why these were added. They do require dealing with termination though. Here's one suggestion of an encoding. I dunno if this will bite you in the butt.
|*)
Require Import PArray.
Open Scope array_scope.
Fixpoint find_root' gas (a : array (sum nat Int63.int))
(i : Int63.int) :=
match gas with
| O => None
| S gas' => match a .[i] with
| inl x => Some i
| inr j => find_root' gas' a j
end
end.
Record disjoint_set := {
gas : nat; (* unions, or height *)
parents : array (sum nat Int63.int) ;
term_pf : forall i, exists n,
(find_root' gas parents i = Some n)
}.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment