Skip to content

Instantly share code, notes, and snippets.

@ncfavier
Last active June 18, 2022 00:14
Show Gist options
  • Save ncfavier/df34e17e8a7eeb45973ea11c1b9c21ea to your computer and use it in GitHub Desktop.
Save ncfavier/df34e17e8a7eeb45973ea11c1b9c21ea to your computer and use it in GitHub Desktop.
Require Import Arith ZArith Lia List Ascii String Program.Basics.
Open Scope string_scope.
Open Scope program_scope.
Import ListNotations.
(** The head character of a string, returned as a new string.
If the string is empty, returns the empty string. *)
Definition head s := match s with
| "" => ""
| String a _ => String a ""
end.
(* Coq's string library sucks *)
Lemma append_empty : forall s, s ++ "" = s.
Proof.
induction s. easy. simpl. now f_equal.
Qed.
Lemma append_String : forall a b c, (a ++ String b "") ++ c = a ++ String b c.
Proof.
intros. induction a. easy. simpl. now f_equal.
Qed.
(** PARTIE I : Dictionnaires *)
(* Type pour les dictionnaires vu ici comme des tables
d'associations entre mots et nombres. *)
Definition dict : Set := list (string * nat).
Definition empty_dict : dict := [].
Definition add_dict : string -> nat -> dict -> dict := curry cons.
(* Recherche d'un mot ou d'un nombre dans un dictionnaire. *)
Definition assoc1 : string -> dict -> option nat := fun s d =>
option_map snd (find (fun p => s =? fst p) d).
Definition assoc2 : nat -> dict -> option string := fun n d =>
option_map fst (find (fun p => n =? snd p)%nat d).
Definition from_option {A : Type} (a : A) (o : option A) := match o with
| Some x => x
| None => a
end.
(* Variantes pouvant etre utilisees si une erreur de recherche est
impossible. On pourra utiliser une valeur arbitraire pour le cas d'erreur.*)
Definition assoc1_noerr : string -> dict -> nat := fun s d =>
from_option 0 (assoc1 s d).
Definition assoc2_noerr : nat -> dict -> string := fun n d =>
from_option "" (assoc2 n d).
Lemma assoc1_noerr_spec : forall s d,
assoc1 s d <> None -> assoc1 s d = Some (assoc1_noerr s d).
Proof.
intros. unfold assoc1_noerr. destruct (assoc1 s d). easy. contradiction.
Qed.
(** Predicats concernants les dictionnaires. *)
(* Est-ce qu'un mot ou nombre particulier apparait dans le dictionaire ? *)
Definition In1 : string -> dict -> Prop := fun s d => exists n, In (s, n) d.
Definition In2 : nat -> dict -> Prop := fun n d => exists s, In (s, n) d.
(* Est-ce qu'une association precise apparait dans le dictionnaire ? *)
Definition In : string*nat -> dict -> Prop := @List.In (string * nat).
(* Est-ce que le dictionnaire est bijectif, autrement dit tout mot ou nombre
apparait-il toujours au plus une fois ? *)
Definition Bijective d : Prop :=
forall s1 n1 s2 n2, In (s1, n1) d -> In (s2, n2) d -> s1 = s2 <-> n1 = n2.
Lemma bij_uncons : forall x xs, Bijective (x :: xs) -> Bijective xs.
Proof.
intros x xs B s1 n1 s2 n2 i1 i2. apply (B s1 n1 s2 n2); now apply in_cons.
Qed.
(** Proprietes de base de nos dictionnaires *)
(* Attention! Dans le lot s'est glissé quelques propriétés vraies uniquement
dans le cas de dictionnaires bijectifs. Saurez-vous les retrouver ? *)
Lemma In1_assoc1: forall s d, In1 s d <-> assoc1 s d <> None.
Proof.
unfold assoc1, find. split; intros.
- destruct H. induction d, H.
+ now rewrite H, String.eqb_refl.
+ destruct (String.eqb s (fst a)). discriminate. apply (IHd H).
- induction d.
+ now contradict H.
+ destruct (String.eqb_spec s (fst a)).
* destruct a. exists n. left. now rewrite e.
* destruct (IHd H). exists x. now right.
Qed.
Lemma In2_assoc2: forall n d, In2 n d <-> assoc2 n d <> None.
Proof.
unfold assoc2, find. split; intros.
- destruct H. induction d, H.
+ now rewrite H, Nat.eqb_refl.
+ destruct (Nat.eqb n (snd a)). discriminate. apply (IHd H).
- induction d.
+ now contradict H.
+ destruct (Nat.eqb_spec n (snd a)).
* destruct a. exists s. left. now rewrite e.
* destruct (IHd H). exists x. now right.
Qed.
Lemma In_assoc1: forall s d n, Bijective d -> In (s, n) d -> assoc1 s d = Some n.
Proof.
unfold assoc1, find. intros. induction d, H0, a; simpl.
- inversion H0. now rewrite String.eqb_refl.
- destruct (String.eqb_spec s s0); simpl.
+ f_equal. apply (H s0 n0 s n). apply in_eq. now apply in_cons. easy.
+ apply IHd. now apply bij_uncons in H. easy.
Qed.
Lemma In_assoc2: forall n d s, Bijective d -> In (s, n) d -> assoc2 n d = Some s.
Proof.
unfold assoc2, find. intros. induction d, H0, a; simpl.
- inversion H0. now rewrite Nat.eqb_refl.
- destruct (Nat.eqb_spec n n0); simpl.
+ f_equal. apply (H s0 n0 s n). apply in_eq. now apply in_cons. easy.
+ apply IHd. now apply bij_uncons in H. easy.
Qed.
Lemma assoc1_In: forall s d n, assoc1 s d = Some n -> In (s, n) d.
Proof.
unfold assoc1, find. intros. induction d. easy.
destruct a. simpl in *. destruct (String.eqb_spec s s0); simpl in *.
- left. inversion H. now f_equal.
- right. now apply IHd.
Qed.
Lemma assoc2_In: forall n d s, assoc2 n d = Some s -> In (s, n) d.
Proof.
unfold assoc2, find. intros. induction d. easy.
destruct a. simpl in *. destruct (Nat.eqb_spec n n0); simpl in *.
- left. inversion H. now f_equal.
- right. now apply IHd.
Qed.
Lemma assoc1_assoc2: forall d s n, Bijective d -> assoc1 s d = Some n -> assoc2 n d = Some s.
Proof.
intros. apply In_assoc2. easy. now apply assoc1_In.
Qed.
Lemma In1_add: forall d s0 s n, In1 s0 d -> In1 s0 (add_dict s n d).
Proof.
intros. destruct H. exists x. now right.
Qed.
Lemma In2_add: forall d p0 s n, In2 p0 d -> In2 p0 (add_dict s n d).
Proof.
intros. destruct H. exists x. now right.
Qed.
(** PARTIE Ib: Un invariant sur les dictionnaires *)
(* (Invariant d max) doit exprimer le fait que :
- d est bijectif
- tout caractere ascii est dans d
- tous les nombres < max sont dans d, et seulements ceux-ci
*)
Definition Invariant : dict -> nat -> Prop := fun d max =>
Bijective d
/\ (forall (c : ascii), In1 (String c "") d)
/\ (forall (n : nat), In2 n d <-> n < max).
(* Ces invariants restent valides si l'on enrichit correctement un
dictionnaire: *)
Lemma Invariant_propagates : forall d max s,
assoc1 s d = None ->
Invariant d max ->
Invariant (add_dict s max d) (S max).
Proof.
intros. destruct H0 as [B [H1 H2]]. split; [|split].
- intros s1 n1 s2 n2 i1 i2. destruct i1, i2.
+ inversion H0. inversion H3. now subst s1 n1 s2 n2.
+ inversion H0. subst s1 n1. split; intros.
* subst s2. contradict H. apply In1_assoc1. now exists n2.
* subst n2. absurd (max < max). lia. apply (H2 max). now exists s2.
+ inversion H3. subst s2 n2. split; intros.
* subst s1. contradict H. apply In1_assoc1. now exists n1.
* subst n1. absurd (max < max). lia. apply (H2 max). now exists s1.
+ now apply B.
- intros. apply In1_add, H1.
- intros. destruct H2 with n. split.
+ intros. destruct H4, H4. inversion H4. lia.
lapply H0. lia. now exists x.
+ intros. destruct (Nat.eqb_spec n max). exists s. left. now rewrite e.
lapply H3. apply In2_add. lia.
Qed.
(** PARTIE Ic : Construction d'un dictionnaire initial *)
(* Nous aurons besoin d'un dictionnaire initial contenant exactement toutes
les paires formees d'un caractere et de son code ascii. *)
Definition init_dict : dict := map (fun c => (String (ascii_of_nat c) "", c)) (seq 0 256).
Lemma init_dict_spec : forall s n,
In (s, n) init_dict <-> exists a:ascii, s = String a "" /\ n = nat_of_ascii a.
Proof.
unfold In, init_dict. intros. rewrite in_map_iff. split; intros.
- destruct H as [x [[=]]]. exists (ascii_of_nat x).
rewrite in_seq in *. now rewrite nat_ascii_embedding.
- destruct H as [a []]. exists n. subst n. split.
now rewrite ascii_nat_embedding, H. rewrite in_seq. pose (nat_ascii_bounded a). lia.
Qed.
(* Ce dictionnaire verifie donc en particulier les invariants voulus. *)
Lemma init_dict_ok : Invariant init_dict 256.
Proof.
split; [|split].
- intros s1 n1 s2 n2 i1 i2. rewrite init_dict_spec in *. destruct i1 as [a1 []], i2 as [a2 []].
subst s1 s2 n1 n2. split; intros.
+ now inversion H.
+ f_equal. apply (f_equal ascii_of_nat) in H. now rewrite ascii_nat_embedding, ascii_nat_embedding in H.
- intros c. exists (nat_of_ascii c). rewrite init_dict_spec. now exists c.
- intros. split; intros.
+ destruct H. rewrite init_dict_spec in H. destruct H as [a [H1 H2]].
subst n. apply nat_ascii_bounded.
+ exists (String (ascii_of_nat n) ""). rewrite init_dict_spec. exists (ascii_of_nat n).
now rewrite nat_ascii_embedding.
Qed.
(* La recherche dans ce dictionnaire est symetrique: *)
Lemma init_assoc : forall (n:nat)(a:ascii),
assoc1_noerr (String a "") init_dict = n -> assoc2_noerr n init_dict = String a "".
Proof.
unfold assoc1_noerr, assoc2_noerr. intros. cut (In (String a "", nat_of_ascii a) init_dict).
- intros. destruct init_dict_ok as [B [I1 I2]].
rewrite In_assoc2 with n init_dict (String a ""); try easy.
rewrite In_assoc1 with (String a "") init_dict (nat_of_ascii a) in H; try easy.
simpl in H. now subst n.
- rewrite init_dict_spec. now exists a.
Qed.
(** PARTIE II: algorithmes de compression / decompression LZW *)
(* Voir la description de LZW dans le sujet du projet. *)
(** Compression *)
(* Parametres de encode :
s: chaine de caractere a encoder
buf: chaine temporaire contenant des caracteres en cours d'examen
max: taille de d ( = numero a utiliser lors du prochain ajout dans d)
d: dictionnaire
*)
Fixpoint encode s buf max d :=
match s with
| "" => match buf with
| "" => []
| _ => [assoc1_noerr buf d]
end
| String c s => let bufc := buf ++ String c "" in
match assoc1 bufc d with
| Some _ => encode s bufc max d
| None => assoc1_noerr buf d :: encode s (String c "") (S max) (add_dict bufc max d)
end
end.
Definition compress s := encode s "" 256 init_dict.
(** Decompression *)
(* La fonction auxiliaires suivante est utilisee pour determiner la chaine
correspondant a un code (p), sachant la derniere chaine decodee (prev)
et un dictionnaire (d) datant un peu (un cycle de retard).
*)
Definition nextstr (n:nat)(prev:string)(d:dict) :=
match assoc2 n d with
| Some str => str
| None => prev ++ head prev
end.
(* La boucle principale de decodage. Parametres:
l : liste des codes a decoder
prev : derniere chaine decodee
max : taille de d ( = numero a utiliser lors du prochain ajout dans d)
d : dictionnaire
*)
Fixpoint decode l prev max d := match l with
| [] => ""
| n :: l => let buf := nextstr n prev d in
buf ++ decode l buf (S max) (add_dict (prev ++ head buf) max d)
end.
(* Fonction principale de decompression. Avant de pouvoir lancer
decode, il faut s'occuper d'au moins un caractere, pour pouvoir
remplir le parametre prev.
*)
Definition uncompress l := match l with
| nil => ""
| n :: l =>
let prev := assoc2_noerr n init_dict in
prev++(decode l prev 256 init_dict)
end.
(** Preuve de correction. *)
(* Propriete de nextstr: *)
Lemma nextstr_spec : forall buf max prev n de dd,
prev <> "" ->
assoc1 buf de = Some n ->
de = add_dict (prev++head buf) max dd ->
Invariant dd max ->
nextstr n prev dd = buf.
Proof.
unfold nextstr. intros. apply assoc1_In in H0. subst de.
destruct H2 as [B [I J]], H0 as [[=]|]; case_eq (assoc2 n dd); intros.
- apply assoc2_In in H0. absurd (n < max). lia. apply J. now exists s.
- rewrite <- H1. f_equal. rewrite <- H1. unfold head.
destruct prev; try contradiction. now simpl.
- apply assoc2_In in H1. now rewrite (B s n buf n H1 H0).
- contradict H1. rewrite <- In2_assoc2. now exists buf.
Qed.
(* Le lemme fondamental. *)
Lemma decode_encode : forall s buf prev max de dd,
buf <> "" ->
prev <> "" ->
assoc1 buf de <> None ->
assoc1 (prev++head buf) dd = None ->
de = add_dict (prev++head buf) max dd ->
Invariant dd max ->
decode (encode s buf (S max) de) prev max dd = buf++s.
Proof.
intros. destruct buf as [|b uf]; try contradiction. clear H.
revert b uf prev max de dd H0 H1 H2 H3 H4. induction s; simpl; intros.
- rewrite append_empty, append_empty.
apply nextstr_spec with max de; try easy.
now apply assoc1_noerr_spec.
- case_eq (assoc1 (String b (uf ++ String a "")) de); intros; simpl.
+ rewrite <- (append_String uf a s). apply IHs; try easy. now rewrite H.
+ rewrite nextstr_spec with (buf := String b uf) (max := max) (de := de); try easy.
* simpl. f_equal. f_equal. subst de. apply IHs; try easy.
** rewrite <- In1_assoc1.
destruct H4 as [B [I1 I2]]. destruct I1 with a.
apply In1_add, In1_add. now exists x.
** now apply Invariant_propagates.
* now apply assoc1_noerr_spec.
Qed.
(* Un petit dernier pour la route. *)
Lemma encode_non_empty : forall s buf max d,
buf <> "" ->
encode s buf max d <> nil.
Proof.
intros. destruct buf; try contradiction. clear H. revert buf.
induction s; intros; simpl. easy.
destruct (assoc1 (String a (buf ++ String a0 "")) d). apply IHs. easy.
Qed.
(* Finalement, le theoreme de correction. *)
(* Attention! Dans la preuve qui vient, utiliser simpl sur un terme
contenant init_dict peut mener à un calcul __long__. Pour eviter cela,
et pouvoir simplifier malgre tout, une solution est d'utiliser
la tactique:
remember init_dict as d
avant le simpl fatal. De la sorte, init_dict est remplace par une
variable d, distincte de init_dict, mais prouvablement egale.
*)
Theorem uncompress_compress :
forall s : string, uncompress (compress s) = s.
Proof.
intros. unfold uncompress, compress. destruct s; simpl. easy.
destruct init_dict_ok as [B [I1 I2]]. pose proof (I1 a).
rewrite In1_assoc1 in H. destruct (assoc1 (String a "") init_dict); try contradiction.
destruct s; simpl.
- set (m := assoc1_noerr (String a "") init_dict). rewrite init_assoc with m a; easy.
- case_eq (assoc1 (String a (String a0 "")) init_dict); intros.
+ apply assoc1_In, init_dict_spec in H0. destruct H0 as [a1 []]. easy.
+ set (m := assoc1_noerr (String a "") init_dict). rewrite init_assoc with m a; try easy.
simpl. f_equal. apply decode_encode; try easy.
pose proof (I1 a0). apply In1_add with (s := String a (String a0 "")) (n := 256) in H1.
now rewrite In1_assoc1 in H1.
Qed.
(* ANNEXE: Des exemples, si vous voulez jouer avec vos fonctions *)
Definition ex1 := "repetition".
Definition ex2 := "repetition repetition repetition repetition".
Definition ex3 := "aaa".
Definition ex4 := "aaaaaa".
Eval vm_compute in (compress ex1).
Eval vm_compute in (uncompress (compress ex1)).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment