Skip to content

Instantly share code, notes, and snippets.

@Agnishom
Last active May 13, 2023 07:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Agnishom/adcb9072fe71db31878a092a7a85b6eb to your computer and use it in GitHub Desktop.
Save Agnishom/adcb9072fe71db31878a092a7a85b6eb to your computer and use it in GitHub Desktop.
Parse Tree Preference
Require Import Coq.Lists.List.
Declare Scope regex_scope.
Open Scope regex_scope.
Section Regex.
Variable (A : Type).
Inductive Regex : Type :=
| Epsilon : Regex
| CharClass : (A -> bool) -> Regex
| Concat : Regex -> Regex -> Regex
| Union : Regex -> Regex -> Regex
| Star : Regex -> Regex
.
Definition ε := Epsilon.
Infix "·" := Concat (at level 60, right associativity) : regex_scope.
Infix "∪" := Union (at level 50, left associativity) : regex_scope.
Notation "r *" := (Star r) (at level 40, no associativity) : regex_scope.
Inductive parse_tree : Regex -> list A -> nat -> nat -> Type :=
| parse_epsilon : forall (w : list A) (start : nat),
parse_tree ε w start 0
| parse_charclass : forall (a : A) (pred : A -> bool) (w : list A) (start : nat),
(pred a = true)
-> (nth_error w start = Some a)
-> parse_tree (CharClass pred) w start 1
| parse_concat : forall (r1 r2 : Regex) (w : list A) (start d1 d2 : nat),
parse_tree r1 w start d1
-> parse_tree r2 w (start + d1) d2
-> parse_tree (r1 · r2) w start (d1 + d2)
| parse_union_l : forall (r1 r2 : Regex) (w : list A) (start d : nat),
parse_tree r1 w start d
-> parse_tree (r1 ∪ r2) w start d
| parse_union_r : forall (r1 r2 : Regex) (w : list A) (start d : nat),
parse_tree r2 w start d
-> parse_tree (r1 ∪ r2) w start d
| parse_star_nil : forall (r : Regex) (w : list A) (start : nat),
parse_tree (r *) w start 0
| parse_star_cons : forall (r : Regex) (w : list A) (start d1 d2 : nat),
d1 > 0
-> parse_tree r w start d1
-> parse_tree (r *) w (start + d1) d2
-> parse_tree (r *) w start (d1 + d2)
.
Inductive prefer : forall (r : Regex) (w : list A) (i1 d1 i2 d2: nat),
parse_tree r w i1 d1 -> parse_tree r w i2 d2 -> Prop :=
| prefer_union_l :
forall (r1 r2 : Regex) (w : list A) (i1 d1 i2 d2: nat)
(p1 : parse_tree r1 w i1 d1) (p2 : parse_tree r2 w i2 d2),
prefer (r1 ∪ r2) w i1 d1 i2 d2
(parse_union_l r1 r2 w i1 d1 p1)
(parse_union_r r1 r2 w i2 d2 p2)
| prefer_concat_lex1 :
forall (r s : Regex) (w : list A) (ir1 dr1 ir2 dr2 ds1 ds2 : nat)
(pr1 : parse_tree r w ir1 dr1) (pr2 : parse_tree r w ir2 dr2)
(ps1 : parse_tree s w (ir1 + dr1) ds1) (ps2 : parse_tree s w (ir2 + dr2) ds2),
prefer r w ir1 dr1 ir2 dr2 pr1 pr2
-> prefer (r · s) w ir1 (dr1 + ds1) ir2 (dr2 + ds2)
(parse_concat r s w ir1 dr1 ds1 pr1 ps1)
(parse_concat r s w ir2 dr2 ds2 pr2 ps2)
| prefer_concat_lex2 :
forall (r s : Regex) (w : list A) (i dr ds1 ds2 : nat)
(pr : parse_tree r w i dr)
(ps1 : parse_tree s w (i + dr) ds1) (ps2 : parse_tree s w (i + dr) ds2),
prefer s w (i + dr) ds1 (i + dr) ds2 ps1 ps2
-> prefer (r · s) w i (dr + ds1) i (dr + ds2)
(parse_concat r s w i dr ds1 pr ps1)
(parse_concat r s w i dr ds2 pr ps2)
| prefer_cons_nil :
forall (r : Regex) (w : list A) (i1 i2 d1 d1' : nat)
(d1_gt_0 : d1 > 0)
(p1 : parse_tree r w i1 d1) (p1' : parse_tree (r *) w (i1 + d1) d1'),
prefer (r *) w i1 (d1 + d1') i2 0
(parse_star_cons r w i1 d1 d1' d1_gt_0 p1 p1')
(parse_star_nil r w i2)
| prefer_cons_lex1 :
forall (r : Regex) (w : list A) (i1 d1 d1' i2 d2 d2' : nat)
(d1_gt_0 : d1 > 0) (d2_gt_0 : d2 > 0)
(p1 : parse_tree r w i1 d1) (p1' : parse_tree (r *) w (i1 + d1) d1')
(p2 : parse_tree r w i2 d2) (p2' : parse_tree (r *) w (i2 + d2) d2'),
prefer r w i1 d1 i2 d2 p1 p2
-> prefer (r *) w i1 (d1 + d1') i2 (d2 + d2')
(parse_star_cons r w i1 d1 d1' d1_gt_0 p1 p1')
(parse_star_cons r w i2 d2 d2' d2_gt_0 p2 p2')
| prefer_cons_lex2 :
forall (r : Regex) (w : list A) (i d d1 d2 : nat)
(d_gt_0 : d > 0) (p : parse_tree r w i d)
(p1 : parse_tree (r *) w (i + d) d1) (p2 : parse_tree (r *) w (i + d) d2),
prefer (r *) w (i + d) d1 (i + d) d2 p1 p2
-> prefer (r *) w i (d + d1) i (d + d2)
(parse_star_cons r w i d d1 d_gt_0 p p1)
(parse_star_cons r w i d d2 d_gt_0 p p2)
.
Lemma prefer_eps_antirefl :
forall (w : list A) (i d : nat) (p : parse_tree _ _ _ _),
~ prefer ε w i d i d p p.
Proof.
unfold not. intros.
remember ε as r.
destruct p; inversion Heqr.
- inversion H.
Qed.
Lemma prefer_charclass_antirefl :
forall (c : A -> bool) (w : list A) (i d : nat) (p : parse_tree _ _ _ _),
~ prefer (CharClass c) w i d i d p p.
Proof.
unfold not. intros.
remember (CharClass c) as r.
destruct p; inversion Heqr.
- inversion H.
Qed.
Lemma prefer_union_antirefl :
forall (r1 r2 : Regex) (w : list A) (i d : nat) (p : parse_tree _ _ _ _),
~ prefer (r1 ∪ r2) w i d i d p p.
Proof.
unfold not. intros.
remember (r1 ∪ r2) as r.
destruct p; inversion Heqr; inversion H.
Qed.
Lemma prefer_antirefl :
forall (r : Regex) (w : list A) (i d : nat) (p : parse_tree _ _ _ _),
~ prefer r w i d i d p p.
Proof.
unfold not. induction r; intros.
- apply prefer_eps_antirefl in H. assumption.
- apply prefer_charclass_antirefl in H. assumption.
- remember (r1 · r2) as r.
destruct p; inversion Heqr.
subst.
inversion H.
+ admit.
+ admit.
- apply prefer_union_antirefl in H. assumption.
- remember (r *) as s.
destruct p; inversion Heqs.
subst.
+ inversion H.
+ admit.
Admitted.
Inductive greedy_leq : forall (r : Regex) (w : list A) (i1 d1 i2 d2 : nat),
parse_tree r w i1 d1 -> parse_tree r w i2 d2 -> Prop :=
| greedy_leq_refl :
forall (r : Regex) (w : list A) (i d : nat)
(p : parse_tree r w i d),
greedy_leq r w i d i d p p
| greedy_leq_step :
forall (r : Regex) (w : list A) (i1 d1 i2 d2 : nat)
(p1 : parse_tree r w i1 d1) (p2 : parse_tree r w i2 d2),
prefer r w i1 d1 i2 d2 p1 p2
-> greedy_leq r w i1 d1 i2 d2 p1 p2
| greedy_leq_trans :
forall (r : Regex) (w : list A) (i1 d1 i2 d2 i3 d3 : nat)
(p1 : parse_tree r w i1 d1) (p2 : parse_tree r w i2 d2) (p3 : parse_tree r w i3 d3),
greedy_leq r w i1 d1 i2 d2 p1 p2
-> greedy_leq r w i2 d2 i3 d3 p2 p3
-> greedy_leq r w i1 d1 i3 d3 p1 p3
.
Require Import Coq.Lists.List.
Import ListNotations.
Section Regex.
Variable (A : Type).
Inductive SimpleRegex : Type :=
| CharClass : (A -> bool) -> SimpleRegex
| Concat : SimpleRegex -> SimpleRegex -> SimpleRegex
.
Inductive parse_tree : SimpleRegex -> list A -> Type :=
| parse_charclass : forall f x,
f x = true ->
parse_tree (CharClass f) [x]
| parse_concat : forall r1 r2 s1 s2,
parse_tree r1 s1 ->
parse_tree r2 s2 ->
parse_tree (Concat r1 r2) (s1 ++ s2)
.
Inductive prefer : forall (r : SimpleRegex) (w1 w2 : list A),
parse_tree r w1 -> parse_tree r w2 -> Prop :=
| prefer_concat_lex1 :
forall (r s : SimpleRegex) (wr1 wr2 ws1 ws2 : list A)
(pwr1 : parse_tree r wr1) (pwr2 : parse_tree r wr2)
(pws1 : parse_tree s ws1) (pws2 : parse_tree s ws2),
prefer r wr1 wr2 pwr1 pwr2
-> prefer (Concat r s) (wr1 ++ ws1) (wr2 ++ ws2)
(parse_concat r s wr1 ws1 pwr1 pws1) (parse_concat r s wr2 ws2 pwr2 pws2)
| prefer_concat_lex2 :
forall (r s : SimpleRegex) (wr ws1 ws2 : list A)
(pwr : parse_tree r wr)
(pws1 : parse_tree s ws1) (pws2 : parse_tree s ws2),
prefer s ws1 ws2 pws1 pws2
-> prefer (Concat r s) (wr ++ ws1) (wr ++ ws2)
(parse_concat r s wr ws1 pwr pws1) (parse_concat r s wr ws2 pwr pws2)
.
Lemma prefer_charclass_antirefl :
forall (c : A -> bool) (w : list A) (p : parse_tree (CharClass c) w),
~ prefer (CharClass c) w w p p.
Proof.
unfold not. intros.
remember (CharClass c) as r.
destruct p; inversion Heqr.
- inversion H.
Qed.
Lemma prefer_antirefl : forall r w (p : parse_tree r w),
~ prefer r w w p p.
Proof.
unfold not. induction r; intros.
- now apply prefer_charclass_antirefl in H.
- remember (Concat r1 r2) as r.
destruct p; inversion Heqr.
(* This inversion above does away with all the cases
where p is not related to Concat
*)
inversion H. subst.
+ admit.
+ admit.
Admitted.
@andrejbauer
Copy link

I would expect your definition to look like this:

I would expect your definition to look like this (avoid `nth_error`, and parse complete lists):

Inductive parse_tree' : Regex -> list A -> Type :=
| parse_epsilon' : parse_tree' ε nil
| parse_charclass' : forall a pred, pred a = true -> parse_tree' (CharClass pred) (a :: nil)
| parse_concat' : forall r1 r2 l1 l2, parse_tree' r1 l1 -> parse_tree' r2 l2 -> parse_tree' (Concat r1 r2) (l1 ++ l2)
| parse_union_l' : forall r1 r2 l, parse_tree' r1 l -> parse_tree' (Union r1 r2) l
| parse_union_r' : forall r1 r2 l, parse_tree' r2 l -> parse_tree' (Union r1 r2) l
| parse_star_nil' : forall r, parse_tree' (Star r) nil
| parse_star_cons' : forall r l1 l2, parse_tree' r l1 -> parse_tree' r l2 -> parse_tree' (Star r) (l1 ++ l2).


That is, avoid `nth_error` and use the structure of lists. If you would like partial parses, so that not the entire list is parsed, you can create an auxiliary definition for that purpose:

Inductive parse_partial : Regex -> list A -> Type :=
| parse_initial : forall r l1 l2, parse_tree' r l1 -> parse_partial r (l1 ++ l2).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment