Skip to content

Instantly share code, notes, and snippets.

@neel-krishnaswami
Created April 20, 2017 10:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save neel-krishnaswami/1cb3f33ea46ca056310ac1361af11d30 to your computer and use it in GitHub Desktop.
Save neel-krishnaswami/1cb3f33ea46ca056310ac1361af11d30 to your computer and use it in GitHub Desktop.
Typed algebraic parsing
(* Types *)
module C = Set.Make(Char)
type tp = { null : bool; first : C.t; follow : C.t }
(* Grammars *)
type t =
| Var of string
| Fix of string * tp * t
| Eps
| Char of char
| Cat of t * t
| Bot
| Alt of t * t
| Nonnull of t
(* Type operators *)
let tp_eps = { null = true; first = C.empty; follow = C.empty }
let tp_bot = { null = false; first = C.empty; follow = C.empty }
let tp_char c = { null = false; first = C.singleton c; follow = C.empty }
let tp_alt tp tp' = {
null = tp.null || tp'.null ;
first = C.union tp.first tp'.first;
follow = C.union tp.follow tp'.follow }
let tp_seq tp tp' = {
null = tp.null && tp'.null;
first = tp.first;
follow = C.union tp'.follow (if tp'.null then tp.follow else C.empty) }
let tp_nonnull tp = { tp with null = false }
let apart tp tp' = not (tp.null && tp'.null)
&& C.is_empty (C.inter tp.first tp'.first)
let seq tp tp' = not tp.null && C.is_empty (C.inter tp.follow tp'.first)
(* Contexts and typechecking. We use a unified context with visibility
markers on hypotheses rather than two zones. *)
type hyp = { tp : tp; visible : bool }
type ctx = (string * hyp) list
let lookup x ctx = let h = List.assoc x ctx in
if h.visible then h.tp else raise Not_found
let make_visible ctx = List.map (fun (x,h) -> (x, {h with visible = true})) ctx
let rec typecheck ctx = function
| Var x -> lookup x ctx
| Fix (x,tp,g) -> typecheck ((x, {tp = tp; visible = false}) :: ctx) g
| Eps -> tp_eps
| Char c -> tp_char c
| Cat (g1, g2) -> let tp1 = typecheck ctx g1 in
let tp2 = typecheck (make_visible ctx) g2 in
if seq tp1 tp2 then
tp_seq tp1 tp2
else
failwith "Ambiguous sequential composition"
| Bot -> tp_bot
| Alt (g1, g2) -> let tp1 = typecheck ctx g1 in
let tp2 = typecheck ctx g2 in
if apart tp1 tp2 then
tp_alt tp1 tp2
else
failwith "Alternatives not disjoint"
| Nonnull g -> tp_nonnull (typecheck ctx g)
(* Substitution. We don't care about renaming because we only substitute values. *)
let rec subst g' x = function
| Var _ -> assert false
| Fix (y,tp,g) when x = y -> Fix (y,tp,g)
| Fix (y,tp,g) -> Fix (y,tp,subst g' x g)
| Eps -> Eps
| Char c -> Char c
| Cat (g1, g2) -> Cat(subst g' x g1, subst g' x g2)
| Bot -> Bot
| Alt (g1, g2) -> Alt(subst g' x g1, subst g' x g2)
| Nonnull g -> Nonnull (subst g' x g)
let cat g1 g2 = if g1 = Bot || g2 = Bot then Bot else Cat(g1, g2)
let alt g1 g2 = if g1 = Bot then g2 else if g2 = Bot then g1 else Alt(g1, g2)
(* Null checking and derivatives. Again we only care about closed terms. *)
let rec isnull = function
| Var x -> assert false
| Fix (y,tp,g) -> isnull g
| Eps -> true
| Alt (g1, g2) -> isnull g1 || isnull g2
| _ -> false
let rec deriv c = function
| Var _ -> assert false
| Fix (x,tp,g) -> subst (Fix (x,tp,g)) x (deriv c g)
| Eps -> Bot
| Char c' -> if c = c' then Eps else Bot
| Cat (g1,g2) -> let g1' = deriv c g1 in
if isnull g1' then
alt (cat (Nonnull g1') g2) g2
else
cat g1' g2
| Bot -> Bot
| Alt (g1, g2) -> alt (deriv c g1) (deriv c g2)
| Nonnull g -> deriv c g
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment