Skip to content

Instantly share code, notes, and snippets.

@Guest0x0
Last active July 7, 2023 02:19
Show Gist options
  • Save Guest0x0/844688233e1ea27b0a2307734271644d to your computer and use it in GitHub Desktop.
Save Guest0x0/844688233e1ea27b0a2307734271644d to your computer and use it in GitHub Desktop.
a minimal pattern matching compiler demo
(* a minimal pattern-matching compiler *)
(* use integer as variable for easy generation *)
type var = int
type typename = string
type constructor = string
type typ =
| Cons of typename * (constructor * typ list) list
type pattern =
(* variable binding is not handled for simplicity *)
| PAny
| PCon of constructor * pattern list
(* for simplicity, only to distinguish different match body *)
type match_body = int
let seed = ref 0
let fresh () =
incr seed; !seed
(* the output of compilation. assume we are initially matching variable $0 *)
type decision_tree =
| Fail
| Leaf of match_body
(* the variable to match + all cases.
every case contains:
- the constructor
- a list of variables that bind the data of the constructor
- a sub tree for remaining job *)
| Branch of typename * var * (constructor * var list * decision_tree) list
type warning =
| Unreachable of match_body
(* the pattern describes what values are unmatched *)
| Unmatched of pattern
let warnings = ref []
(* assume we found an unmatched constructor somewhere,
when reporting error, we want to reconstruct a complete value (w.r.t. the initial match expr).
[context] is used for this: it tells where we are in the whole match now.
A context is a pattern with a hole.
One can fill a pattern into the hole to obtain a complete pattern *)
type match_context =
| Outermost
| In_cons of {
parent : match_context;
constr : constructor;
before : pattern list; (* in reverse order *)
(* the "hole" is here *)
after : pattern list (* in normal order *)
}
(* fill a context with a pattern *)
let rec fill_context (ctx : match_context) (pat : pattern) : pattern =
match ctx with
| Outermost -> pat
| In_cons { parent; constr; before; after } ->
fill_context parent (PCon(constr, List.rev_append before (pat :: after)))
(* shift the hole in the context to the "next" position,
with [pat] filled into the original hole *)
let rec next_hole (ctx : match_context) (pat : pattern) : match_context =
match ctx with
| Outermost -> failwith "next_hole"
| In_cons { parent; constr; before; after } ->
match after with
| [] ->
next_hole parent (PCon(constr, List.rev_append before [pat]))
| _ :: rest ->
In_cons { parent; constr; before = pat :: before; after = rest }
(* When we are compiling something like:
match p with
| _ , true -> ...
| false, _ -> ...
| ...
there are multiple values to match,
and the result of matching the first value
will affect the result of matching the second.
So, compilation requires matching *multiple* values simutaneously.
We know that in each match arm,
the number of patterns should match the number of values matched.
Hence this kind of multiple-match problem would have the shape of a matrix:
match x1, x2, ..., xn with
| p_1_1, p_1_2, ..., p_1_n -> body_1
| p_2_1, p_2_2, ..., p_2_n -> body_2
| ...
| p_m_1, p_m_2, ..., p_m_n -> body_m *)
(* Here for simplicity, we use a list of match arms to represent the problem,
and leave the invariant that the number of patterns in eacr arm
is equal to the number of values implicit. *)
type match_arm =
{ pats : pattern list
; body : match_body }
let delete_first_pattern arm =
match arm.pats with
| PAny :: pats' -> { arm with pats = pats' }
| _ -> failwith "impossible"
let rec compile_aux
(* the values to match *)
(heads : (var * typ) list)
(* the arms of the match.
invariant: for any [arm] in [arms], [List.length heads = List.length arm.pats] *)
(arms : match_arm list)
(* a hash set used to track which branches are reachable *)
~(reachable : (match_body, unit) Hashtbl.t)
~(context : match_context)
: decision_tree =
match heads with
| [] ->
(* there is nothing left to match,
and we should receive the correct match body to execute *)
begin match arms with
| { pats = []; body } :: _ ->
Hashtbl.add reachable body ();
Leaf body
| _ ->
(* match failures should have been resolved before this stage *)
failwith "impossible"
end
| (var, typ) :: heads' ->
(* whether it is necessary to match this value *)
let is_necessary =
List.exists
(function { pats = (PCon _) :: _; _ } -> true | _ -> false)
arms
in
if not is_necessary
then
(* there's no need to match this value, simply move to the next position *)
compile_aux ~reachable ~context:(next_hole context PAny)
heads' (List.map delete_first_pattern arms)
else
let remaining_arms_for_constr constr item_typs =
arms |> List.filter_map @@ fun arm ->
match arm.pats with
| [] -> failwith "impossible"
| PAny :: pats' ->
(* maintain the correct number of patterns by filling [PAny] *)
Some { arm with pats = List.map (fun _ -> PAny) item_typs @ pats' }
| PCon(constr', item_pats) :: pats' ->
if constr' = constr
(* if the constructor matches, continue to match sub-patterns *)
then Some { arm with pats = item_pats @ pats' }
else None
in
let (Cons(typename, constrs)) = typ in
let decision_tree_branches = constrs |> List.map @@ fun (constr, item_typs) ->
(* assign a variable to each item of the constructor *)
let new_heads = List.map (fun typ -> fresh (), typ) item_typs in
let remaining_arms = remaining_arms_for_constr constr item_typs in
let subtree =
match remaining_arms with
| [] ->
(* match failure *)
let unmatched = fill_context context (PCon(constr, List.map (fun _ -> PAny) item_typs)) in
warnings := (Unmatched unmatched) :: !warnings;
Fail
| _ ->
let context' =
match new_heads with
| [] ->
(* the next hole is not inside this constructor.
find the next hole in the current context *)
if heads' = []
then context
else next_hole context (PCon(constr, []))
| _ :: rest ->
(* the next hole is inside this constructor.
initialize the items with [PAny] *)
In_cons { parent = context; constr; before = []; after = List.map (fun _ -> PAny) rest }
in
(* when the constructor matches,
the next thing to do is to match sub-patterns under the constructor.
So we assign a variable name to each item of the constructor,
and add these variables as new heads (aka. matched values) *)
compile_aux ~reachable ~context:context' (new_heads @ heads') remaining_arms
in
(constr, List.map fst new_heads, subtree)
in
Branch(typename, var, decision_tree_branches)
let compile ((typ, arms) : typ * (pattern * match_body) list) : decision_tree * warning list =
let reachable = Hashtbl.create 10 in
warnings := [];
let tree = compile_aux ~reachable ~context:Outermost
[0, typ]
(List.map (fun (pat, body) -> { pats = [pat]; body }) arms)
in
let unreachable = arms |> List.filter_map @@ fun (_, body) ->
if not (Hashtbl.mem reachable body)
then Some (Unreachable body)
else None
in
(tree, unreachable @ !warnings)
let bool : typ = Cons("bool", ["True", []; "False", []])
let pair x y : typ = Cons("pair", ["Pair", [x; y]])
let option x : typ = Cons("option", ["None", []; "Some", [x]])
let either x y : typ = Cons("either", ["Left", [x]; "Right", [y]])
let pAny = PAny
let pTrue = PCon("True", [])
let pFalse = PCon("False", [])
let pPair p1 p2 = PCon("Pair", [p1; p2])
let pNone = PCon("None", [])
let pSome p = PCon("Some", [p])
let pLeft p = PCon("Left", [p])
let pRight p = PCon("Right", [p])
let (@=>) pat body = (pat, body)
let ex1 =
( bool
, [ pTrue @=> 1
; pFalse @=> 2 ])
let ex2 =
( bool
, [ pTrue @=> 1
; pTrue @=> 2 ])
let ex3 =
( pair bool bool
, [ pPair pTrue pTrue @=> 1
; pPair pTrue pFalse @=> 2
; pPair pFalse pTrue @=> 3
; pPair pFalse pFalse @=> 4 ])
let ex4 =
( pair bool bool
, [ pPair pAny pTrue @=> 1
; pPair pTrue pAny @=> 2
; pPair pFalse pFalse @=> 3 ])
let ex5 =
( pair bool bool
, [ pPair pAny pTrue @=> 1
; pPair pTrue pAny @=> 2 ])
let ex6 =
( pair bool (pair bool bool)
, [ pPair pTrue (pPair pTrue pFalse) @=> 1
; pPair pAny (pPair pAny pFalse) @=> 2
; pPair pTrue (pPair pAny pTrue ) @=> 3
; pPair pFalse (pPair pTrue pAny ) @=> 4 ])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment