bobatkey/gadts.sml

## gadts.sml
(* This is a demonstration of the use of the SML module system to
   encode (Generalized Algebraic Datatypes) GADTs via Church
   encodings. The basic idea is to use the Church encoding of GADTs in
   System Fomega and translate the System Fomega type into the module
   system. As I demonstrate below, this allows things like the
   singleton type of booleans, and the equality type, to be
   represented.

   This was inspired by Jon Sterling's blog post about encoding proofs
   in the SML module system:

       http://www.jonmsterling.com/posts/2014-01-04-standard-ml-as-a-theorem-prover.html

   The implementation below actually implements the boolean and
   equality elimination rules postulated in that post, and fixes a
   problem with the definition of the boolean eliminator.

   Notes:

   1. Each GADT is defined as structures containing its parameters,
   and its eliminator, as would be expected for a Church encoding.

   2. It isn't possible to directly copy the Fomega definitions into
   SML, because SML lacks higher-kinded polymorphism in the core
   language. The encoding presented here relies on the fact that the
   SML module system (with SML/NJ extensions) allows for some of
   System F omega to be encoded. See the link below to Rossberg, Russo
   and Dreyer's paper on encoding modules into System F omega.

   3. As hinted in the last point, this is not standard Standard
   ML. The encoding relies on being able to put functors inside
   modules, which AFAIK is an SML/NJ extension. This code doesn't work
   in, e.g., PolyML.

   4. This encoding is effectively useless for writing actual SML
   programs, unless you use an ML system with first-class module
   support. Values of each of the defined GADTs are built up by using
   structures and functor applications, which can only happen at
   compile-time. Thus we cannot construct or eliminate values of GADTs
   at runtime.

   In OCaml, which does have support for first-class modules, we can
   reflect the module types defined below as value types, as so
   construct them, pass them around, and eliminate them, at
   runtime. In OCaml:

       type ('a,'b) = (module Eq with type a = 'a and type b = 'b)

       let refl = (module Refl)

   3. Each of the eliminators is defined in terms of eliminating to a
   value type (e.g., 'a f in the signature BoolIndData). In each of
   the proof examples below, this is eventually wrapped up into a new
   structure of the right signature. Unforunately, this means that
   proofs are often constructed in a roundabout
   continuation-passing-style (see the proof of NotNotX_eq_X
   below).

   Ideally, I would've liked to define eliminators in terms of
   elimination into a given signature with a type parameter. However,
   I'm pretty sure that this would require the ability to abstract
   over signatures, which SML(/NJ) does cater for, and OCaml only has
   limited support.

   In a system with first-class module support, this problem is
   eliminated (lol) because we can lower everything into the value
   language. (Although OCaml's lack of real higher-kinded type
   variables eventually causes pain).

   4. Neither SML(/NJ) nor OCaml have support for defining type-level
   functions that aren't just either synoyms or fresh datatype
   definitions. Therefore, to simulate type-level functions, I copied
   Jon Sterling's approach and simply postulated the defining
   equations of the functions I needed.

   5. One might reasonably worry whether or not the GADTs defined
   below are really the genuine thing. An answer to this is provided
   by (a) an encoding of the SML module system in Fomega by Rossberg,
   Russo and Dreyer's "F-ing modules":

      http://www.mpi-sws.org/~dreyer/papers/f-ing/main.pdf

   and (b) by my paper that constructs a relationally parametric
   module of System F omega and proves that the Church encodings of
   GADTs are adequate:

      http://bentnib.org/fomega-parametricity.html

   These together do not form a fully formal proof in this specific
   case however, since neither paper deals with the extra features of
   SML. In particular, the effect of effects on relational
   parametricity has not yet, to my knowledge, been studied on
   conjunction with higher kinds. *)

(**********************************************************************)
(* Encoding of the singleton boolean GADT.

   In Haskell:

       data Bool :: * -> * where
          TT :: Bool TT
          FF :: Bool FF

   In System F omega, Church-encoded:

       Bool b = forall f : * -> *. f tt -> f ff -> f b
*)

(* Type-level representations of the bool constructors. *)
type tt = unit
type ff = unit

(* A generic bool-elimination job description. *)
signature BoolIndData =
sig
  type 'a f
  val case_tt : tt f
  val case_ff : ff f
end

signature Bool =
sig
  type v
  functor Elim (I : BoolIndData) : sig val proof : v I.f end
end

structure TT : Bool where type v = tt =
struct
  type v = tt
  functor Elim (I : BoolIndData) =
  struct
    val proof = I.case_tt
  end
end

structure FF : Bool where type v = ff =
struct
  type v = ff
  functor Elim (I : BoolIndData) =
  struct
    val proof = I.case_ff
  end
end

(**********************************************************************)
(* Encoding of the equality type as 'the least reflexive relation':

   In Haskell:

       data Eq :: * -> * -> * where
          Refl :: Eq a a

   In System F omega:

       Eq a b = forall f : * -> * -> *. (forall x. f x x) -> f a b

   Alternative encoding ("Leibniz equality"):

       Eq a b = forall c : * -> *. c a -> c b *)

signature EqIndData =
sig
  type ('a,'b) f
  val case_refl : ('a,'a) f
end

signature Eq =
sig
  type a
  type b
  functor Elim (I : EqIndData) : sig val proof : (a,b) I.f end
end

functor Refl (T : sig type t end) : Eq where type a = T.t and type b = T.t =
struct
  type a = T.t
  type b = T.t
  functor Elim (I : EqIndData) =
  struct
    val proof = I.case_refl
  end
end

(* Symmetry, transitivity, and congruence can be derived: *)

(* Equality is symmetric. *)
functor Symm (E : Eq) : Eq where type a = E.b and type b = E.a =
struct
  type a = E.b
  type b = E.a
  functor Elim (I : EqIndData) =
  struct
    structure P = E.Elim (struct type ('a,'b) f = ('b,'a) I.f
                                      val case_refl = I.case_refl
                               end)
    val proof = P.proof
  end
end

(* Equality is transitive. *)
functor Trans (E1 : Eq) (E2 : Eq where type a = E1.b) : Eq where type a = E1.a and type b = E2.b =
struct
  type a = E1.a
  type b = E2.b
  functor Elim (I : EqIndData) =
  struct
    structure P1 = E1.Elim
      (struct type ('a,'b) f = ('b,E2.b) I.f -> ('a,E2.b) I.f
              val case_refl = fn x => x
       end)
    structure P2 = E2.Elim
      (struct type ('a,'b) f = ('a,'b) I.f
              val case_refl = I.case_refl
       end)
    (* P1.proof : (E1.b,E2.b) I.f -> (E1.a,E2.b) I.f  (where E1.b = E2.a) *)
    (* P2.proof : (E2.a,E2.b) I.f *)
    val proof = P1.proof P2.proof
  end
end

(* Equality is a congruence. *)
functor Cong (E : Eq) (F : sig type 'a t end) : Eq where type a = E.a F.t and type b = E.b F.t =
struct
  type a = E.a F.t
  type b = E.b F.t
  functor Elim (I : EqIndData) =
  struct
    structure P = E.Elim
      (struct type ('a,'b) f = ('a F.t, 'b F.t) I.f
              val case_refl = I.case_refl
       end)
    val proof = P.proof
  end
end

(**********************************************************************)
(* I postulate the existence of certain type-level functions by
   assuming their defining equations. In this case I postulate the
   existence of a type-level function 'not' with the two obvious
   equations.

   In Haskell/GHC, we could've used a type family to define 'real'
   type-level functions:

       type family Not b :: *
       type instance Not TT = FF
       type instance Not FF = TT

   and the type checker would then hold definitionally for us, and we
   wouldn't have to directly use these equations in the proof of
   involution below.

   Note that System F omega doesn't allow for type-level functions to
   be defined by pattern matching on objects of kind * either (and
   neither does Coq or Agda). The 'proper' way to do this would be to
   define a new kind of type-level booleans, which come with a
   (non-dependent) elimination principle. *)

signature NOT =
sig
  type 'a not

  structure not_tt : Eq where type a = tt not and type b = ff
  structure not_ff : Eq where type a = ff not and type b = tt
end

(* Proof that any 'not' satisfying NOT is involutive over the
   booleans. *)
functor Proofs (NotImpl : NOT) =
struct
  open NotImpl

  structure Lem1 : Eq where type a = tt not not and type b = tt =
    Trans (Cong (not_tt) (type 'a t = 'a not)) (not_ff)

  structure Lem2 : Eq where type a = ff not not and type b = ff =
    Trans (Cong (not_ff) (type 'a t = 'a not)) (not_tt)

  functor NotNotX_eq_X (B : Bool) : Eq where type a = B.v not not and type b = B.v =
  struct
    type a = B.v not not
    type b = B.v
    functor Elim (I : EqIndData) =
    struct
      structure P1 = Lem1.Elim (I)
      structure P2 = Lem2.Elim (I)
      structure P = B.Elim
        (struct
           type 'a f = ('a not not, 'a) I.f
           val case_tt = P1.proof
           val case_ff = P2.proof
          end)
        val proof = P.proof
    end
  end
end

(**********************************************************************)
(* Encoding of the singleton natural numbers GADT:

   Nat x =
      forall f : * -> *. f zero -> (forall n. f n -> f (succ n)) -> f x
*)
type zero = unit
type 'a succ = unit

signature NatIndData =
sig
  type 'a f
  val case_zero : zero f
  val case_succ : 'a f -> 'a succ f
end

signature Nat =
sig
  type v

  functor Elim (I : NatIndData) : sig val proof : v I.f end
end

structure Zero : Nat where type v = zero =
struct
  type v = zero

  functor Elim (I : NatIndData) =
  struct
    val proof = I.case_zero
  end
end

functor Succ (N : Nat) : Nat where type v = N.v succ =
struct
  type v = N.v succ

  functor Elim (I : NatIndData) =
  struct
    structure P = N.Elim (I)
    val proof = I.case_succ P.proof
  end
end
	(* This is a demonstration of the use of the SML module system to
	encode (Generalized Algebraic Datatypes) GADTs via Church
	encodings. The basic idea is to use the Church encoding of GADTs in
	System Fomega and translate the System Fomega type into the module
	system. As I demonstrate below, this allows things like the
	singleton type of booleans, and the equality type, to be
	represented.

	This was inspired by Jon Sterling's blog post about encoding proofs
	in the SML module system:

	http://www.jonmsterling.com/posts/2014-01-04-standard-ml-as-a-theorem-prover.html

	The implementation below actually implements the boolean and
	equality elimination rules postulated in that post, and fixes a
	problem with the definition of the boolean eliminator.

	Notes:

	1. Each GADT is defined as structures containing its parameters,
	and its eliminator, as would be expected for a Church encoding.

	2. It isn't possible to directly copy the Fomega definitions into
	SML, because SML lacks higher-kinded polymorphism in the core
	language. The encoding presented here relies on the fact that the
	SML module system (with SML/NJ extensions) allows for some of
	System F omega to be encoded. See the link below to Rossberg, Russo
	and Dreyer's paper on encoding modules into System F omega.

	3. As hinted in the last point, this is not standard Standard
	ML. The encoding relies on being able to put functors inside
	modules, which AFAIK is an SML/NJ extension. This code doesn't work
	in, e.g., PolyML.

	4. This encoding is effectively useless for writing actual SML
	programs, unless you use an ML system with first-class module
	support. Values of each of the defined GADTs are built up by using
	structures and functor applications, which can only happen at
	compile-time. Thus we cannot construct or eliminate values of GADTs
	at runtime.

	In OCaml, which does have support for first-class modules, we can
	reflect the module types defined below as value types, as so
	construct them, pass them around, and eliminate them, at
	runtime. In OCaml:

	type ('a,'b) = (module Eq with type a = 'a and type b = 'b)

	let refl = (module Refl)

	3. Each of the eliminators is defined in terms of eliminating to a
	value type (e.g., 'a f in the signature BoolIndData). In each of
	the proof examples below, this is eventually wrapped up into a new
	structure of the right signature. Unforunately, this means that
	proofs are often constructed in a roundabout
	continuation-passing-style (see the proof of NotNotX_eq_X
	below).

	Ideally, I would've liked to define eliminators in terms of
	elimination into a given signature with a type parameter. However,
	I'm pretty sure that this would require the ability to abstract
	over signatures, which SML(/NJ) does cater for, and OCaml only has
	limited support.

	In a system with first-class module support, this problem is
	eliminated (lol) because we can lower everything into the value
	language. (Although OCaml's lack of real higher-kinded type
	variables eventually causes pain).

	4. Neither SML(/NJ) nor OCaml have support for defining type-level
	functions that aren't just either synoyms or fresh datatype
	definitions. Therefore, to simulate type-level functions, I copied
	Jon Sterling's approach and simply postulated the defining
	equations of the functions I needed.

	5. One might reasonably worry whether or not the GADTs defined
	below are really the genuine thing. An answer to this is provided
	by (a) an encoding of the SML module system in Fomega by Rossberg,
	Russo and Dreyer's "F-ing modules":

	http://www.mpi-sws.org/~dreyer/papers/f-ing/main.pdf

	and (b) by my paper that constructs a relationally parametric
	module of System F omega and proves that the Church encodings of
	GADTs are adequate:

	http://bentnib.org/fomega-parametricity.html

	These together do not form a fully formal proof in this specific
	case however, since neither paper deals with the extra features of
	SML. In particular, the effect of effects on relational
	parametricity has not yet, to my knowledge, been studied on
	conjunction with higher kinds. *)

	(**********************************************************************)
	(* Encoding of the singleton boolean GADT.

	In Haskell:

	data Bool :: * -> * where
	TT :: Bool TT
	FF :: Bool FF

	In System F omega, Church-encoded:

	Bool b = forall f : * -> *. f tt -> f ff -> f b
	*)

	(* Type-level representations of the bool constructors. *)
	type tt = unit
	type ff = unit

	(* A generic bool-elimination job description. *)
	signature BoolIndData =
	sig
	type 'a f
	val case_tt : tt f
	val case_ff : ff f
	end

	signature Bool =
	sig
	type v
	functor Elim (I : BoolIndData) : sig val proof : v I.f end
	end

	structure TT : Bool where type v = tt =
	struct
	type v = tt
	functor Elim (I : BoolIndData) =
	struct
	val proof = I.case_tt
	end
	end

	structure FF : Bool where type v = ff =
	struct
	type v = ff
	functor Elim (I : BoolIndData) =
	struct
	val proof = I.case_ff
	end
	end

	(**********************************************************************)
	(* Encoding of the equality type as 'the least reflexive relation':

	In Haskell:

	data Eq :: * -> * -> * where
	Refl :: Eq a a

	In System F omega:

	Eq a b = forall f : * -> * -> *. (forall x. f x x) -> f a b

	Alternative encoding ("Leibniz equality"):

	Eq a b = forall c : * -> . c a -> c b )

	signature EqIndData =
	sig
	type ('a,'b) f
	val case_refl : ('a,'a) f
	end

	signature Eq =
	sig
	type a
	type b
	functor Elim (I : EqIndData) : sig val proof : (a,b) I.f end
	end

	functor Refl (T : sig type t end) : Eq where type a = T.t and type b = T.t =
	struct
	type a = T.t
	type b = T.t
	functor Elim (I : EqIndData) =
	struct
	val proof = I.case_refl
	end
	end

	(* Symmetry, transitivity, and congruence can be derived: *)

	(* Equality is symmetric. *)
	functor Symm (E : Eq) : Eq where type a = E.b and type b = E.a =
	struct
	type a = E.b
	type b = E.a
	functor Elim (I : EqIndData) =
	struct
	structure P = E.Elim (struct type ('a,'b) f = ('b,'a) I.f
	val case_refl = I.case_refl
	end)
	val proof = P.proof
	end
	end

	(* Equality is transitive. *)
	functor Trans (E1 : Eq) (E2 : Eq where type a = E1.b) : Eq where type a = E1.a and type b = E2.b =
	struct
	type a = E1.a
	type b = E2.b
	functor Elim (I : EqIndData) =
	struct
	structure P1 = E1.Elim
	(struct type ('a,'b) f = ('b,E2.b) I.f -> ('a,E2.b) I.f
	val case_refl = fn x => x
	end)
	structure P2 = E2.Elim
	(struct type ('a,'b) f = ('a,'b) I.f
	val case_refl = I.case_refl
	end)
	(* P1.proof : (E1.b,E2.b) I.f -> (E1.a,E2.b) I.f (where E1.b = E2.a) *)
	(* P2.proof : (E2.a,E2.b) I.f *)
	val proof = P1.proof P2.proof
	end
	end

	(* Equality is a congruence. *)
	functor Cong (E : Eq) (F : sig type 'a t end) : Eq where type a = E.a F.t and type b = E.b F.t =
	struct
	type a = E.a F.t
	type b = E.b F.t
	functor Elim (I : EqIndData) =
	struct
	structure P = E.Elim
	(struct type ('a,'b) f = ('a F.t, 'b F.t) I.f
	val case_refl = I.case_refl
	end)
	val proof = P.proof
	end
	end

	(**********************************************************************)
	(* I postulate the existence of certain type-level functions by
	assuming their defining equations. In this case I postulate the
	existence of a type-level function 'not' with the two obvious
	equations.

	In Haskell/GHC, we could've used a type family to define 'real'
	type-level functions:

	type family Not b :: *
	type instance Not TT = FF
	type instance Not FF = TT

	and the type checker would then hold definitionally for us, and we
	wouldn't have to directly use these equations in the proof of
	involution below.

	Note that System F omega doesn't allow for type-level functions to
	be defined by pattern matching on objects of kind * either (and
	neither does Coq or Agda). The 'proper' way to do this would be to
	define a new kind of type-level booleans, which come with a
	(non-dependent) elimination principle. *)

	signature NOT =
	sig
	type 'a not

	structure not_tt : Eq where type a = tt not and type b = ff
	structure not_ff : Eq where type a = ff not and type b = tt
	end

	(* Proof that any 'not' satisfying NOT is involutive over the
	booleans. *)
	functor Proofs (NotImpl : NOT) =
	struct
	open NotImpl

	structure Lem1 : Eq where type a = tt not not and type b = tt =
	Trans (Cong (not_tt) (type 'a t = 'a not)) (not_ff)

	structure Lem2 : Eq where type a = ff not not and type b = ff =
	Trans (Cong (not_ff) (type 'a t = 'a not)) (not_tt)

	functor NotNotX_eq_X (B : Bool) : Eq where type a = B.v not not and type b = B.v =
	struct
	type a = B.v not not
	type b = B.v
	functor Elim (I : EqIndData) =
	struct
	structure P1 = Lem1.Elim (I)
	structure P2 = Lem2.Elim (I)
	structure P = B.Elim
	(struct
	type 'a f = ('a not not, 'a) I.f
	val case_tt = P1.proof
	val case_ff = P2.proof
	end)
	val proof = P.proof
	end
	end
	end

	(**********************************************************************)
	(* Encoding of the singleton natural numbers GADT:

	Nat x =
	forall f : * -> *. f zero -> (forall n. f n -> f (succ n)) -> f x
	*)
	type zero = unit
	type 'a succ = unit

	signature NatIndData =
	sig
	type 'a f
	val case_zero : zero f
	val case_succ : 'a f -> 'a succ f
	end

	signature Nat =
	sig
	type v

	functor Elim (I : NatIndData) : sig val proof : v I.f end
	end

	structure Zero : Nat where type v = zero =
	struct
	type v = zero

	functor Elim (I : NatIndData) =
	struct
	val proof = I.case_zero
	end
	end

	functor Succ (N : Nat) : Nat where type v = N.v succ =
	struct
	type v = N.v succ

	functor Elim (I : NatIndData) =
	struct
	structure P = N.Elim (I)
	val proof = I.case_succ P.proof
	end
	end