Skip to content

Instantly share code, notes, and snippets.

@gbluma
Created September 18, 2014 17:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gbluma/796cc37ed9606f29b6bf to your computer and use it in GitHub Desktop.
Save gbluma/796cc37ed9606f29b6bf to your computer and use it in GitHub Desktop.
A Hindley-Milner type system in PHP
<?php
/** Lambda abstraction */
class Lambda
{
function __construct($v, $body) {
$this->v = $v;
$this->body = $body;
}
function __toString() {
return "(fn {$this->v} => {$this->body})";
}
}
/** Identfier */
class Ident
{
function __construct($name) {
$this->name = $name;
}
function __toString() {
return $this->name;
}
}
/** Function application */
class Apply
{
function __construct($fn, $arg) {
$this->fn = $fn;
$this->arg = $arg;
}
function __toString() {
return "({$this->fn} {$this->arg})";
}
}
/** Let binding */
class Let
{
function __construct($v, $defn, $body) {
$this->v = $v;
$this->defn = $defn;
$this->body = $body;
}
function __toString() {
return "(let {$this->v} = {$this->defn} in {$this->body})";
}
}
/** Letrec binding */
class Letrec
{
function __construct($v, $defn, $body) {
$this->v = $v;
$this->defn = $defn;
$this->body = $body;
}
function __toString() {
return "(letrec {$this->v} = {$this->defn} in {$this->body})";
}
}
//=======================================================
// Exception types
/** Raised if the type inference algorithm cannot infer types successfully */
class TypeError extends Exception
{
function __construct($message) {
$this->message = $message;
}
function __toString() {
return (string) $this->message;
}
}
/** Raised if the type environment supplied for is incomplete */
class ParseError extends Exception
{
function __construct($message) {
$this->message = $message;
}
function __toString() {
return (string) $this->message;
}
}
//=======================================================
// Types and type constructors
/**
* A type variable standing for an arbitrary type.
*
* All type variables have a unique id, but names are only assigned lazily,
* when required.
*/
class TypeVariable
{
static $next_variable_id = 0;
static $next_variable_name = 'a';
function __construct() {
$this->id = TypeVariable::$next_variable_id;
TypeVariable::$next_variable_id += 1;
$this->instance = null;
$this->name = null;
}
/** Names are allocated to TypeVariables lazily, so that only TypeVariables present */
function getName() {
if ($this->name === null) {
$this->name = TypeVariable::$next_variable_name;
TypeVariable::$next_variable_name = chr( ord(TypeVariable::$next_variable_name) + 1);
}
return $this->name;
}
function __toString() {
if ($this->instance === null) {
return (string) $this->instance;
} else {
return $this->name;
}
}
function __repr() {
return "TypeVariable(id = {$this->id})";
}
}
/** An n-ary type constructor which builds a new type from old */
class TypeOperator
{
function __construct($name, $types) {
$this->name = $name;
if (is_array($types))
$this->types = $types;
else
$this->types = array($types);
}
function __toString() {
$num_types = count($this->types);
if ($num_types === 0) {
return $this->name;
} else if ($num_types == 2) {
return (string) $this->types[0] . " " . $this->name . " " . (string) $this->types[1];
} else {
return ($this->name . ' ' . implode(" ", $this->types));
}
}
}
/** A binary type constructor which builds function types */
class Func extends TypeOperator
{
function __construct($from_type, $to_type) {
parent::__construct("->", $from_type, $to_type);
}
}
// Basic types are constructed with a nullary type constructor
$Integer = new TypeOperator("int", array()); # Basic integer
$Bool = new TypeOperator("bool", array()); # Basic bool
$String = new TypeOperator("string", array());
//=======================================================#
// Type inference machinery
/**
* Computes the type of the expression given by node.
*
* The type of the node is computed in the context of the context of the
* supplied type environment env. Data types can be introduced into the
* language simply by having a predefined set of identifiers in the initial
* environment. environment; this way there is no need to change the syntax or, more
* importantly, the type-checking program when extending the language.
*
* Args:
* node: The root of the abstract syntax tree.
* env: The type environment is a mapping of expression identifier names
* to type assignments.
* to type assignments.
* non_generic: A set of non-generic variables, or None
*
* Returns:
* The computed type of the expression.
*
*/
function analyse($node, $env, $non_generic=null) {
if ($non_generic === null)
$non_generic = array();
if ($node instanceof Ident) {
return readType($node->name, $env, $non_generic);
}
else if ($node instanceof Apply) {
$fun_type = analyse( $node->fn, $env, $non_generic );
$arg_type = analyse( $node->arg, $env, $non_generic );
$result_type = new TypeVariable();
unify(new Func($arg_type, $result_type), $fun_type);
return $result_type;
}
else if ($node instanceof Lambda) {
$arg_type = new TypeVariable();
$new_env = $env;
$new_env[$node->v] = $arg_type;
$new_non_generic = $non_generic;
$new_non_generic[] = $arg_type;
$result_type = analyse($node->body, $new_env, $new_non_generic);
return new Func($arg_type, $result_type);
}
else if ($node instanceof Let) {
$defn_type = analyse($node->defn, $env, $non_generic);
$new_env = $env;
$new_env[$node->v] = $defn_type;
return analyse($node->body, $new_env, $non_generic);
}
else if ($node instanceof Letrec) {
$new_type = new TypeVariable();
$new_env = $env;
$new_env[$node->v] = $new_type;
$new_non_generic = $non_generic;
$new_non_generic[] = $new_type;
$defn_type = analyse($node->defn, $new_env, $new_non_generic);
unify($new_type, $defn_type);
return analyse($node->body, $new_env, $non_generic);
}
die("Unhandled syntax node " . gettype(t));
}
/**
* Get the type of identifier name from the type environment env.
*
* Args:
* name: The identifier name
* env: The type environment mapping from identifier names to types
* non_generic: A set of non-generic TypeVariables
*
* Raises:
* ParseError: Raised if name is an undefined symbol in the type
* environment.
*/
function readType($name, $env, $non_generic) {
global $Integer;
if (isset($env[$name])) {
return fresh($env[$name], $non_generic);
} elseif (ctype_digit($name) || is_int($name)) {
return $Integer;
} else {
throw new Exception("Undefined symbol $name");
}
}
/**
* Makes a copy of a type expression.
*
* The type t is copied. The the generic variables are duplicated and the
* non_generic variables are shared.
*
* Args:
* t: A type to be copied.
* non_generic: A set of non-generic TypeVariables
*/
function fresh( $t, $non_generic) {
$mappings = array(); # A mapping of TypeVariables to TypeVariables
$freshrec =
function ($tp) use (&$mappings, &$non_generic, &$freshrec) {
$p = prune($tp);
if ($p instanceof TypeVariable) {
if (isGeneric($p, $non_generic)) {
if (count($mappings) === 0 || !isset($mappings[$p])) {
@$mappings[$p] = new TypeVariable();
}
return @$mappings[$p];
} else {
return $p;
}
}
else if ($p instanceof TypeOperator) {
$fresh_types = array();
foreach($p->types as $x) {
$fresh_types[] = $freshrec($x);
}
return new TypeOperator($p->name, $fresh_types);
}
};
return $freshrec($t);
}
/**
* helper function to zip in php
*/
function zip() {
$args = func_get_args();
$zipped = array();
$n = count($args);
for ($i=0; $i<$n; ++$i) {
reset($args[$i]);
}
while ($n) {
$tmp = array();
for ($i=0; $i<$n; ++$i) {
if (key($args[$i]) === null) {
break 2;
}
$tmp[] = current($args[$i]);
next($args[$i]);
}
$zipped[] = $tmp;
}
return $zipped;
}
/**
* Unify the two types t1 and t2.
*
* Makes the types t1 and t2 the same.
*
* Args:
* t1: The first type to be made equivalent
* t2: The second type to be be equivalent
*
* Returns:
* None
*
* Raises:
* TypeError: Raised if the types cannot be unified.
*/
function unify($t1, $t2)
{
$a = prune($t1);
$b = prune($t2);
if ($a instanceof TypeVariable) {
if ($a != $b) {
if (occursInType($a, $b))
throw new TypeError("recursive unification");
$a->instance = $b;
}
} else if (($a instanceof TypeOperator) and ($b instanceof TypeVariable)) {
unify($b, $a);
} else if (($a instanceof TypeOperator) and ($b instanceof TypeOperator)) {
if ( ($a->name != $b->name) or (count($a->types) != count($b->types)))
throw new TypeError("Type mismatch: ".(string)$a." != ".(string)$b);
foreach( zip($a->types, $b->types) as $row) {
unify($row[0], $row[1]);
}
} else {
die("Not unified");
}
}
/**
* Returns the currently defining instance of t.
*
* As a side effect, collapses the list of type instances. The function Prune
* is used whenever a type expression has to be inspected: it will always
* return a type expression which is either an uninstantiated type variable or
* a type operator; i.e. it will skip instantiated variables, and will
* actually prune them from expressions to remove long chains of instantiated
* variables.
*
* Args:
* t: The type to be pruned
*
* Returns:
* An uninstantiated TypeVariable or a TypeOperator
*/
function prune($t) {
if ($t instanceof TypeVariable) {
if ($t->instance !== null) {
$t->instance = prune($t->instance);
return $t->instance;
}
}
return $t;
}
/**
* Checks whether a given variable occurs in a list of non-generic variables
*
* Note that a variables in such a list may be instantiated to a type term,
* in which case the variables contained in the type term are considered
* non-generic.
*
* Note: Must be called with v pre-pruned
*
* Args:
* v: The TypeVariable to be tested for genericity
* non_generic: A set of non-generic TypeVariables
*
* Returns:
* True if v is a generic variable, otherwise False
*/
function isGeneric($v, $non_generic) {
return !(occursIn($v, $non_generic));
}
/**
* Checks whether a type variable occurs in a type expression.
*
* Note: Must be called with v pre-pruned
*
* Args:
* v: The TypeVariable to be tested for
* type2: The type in which to search
*
* Returns:
* True if v occurs in type2, otherwise False
*/
function occursInType($v, $type2) {
$pruned_type2 = prune($type2);
if ($pruned_type2 === $v)
return true;
else if ($pruned_type2 instanceof TypeOperator)
return occursIn($v, $pruned_type2->types);
return false;
}
/**
* Checks whether a types variable occurs in any other types.
*
* Args:
* v: The TypeVariable to be tested for
* types: The sequence of types in which to search
*
* Returns:
* True if t occurs in any of types, otherwise False
*/
function occursIn($t, $types) {
foreach($types as $t2) {
if (occursInType($t, $t2) === true)
return true;
}
return false;
}
//==================================================================#
// Example code to infrastructure above
/**
* Try to evaluate a type printing the result or reporting errors.
*
* Args:
* env: The type environment in which to evaluate the expression.
* node: The root node of the abstract syntax tree of the expression.
*
* Returns:
* None
*/
function tryExp($env, $node) {
print( (string) $node . " : ");
try {
$t = analyse($node, $env);
print( (string) $t);
} catch (ParseError $e) {
print($e);
} catch (TypeError $e) {
print($e);
}
}
$var1 = new TypeVariable();
$var2 = new TypeVariable();
$pair_type = new TypeOperator("*", array($var1, $var2));
$var3 = new TypeVariable();
$my_env = array(
"pair" => new Func($var1, new Func($var2, $pair_type)),
"true" => $Bool,
"cond" => new Func($Bool, new Func($var3, new Func($var3, $var3))),
"zero" => new Func($Integer, $Bool),
"pred" => new Func($Integer, $Integer),
"times"=> new Func($Integer, new Func($Integer, $Integer)),
"concat" => new Func($String, new Func($String, $String))
);
$pair = new Apply(new Apply(new Ident("pair"), new Apply(new Ident("f"), new Ident("4"))), new Apply(new Ident("f"), new Ident("true")));
// This expression should pass
tryExp($my_env,
new Letrec("factorial", # letrec factorial =
new Lambda("n", # fn n =>
new Apply(
new Apply( # cond (zero n) 1
new Apply(new Ident("cond"), # cond (zero n)
new Apply(new Ident("zero"), new Ident("n"))),
new Ident("1")),
new Apply( # times n
new Apply(new Ident("times"), new Ident("n")),
new Apply(new Ident("factorial"),
new Apply(new Ident("pred"), new Ident("n")))
)
)
), # in
new Apply(new Ident("factorial"), new Ident("5"))
)
);
echo "\n\n";
# This expression should fail
tryExp($my_env,
# fn x => (pair(x(3) (x(true)))
new Lambda("x",
new Apply(
new Apply(new Ident("pair"),
new Apply(new Ident("x"), new Ident("3"))),
new Apply(new Ident("x"), new Ident("true"))))
);
/* OUTPUT:
(letrec factorial = (fn n => (((cond (zero n)) 1) ((times n) (factorial (pred n))))) in (factorial 5)) :
(fn x => ((pair (x 3)) (x true))) : Type mismatch: bool != int
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment