Skip to content

Instantly share code, notes, and snippets.

@key-moon

key-moon/x.cs Secret

Created July 12, 2020 08:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save key-moon/87baf8f0eb4eb81f20916833908598c0 to your computer and use it in GitHub Desktop.
Save key-moon/87baf8f0eb4eb81f20916833908598c0 to your computer and use it in GitHub Desktop.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Numerics;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using static System.Math;
class Compiler
{
class Arr
{
public dynamic[] data;
public Arr() => data = new dynamic[0];
public Arr(Arr arr)
{
data = new dynamic[] { arr };
}
public Arr(string s)
{
data = s.Select(x => (dynamic)(int)x).ToArray();
}
public Arr(params dynamic[] args)
{
data = args.ToArray();
}
public dynamic this[int index]
{
get => data[index];
set => data[index] = value;
}
public static implicit operator Arr(string s) => new Arr(s);
public static implicit operator string(Arr s) => string.Join("", s.data.Select(x => x is int ? (char)x : x.ToString()));
public static bool operator ==(Arr a, Arr b) => a.data.SequenceEqual(b.data);
public static bool operator !=(Arr a, Arr b) => !(a == b);
public static bool operator ==(int a, Arr b) => b.data.Length == 1 && a == b[0];
public static bool operator !=(int a, Arr b) => !(a == b);
public static bool operator ==(Arr b, int a) => b.data.Length == 1 && a == b[0];
public static bool operator !=(Arr b, int a) => !(a == b);
public static Arr operator +(Arr a, Arr b) => new Arr() { data = a.data.Concat(b.data).ToArray() };
public override string ToString() => this;
}
StreamReader reader;
public Compiler(string path)
{
reader = new StreamReader(path);
}
int read() => reader.EndOfStream ? '~' : reader.Read();
StringBuilder builder = new StringBuilder();
//void debug(params dynamic[] a) => Debug.WriteLine(string.Join(": ", a));
void debug(params dynamic[] a) => builder.AppendLine(string.Join(": ", a));
void write(Arr a) => Console.WriteLine((string)a);
int len(Arr a) => a.data.Length;
void error(Arr s)
{
write(builder.ToString());
throw new Exception(s);
}
int indexof(Arr v, int x)
{
dynamic i = 0;
while (i < len(v))
{
if (v[i] == x) return i;
else i = i + 1;
}
return -1;
}
int indexof(Arr v, Arr x)
{
dynamic i = 0;
while (i < len(v))
{
if (v[i] == x) return i;
else i = i + 1;
}
return -1;
}
bool is_head(Arr v, Arr w)
{
dynamic i = 0;
bool res = true;
while (i < len(v))
{
if (i >= len(w)) res = false;
else if (v[i] != w[i]) res = false;
i = i + 1;
}
return res;
}
dynamic assoc(Arr v, int x)
{
dynamic i = 0;
while (i < len(v))
{
if (v[i][0] == x) return v[i][1];
i = i + 1;
}
debug(new Arr("assoc not found"), v, x);
throw new Exception();
}
dynamic assoc(Arr v, Arr x)
{
dynamic i = 0;
while (i < len(v))
{
if (v[i][0] == x) return v[i][1];
i = i + 1;
}
debug(new Arr("assoc not found"), v, x);
throw new Exception();
}
int str2int(Arr s)
{
dynamic res = 0;
dynamic sign = 1;
dynamic i = 0;
if (s[0] == new Arr("-"))
{
sign = -1;
i = 1;
}
while (i < len(s))
{
res = res * 10 + s[i] - 48;
i = i + 1;
}
return res * sign;
}
Arr int2str(int n)
{
return new Arr(n.ToString());
}
bool is_ident_char(int c)
{
return (48 <= c && c <= 57) || (65 <= c && c <= 90) || (97 <= c && c <= 122) || c == 95;
}
bool is_number(int c)
{
return (48 <= c && c <= 57);
}
bool is_digit(Arr s)
{
int i = 0;
while (i < len(s))
{
if (!is_number(s[i])) return false;
i = i + 1;
}
return true;
}
Arr tokenize()
{
var syms = new Arr(new Arr("+"), new Arr("-"), new Arr("*"), new Arr("/"), new Arr("%"), new Arr("("), new Arr(","), new Arr(")"), new Arr("{"), new Arr("}"), new Arr(";"), new Arr("["), new Arr("]"), new Arr("=="), new Arr("!="), new Arr("="), new Arr("<="), new Arr(">="), new Arr(">"), new Arr("<"), new Arr("||"), new Arr("!"), new Arr("&&"), new Arr("/*"));
Arr res = new Arr();
bool cont = true;
int mc = -1;
int c;
Arr v;
bool cont2;
while (cont)
{
if (mc < 0) c = read();
else
{
c = mc; mc = -1;
}
debug(c, is_ident_char(c), is_number(c));
if (c == new Arr("~")) cont = false;
else if (c == 9 || c == 10 || c == 32 || c == 13 || c == 11 || c == 12)
{ /* \t\s\n\r\x0b\x0c */
/* pass */
}
else if (is_number(c))
{
v = new Arr(c);
cont2 = true;
while (cont2)
{
c = read();
if (!is_number(c))
{
mc = c;
cont2 = false;
}
else v = v + new Arr(c);
}
res = res + new Arr(v);
}
else if (is_ident_char(c))
{
v = new Arr(c);
cont2 = true;
while (cont2)
{
c = read();
if (!is_ident_char(c))
{
mc = c;
cont2 = false;
}
else v = v + new Arr(c);
}
res = res + new Arr(v);
}
else if (c == 34)
{ /* " */
v = new Arr(c);
cont2 = true;
while (cont2)
{
c = read();
if (c == 34) cont2 = false;
v = v + new Arr(c);
}
res = res + new Arr(v);
}
else
{
int i;
v = new Arr();
bool ok = true;
while (ok)
{
ok = false;
var tv = v + new Arr(c);
i = 0;
while (i < len(syms))
{
if (is_head(tv, syms[i])) ok = true;
i = i + 1;
}
if (ok)
{
v = v + new Arr(c);
c = read();
}
}
ok = false;
i = 0;
while (i < len(syms))
{
ok = ok || (v == syms[i]);
i = i + 1;
}
if (!ok) error(new Arr("tokenize failed ") + v);
if (v == new Arr("/*"))
{
var bc = c;
cont2 = true;
while (cont2)
{
c = read();
if (new Arr(bc, c) == new Arr("*/")) cont2 = false;
bc = c;
mc = -1;
}
}
else
{
res = res + new Arr(v);
mc = c;
}
}
}
return res;
}
void assert(bool b, Arr s)
{
if (!b) error(s);
}
Arr s;
int p;
void expect(int d, dynamic t)
{
assert(s[p + d] == t, new Arr("assert_token: expect ") + t + new Arr(" got: ") + s[p + d]);
p = p + d + 1;
}
int get_priority(Arr op)
{
var prs = new Arr(
new Arr(new Arr("||"), new Arr("&&"), new Arr("!")),
new Arr(new Arr("=="), new Arr("!="), new Arr(">"), new Arr("<"), new Arr("<="), new Arr(">=")),
new Arr(new Arr("+"), new Arr("-")),
new Arr(new Arr("*"), new Arr("/"), new Arr("%"))
);
int i = 0;
while (i < len(prs))
{
if (indexof(prs[i], op) != -1) return i;
i = i + 1;
}
error(new Arr("unknown priority operator: ") + op);
throw new Exception();
}
Arr parse_expr(int pri)
{
Arr v;
dynamic pri2, op, n, vs, arr, i;
n = s[p];
if (n == new Arr("("))
{
p = p + 1;
v = parse_expr(-1);
expect(0, new Arr(")"));
}
else if (n == new Arr("["))
{
vs = get_arguments(false, new Arr("["), new Arr("]"));
v = new Arr(new Arr("[]"), vs);
}
else if (n == new Arr("-"))
{
p = p + 1;
v = new Arr(new Arr("Minus"), new Arr(parse_expr(get_priority(new Arr("-")))));
}
else if (n == new Arr("!"))
{
p = p + 1;
v = new Arr(new Arr("Not"), new Arr(parse_expr(get_priority(new Arr("!")))));
}
else
{
if (is_digit(n))
{
v = new Arr(new Arr("Int"), n);
}
else if (n[0] == 34)
{ /* " */
arr = new Arr();
i = 1;
while (i + 1 < len(n))
{
arr = arr + new Arr(new Arr(new Arr("Int"), int2str(n[i])));
i = i + 1;
}
v = new Arr(new Arr("[]"), arr);
}
else
{
v = new Arr(new Arr("Var"), n);
}
p = p + 1;
}
debug(s, len(s), p);
while (true)
{
op = s[p];
if (op == new Arr(")") || op == new Arr(";") || op == new Arr(",") || op == new Arr("]")) return v;
else if (op == new Arr("("))
{
assert(v[0] == new Arr("Var"), new Arr("function call for ") + v[0] + new Arr(" is not defined"));
vs = get_arguments(false, new Arr("("), new Arr(")"));
v = new Arr(new Arr("Call"), new Arr(v[1], vs));
}
else if (op == new Arr("["))
{
p = p + 1;
v = new Arr(new Arr("Get"), new Arr(v, parse_expr(-1)));
expect(0, new Arr("]"));
}
else
{
pri2 = get_priority(op);
if (pri >= pri2) return v;
else
{
p = p + 1;
v = new Arr(op, new Arr(v, parse_expr(pri2)));
}
}
}
}
Arr get_arguments(bool is_decl, dynamic fr, dynamic to)
{
bool cont;
Arr res;
Arr v;
expect(0, fr);
if (s[p] == to)
{
p = p + 1;
return new Arr();
}
else
{
if (is_decl)
{
v = s[p];
p = p + 1;
}
else v = parse_expr(-1);
res = new Arr(v);
cont = true;
while (cont)
{
if (s[p] == to)
{
p = p + 1;
cont = false;
}
else
{
expect(0, new Arr(","));
if (is_decl)
{
v = s[p];
p = p + 1;
}
else v = parse_expr(-1);
res = res + new Arr(v);
}
}
return res;
}
}
Arr parse_statement()
{
dynamic n = s[p];
Arr e;
Arr stf;
Arr stt;
Arr st;
if (n == new Arr("if"))
{
expect(1, new Arr("("));
e = parse_expr(-1);
expect(0, new Arr(")"));
stt = parse_statements_or_statement();
if (s[p] == new Arr("else"))
{
p = p + 1;
stf = parse_statements_or_statement();
}
else stf = new Arr();
return new Arr(new Arr("If"), e, stt, stf);
}
else if (n == new Arr("while"))
{
expect(1, new Arr("("));
e = parse_expr(-1);
expect(0, new Arr(")"));
st = parse_statements_or_statement();
return new Arr(new Arr("While"), e, st);
}
else if (n == new Arr("return"))
{
p = p + 1;
e = parse_expr(-1);
expect(0, new Arr(";"));
return new Arr(new Arr("Return"), e);
}
else if (s[p + 1] == new Arr("="))
{
p = p + 2;
e = parse_expr(-1);
expect(0, new Arr(";"));
return new Arr(new Arr("Assign"), n, e);
}
else
{
e = parse_expr(-1);
expect(0, new Arr(";"));
return new Arr(new Arr("Expr"), e);
}
}
Arr parse_statements()
{
expect(0, new Arr("{"));
Arr res = new Arr();
while (true)
{
if (s[p] == new Arr("}"))
{
p = p + 1;
return res;
}
else
{
res = res + new Arr(parse_statement());
}
}
}
Arr parse_statements_or_statement()
{
if (s[p] == new Arr("{")) return parse_statements();
else return new Arr(parse_statement());
}
Arr parse(Arr gs)
{
s = gs;
p = 0;
Arr res = new Arr();
while (p < len(s))
{
dynamic n = s[p];
if (s[p + 1] == new Arr("("))
{
p = p + 1;
Arr args = get_arguments(true, new Arr("("), new Arr(")"));
res = res + new Arr(new Arr(new Arr("Funcdecl"), n, args, parse_statements()));
}
else
{
expect(1, new Arr(";"));
res = res + new Arr(new Arr(new Arr("Vardecl"), n));
}
}
return res;
}
int ip;
Arr executable;
Arr resolver;
Arr resolver_functions;
Arr func_arg_nums;
Arr funcip;
void inst(Arr ins)
{
executable = executable + new Arr(ins);
ip = ip + 1;
}
int dummyinst()
{
int res = ip;
inst(new Arr("dummy"));
return res;
}
void epilogue()
{
inst(new Arr("mov sp bp"));
inst(new Arr("pop bp"));
inst(new Arr("ret"));
}
Arr env;
int dbp;
Arr genvar()
{
Arr res = new Arr("bp[") + int2str(dbp) + new Arr("]");
dbp = dbp + 1;
return res;
}
Arr compile_expr(Arr e)
{
Arr op = e[0];
Arr retv;
Arr fn;
int efn;
Arr vs;
int i;
Arr v;
int mp;
if (op == new Arr("Call"))
{
fn = e[1][0];
vs = e[1][1];
if (fn == new Arr("debug")) return new Arr("debugdummy");
retv = genvar();
efn = assoc(func_arg_nums, fn);
assert(efn == len(vs), new Arr("compile_expr: expect ") + int2str(efn) + new Arr(" arguments, got ") + int2str(len(vs)));
i = 0;
while (i < len(vs))
{
v = compile_expr(vs[i]);
inst(new Arr("push ") + v);
i = i + 1;
}
inst(new Arr("push 0"));
if (fn == new Arr("read") || fn == new Arr("write") || fn == new Arr("len")) inst(fn);
else
{
mp = dummyinst();
resolver_functions = resolver_functions + new Arr(new Arr(mp, fn, new Arr("call ")));
}
inst(new Arr("pop ") + retv);
inst(new Arr("sub sp sp ") + int2str(len(vs)));
return retv;
}
else if (op == new Arr("Int"))
{
return e[1];
}
else if (op == new Arr("Var"))
{
return assoc(env, e[1]);
}
else
{
vs = new Arr();
i = 0;
while (i < len(e[1]))
{
vs = vs + new Arr(compile_expr(e[1][i]));
i = i + 1;
}
v = genvar();
if (op == new Arr("[]"))
{
if (len(vs) == 0) s = new Arr("[]");
else
{
s = new Arr("[") + vs[0];
i = 1;
while (i < len(vs))
{
s = s + new Arr(",") + vs[i];
i = i + 1;
}
s = s + new Arr("]");
}
inst(new Arr("makelist ") + v + new Arr(" ") + s);
}
else if (op == new Arr("Minus"))
{
inst(new Arr("sub ") + v + new Arr(" 0 ") + vs[0]);
}
else if (op == new Arr("Not"))
{
inst(new Arr("eq ") + v + new Arr(" ") + vs[0] + new Arr(" 0"));
}
else if (op == new Arr("+"))
{
inst(new Arr("add ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
}
else if (op == new Arr("-"))
{
inst(new Arr("sub ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
}
else if (op == new Arr("*"))
{
inst(new Arr("mul ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
}
else if (op == new Arr("/"))
{
inst(new Arr("div ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
}
else if (op == new Arr("%"))
{
inst(new Arr("div ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
inst(new Arr("mul ") + v + new Arr(" ") + v + new Arr(" ") + vs[1]);
inst(new Arr("sub ") + v + new Arr(" ") + vs[0] + new Arr(" ") + v);
}
else if (op == new Arr("<"))
{
inst(new Arr("lt ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
}
else if (op == new Arr(">"))
{
inst(new Arr("lt ") + v + new Arr(" ") + vs[1] + new Arr(" ") + vs[0]);
}
else if (op == new Arr("<="))
{
inst(new Arr("lt ") + v + new Arr(" ") + vs[1] + new Arr(" ") + vs[0]);
inst(new Arr("eq ") + v + new Arr(" 0 ") + v);
}
else if (op == new Arr(">="))
{
inst(new Arr("lt ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
inst(new Arr("eq ") + v + new Arr(" 0 ") + v);
}
else if (op == new Arr("=="))
{
inst(new Arr("eq ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
}
else if (op == new Arr("!="))
{
inst(new Arr("eq ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
inst(new Arr("eq ") + v + new Arr(" ") + v + new Arr(" 0"));
}
else if (op == new Arr("&&"))
{
inst(new Arr("mul ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
inst(new Arr("eq ") + v + new Arr(" ") + v + new Arr(" 0"));
inst(new Arr("eq ") + v + new Arr(" ") + v + new Arr(" 0"));
}
else if (op == new Arr("||"))
{
Arr w = genvar();
inst(new Arr("eq ") + v + new Arr(" ") + vs[0] + new Arr(" 0"));
inst(new Arr("eq ") + w + new Arr(" ") + vs[1] + new Arr(" 0"));
inst(new Arr("mul ") + v + new Arr(" ") + v + new Arr(" ") + w);
inst(new Arr("eq ") + v + new Arr(" ") + v + new Arr(" 0"));
}
else if (op == new Arr("Get"))
{
inst(new Arr("get ") + v + new Arr(" ") + vs[0] + new Arr(" ") + vs[1]);
}
else error(new Arr("unknown operator ") + op);
return v;
}
}
void compile_statements(Arr stmts)
{
int i = 0;
int mp;
Arr e;
int mp2;
while (i < len(stmts))
{
Arr st = stmts[i];
if (st[0] == new Arr("While"))
{
mp = ip;
e = compile_expr(st[1]);
mp2 = dummyinst();
compile_statements(st[2]);
inst(new Arr("jz 0 ") + int2str(mp));
resolver = resolver + new Arr(new Arr(mp2, new Arr("jz ") + e + new Arr(" ") + int2str(ip)));
}
else if (st[0] == new Arr("If"))
{
e = compile_expr(st[1]);
mp = dummyinst();
compile_statements(st[2]);
mp2 = dummyinst();
resolver = resolver + new Arr(new Arr(mp, new Arr("jz ") + e + new Arr(" ") + int2str(ip)));
compile_statements(st[3]);
resolver = resolver + new Arr(new Arr(mp2, new Arr("jz 0 ") + int2str(ip)));
}
else if (st[0] == new Arr("Return"))
{
e = compile_expr(st[1]);
inst(new Arr("mov bp[-2] ") + e);
epilogue();
}
else if (st[0] == new Arr("Assign"))
{
e = compile_expr(st[2]);
inst(new Arr("mov ") + assoc(env, st[1]) + new Arr(" ") + e);
}
else if (st[0] == new Arr("Expr"))
{
compile_expr(st[1]);
}
else error(new Arr("unknown statement type") + st[0]);
i = i + 1;
}
}
Arr globals;
Arr collect_locals(Arr stmts, Arr args, Arr locals)
{
int i = 0;
while (i < len(stmts))
{
Arr st = stmts[i];
if (st[0] == new Arr("While"))
{
locals = collect_locals(st[2], args, locals);
}
else if (st[0] == new Arr("If"))
{
locals = collect_locals(st[2], args, locals);
locals = collect_locals(st[3], args, locals);
}
else if (st[0] == new Arr("Assign"))
{
dynamic n = st[1];
if (indexof(args + globals + locals, n) == -1)
{
locals = locals + new Arr(n);
}
}
i = i + 1;
}
return locals;
}
void compile_function(Arr fn, Arr args, Arr body)
{
Arr locals = collect_locals(body, args, new Arr());
funcip = funcip + new Arr(new Arr(fn, ip));
inst(new Arr("push bp"));
inst(new Arr("mov bp sp"));
int mplo = dummyinst(); /* inst( new Arr("add sp sp ") + int2str(len(locals))); */
/* -len(args)-2 -3 | -2 | -1 | 0 | 1 len(locals) | */
/* args[-1] ... args[0] | retv | oldip | oldbp | locals[0] ... locals[-1] | dbp[0] ... */
env = new Arr();
i = 0;
while (i < len(args))
{
env = env + new Arr(new Arr(args[len(args) - i - 1], new Arr("bp[") + int2str(-i - 3) + new Arr("]")));
i = i + 1;
}
i = 0;
while (i < len(globals))
{
if (indexof(args, globals[i]) == -1) env = env + new Arr(new Arr(globals[i], new Arr("#") + int2str(i)));
i = i + 1;
}
i = 0;
while (i < len(locals))
{
env = env + new Arr(new Arr(locals[i], new Arr("bp[") + int2str(i + 1) + new Arr("]")));
i = i + 1;
}
debug(new Arr("env"), fn, env);
dbp = len(locals) + 1;
compile_statements(body);
resolver = resolver + new Arr(new Arr(mplo, new Arr("add sp sp ") + int2str(dbp - 1)));
inst(new Arr("mov bp[-2] 0"));
epilogue();
}
public void Test()
{
}
public string GenTokenized()
{
s = tokenize();
return string.Join(",", s.data.Select(x => $"\"{x}\""));
Console.WriteLine();
//Console.WriteLine(string.Join(",", s.data.Select(x => $"[{string.Join(",", ((string)x).Select(y => (int)y))}]")));
}
public string main()
{
s = tokenize();
Arr prog = parse(s);
int i = 0;
func_arg_nums = new Arr(new Arr(new Arr("read"), 0), new Arr(new Arr("write"), 1), new Arr(new Arr("len"), 1));
while (i < len(prog))
{
if (prog[i][0] == new Arr("Funcdecl"))
{
func_arg_nums = func_arg_nums + new Arr(new Arr(prog[i][1], len(prog[i][2])));
}
i = i + 1;
}
debug(new Arr(new Arr("func_arg_nums")), func_arg_nums);
ip = 0;
i = 0;
globals = new Arr();
executable = new Arr();
resolver = new Arr();
funcip = new Arr();
int ipz = dummyinst();
resolver_functions = new Arr(new Arr(dummyinst(), new Arr("main"), new Arr("call ")));
inst(new Arr("hlt"));
while (i < len(prog))
{
Arr d = prog[i];
if (d[0] == new Arr("Funcdecl"))
{
compile_function(d[1], d[2], d[3]);
}
else if (d[0] == new Arr("Vardecl"))
{
globals = globals + new Arr(d[1]);
}
else error(new Arr("unknown global type") + prog[i][0]);
i = i + 1;
}
debug(executable);
resolver = resolver + new Arr(new Arr(ipz, new Arr("add sp sp ") + int2str(len(globals))));
i = 0;
while (i < len(resolver_functions))
{
Arr d = resolver_functions[i];
resolver = resolver + new Arr(new Arr(d[0], d[2] + int2str(assoc(funcip, d[1]))));
i = i + 1;
}
Arr output = new Arr();
i = 0;
while (i < len(executable))
{
s = executable[i];
if (s == new Arr("dummy"))
{
s = assoc(resolver, i);
}
output = output + s + new Arr(10);
i = i + 1;
}
return output;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment