Skip to content

Instantly share code, notes, and snippets.

@JLChnToZ
Last active October 26, 2021 09:56
Show Gist options
  • Save JLChnToZ/42bade0f3d4b9e671bcedaafe1000c86 to your computer and use it in GitHub Desktop.
Save JLChnToZ/42bade0f3d4b9e671bcedaafe1000c86 to your computer and use it in GitHub Desktop.
Whitespace Language Compiler for .NET
using System;
using System.IO;
using System.Collections.Generic;
using System.Reflection;
using System.Reflection.Emit;
using Whitespace.Utils;
namespace Whitespace {
public delegate void CompiledMethod(TextReader input, TextWriter output);
public sealed class Compiler {
#region Prefetched Reflection Info
private static readonly Type stackType = typeof(Stack<int>);
private static readonly Type heapType = typeof(Heap<int>);
private static readonly Type helperType = typeof(Helper);
private static readonly Type textReaderType = typeof(TextReader);
private static readonly Type textWriterType = typeof(TextWriter);
private static readonly Type[] intParams = new[] { typeof(int) };
private static readonly Type[] charParams = new[] { typeof(char) };
private static readonly Type[] textReaderParams = new[] { typeof(TextReader) };
private static readonly MethodInfo stackPopFn = stackType.GetMethod("Pop", Type.EmptyTypes);
private static readonly MethodInfo stackPushFn = stackType.GetMethod("Push", intParams);
private static readonly PropertyInfo heapIndexer = heapType.GetProperty("Item", intParams);
private static readonly MethodInfo heapStoreFn = heapType.GetMethod("Store", intParams);
private static readonly MethodInfo heapStoreSpecificFn = heapIndexer.GetSetMethod();
private static readonly MethodInfo heapRestoreFn = heapIndexer.GetGetMethod();
private static readonly MethodInfo readCharFn = textReaderType.GetMethod("Read", Type.EmptyTypes);
private static readonly MethodInfo readIntFn = helperType.GetMethod("ReadInteger", textReaderParams);
private static readonly MethodInfo writeCharFn = textWriterType.GetMethod("Write", charParams);
private static readonly MethodInfo writeIntFn = textWriterType.GetMethod("Write", intParams);
#endregion
#region Local Variables
private readonly Parser parser;
private readonly ILGenerator il;
private readonly Dictionary<int, Label> labels = new Dictionary<int, Label>();
private readonly List<Label> returnLabels = new List<Label>();
private LocalBuilder swap1, swap2, cbStack, heap;
private Label returnSelLabel, defaultReturnLabel;
#endregion
#region Constructors
private Compiler() { }
public Compiler(Parser parser, ILGenerator il) {
this.il = il;
this.parser = parser;
}
#endregion
#region Compiler
private void Compile() {
il.Emit(OpCodes.Nop);
swap1 = il.DeclareLocal(typeof(int));
if(parser.hasSwap)
swap2 = il.DeclareLocal(typeof(int));
if(parser.hasCbStack) {
cbStack = il.DeclareLocal(stackType);
returnSelLabel = il.DefineLabel();
defaultReturnLabel = il.DefineLabel();
il.Emit(OpCodes.Newobj, stackType.GetConstructor(Type.EmptyTypes));
il.Emit(OpCodes.Stloc, cbStack);
}
heap = il.DeclareLocal(heapType);
il.Emit(OpCodes.Newobj, heapType.GetConstructor(Type.EmptyTypes));
il.Emit(OpCodes.Stloc, heap);
il.Emit(OpCodes.Nop);
foreach(OpParam op in parser.Instructions)
Emit(op.op, op.parameter);
if(parser.hasCbStack) {
il.Emit(OpCodes.Br, defaultReturnLabel);
il.MarkLabel(returnSelLabel);
il.Emit(OpCodes.Ldloc, cbStack);
il.Emit(OpCodes.Call, stackPopFn);
returnLabels.Add(defaultReturnLabel);
il.Emit(OpCodes.Switch, returnLabels.ToArray());
il.MarkLabel(defaultReturnLabel);
il.Emit(OpCodes.Nop);
}
il.Emit(OpCodes.Ret);
}
private void Emit(InstOpCode op, int parameter) {
switch(op) {
case InstOpCode.Pus:
il.Emit(OpCodes.Ldc_I4, parameter);
break;
case InstOpCode.Dup:
il.Emit(OpCodes.Dup);
break;
case InstOpCode.Swp:
il.Emit(OpCodes.Stloc, swap2);
EmitSwap(swap2);
break;
case InstOpCode.Pop:
il.Emit(OpCodes.Pop);
break;
case InstOpCode.Add:
il.Emit(OpCodes.Add);
break;
case InstOpCode.Sub:
il.Emit(OpCodes.Sub);
break;
case InstOpCode.Mul:
il.Emit(OpCodes.Mul);
break;
case InstOpCode.Div:
il.Emit(OpCodes.Div);
break;
case InstOpCode.Mod:
il.Emit(OpCodes.Rem);
break;
case InstOpCode.Sto:
EmitHeapStoreFn();
break;
case InstOpCode.Ret:
EmitSwap(heap);
il.Emit(OpCodes.Call, heapRestoreFn);
break;
case InstOpCode.Mrk:
il.MarkLabel(GetLabel(parameter));
break;
case InstOpCode.Cas:
int returnId;
Label returnLabel = GetReturnLabel(out returnId);
il.Emit(OpCodes.Ldloc, cbStack);
il.Emit(OpCodes.Ldc_I4, returnId);
il.Emit(OpCodes.Call, stackPushFn);
il.Emit(OpCodes.Br, GetLabel(parameter));
il.MarkLabel(returnLabel);
break;
case InstOpCode.Jmp:
il.Emit(OpCodes.Br, GetLabel(parameter));
break;
case InstOpCode.Jmz:
il.Emit(OpCodes.Brfalse, GetLabel(parameter));
break;
case InstOpCode.Jmn:
il.Emit(OpCodes.Ldc_I4_0);
il.Emit(OpCodes.Clt);
il.Emit(OpCodes.Brtrue, GetLabel(parameter));
break;
case InstOpCode.Ens:
il.Emit(OpCodes.Br, returnSelLabel);
break;
case InstOpCode.End:
il.Emit(OpCodes.Ret);
break;
case InstOpCode.Wrc:
EmitSwap(OpCodes.Ldarg_1);
il.Emit(OpCodes.Callvirt, writeCharFn);
break;
case InstOpCode.Wrn:
EmitSwap(OpCodes.Ldarg_1);
il.Emit(OpCodes.Callvirt, writeIntFn);
break;
case InstOpCode.Rec:
EmitSwap(heap);
il.Emit(OpCodes.Ldarg_0);
il.Emit(OpCodes.Callvirt, readCharFn);
il.Emit(OpCodes.Call, heapStoreSpecificFn);
break;
case InstOpCode.Ren:
EmitSwap(heap);
il.Emit(OpCodes.Ldarg_0);
il.Emit(OpCodes.Call, readIntFn);
il.Emit(OpCodes.Call, heapStoreSpecificFn);
break;
}
}
private void EmitHeapStoreFn() {
EmitSwap(heap);
il.Emit(OpCodes.Call, heapStoreFn);
}
private void EmitSwap(OpCode ldOpcode) {
il.Emit(OpCodes.Stloc, swap1);
il.Emit(ldOpcode);
il.Emit(OpCodes.Ldloc, swap1);
}
private void EmitSwap(LocalBuilder loc) {
il.Emit(OpCodes.Stloc, swap1);
il.Emit(OpCodes.Ldloc, loc);
il.Emit(OpCodes.Ldloc, swap1);
}
#endregion
#region Helpers
private Label GetLabel(int id) {
Label label;
if(!labels.TryGetValue(id, out label)) {
label = il.DefineLabel();
labels.Add(id, label);
}
return label;
}
private Label GetReturnLabel(out int returnId) {
Label label = il.DefineLabel();
returnId = returnLabels.Count;
returnLabels.Add(label);
return label;
}
#endregion
#region Public API
// Generated method's parameters: Compiled(TextReader reader, TextWriter writer);
public static void Compile(string source, ILGenerator il) {
if(source == null)
throw new ArgumentNullException("source");
if(il == null)
throw new ArgumentNullException("il");
Parser parser = new Parser(source);
Compiler compiler = new Compiler(parser, il);
parser.Parse();
compiler.Compile();
}
public static CompiledMethod Compile(string source) {
if(source == null)
throw new ArgumentNullException("source");
DynamicMethod method = new DynamicMethod("_Compiled_", null, new[] {
textReaderType, textWriterType
});
method.DefineParameter(0, ParameterAttributes.In, "input");
method.DefineParameter(1, ParameterAttributes.In, "output");
Parser parser = new Parser(source);
Compiler compiler = new Compiler(parser, method.GetILGenerator());
parser.Parse();
compiler.Compile();
return method.CreateDelegate(typeof(CompiledMethod)) as CompiledMethod;
}
#endregion
}
}
using System.Collections.Generic;
namespace Whitespace {
public enum InstOpCode: byte {
Unknown,
Pus, Dup, Swp, Pop,
Add, Sub, Mul, Div, Mod,
Sto, Ret,
Mrk, Cas, Jmp, Jmz, Jmn, Ens, End, Wrc, Wrn, Rec, Ren
}
public struct OpParam {
public InstOpCode op;
public int parameter;
}
public class Parser {
#region Constants
private const char SChar = ' '; // S = Space
private const char TChar = '\t'; // T = Tab
private const char LChar = '\n'; // L = Line Feed
// Raw packed op codes sequence
private enum RawIMPOpCodes {
// Stack Manipulation [S*]
Pus = (SChar << 8) | SChar, // Push the number onto the stack [SS]
Dup = (SChar << 16) | (LChar << 8) | SChar, // Duplicate the top item on the stack [SLS]
Swp = (SChar << 16) | (LChar << 8) | TChar, // Swap the top two items on the stack [SLT]
Pop = (SChar << 16) | (LChar << 8) | LChar, // Discard the top item on the stack [SLL]
// Arithmetic [TS*]
Add = (TChar << 24) | (SChar << 16) | (SChar << 8) | SChar, // Addition [TSSS]
Sub = (TChar << 24) | (SChar << 16) | (SChar << 8) | TChar, // Subtraction [TSST]
Mul = (TChar << 24) | (SChar << 16) | (SChar << 8) | LChar, // Multiplication [TSSL]
Div = (TChar << 24) | (SChar << 16) | (TChar << 8) | SChar, // Integer Division [TSTS]
Mod = (TChar << 24) | (SChar << 16) | (TChar << 8) | TChar, // Modulo [TSTT]
// Heap Access [TT*]
Sto = (TChar << 16) | (TChar << 8) | SChar, // Store [TTS]
Ret = (TChar << 16) | (TChar << 8) | TChar, // Retrieve [TTT]
// Flow Control [L*]
Mrk = (LChar << 16) | (SChar << 8) | SChar, // Mark location in the program [LSS]
Cas = (LChar << 16) | (SChar << 8) | TChar, // Call a subroutine [LST]
Jmp = (LChar << 16) | (SChar << 8) | LChar, // Jump unconditionally to a label [LSL]
Jmz = (LChar << 16) | (TChar << 8) | SChar, // Jump to a label if the top of the stack is zero [LTS]
Jmn = (LChar << 16) | (TChar << 8) | TChar, // Jump to a label if the top of the stack is negative [LTT]
Ens = (LChar << 16) | (TChar << 8) | LChar, // End a subroutine and transfer control back to the caller [LTL]
End = (LChar << 16) | (LChar << 8) | LChar, // End the program [LLL]
// I/O [TL*]
Wrc = (TChar << 24) | (LChar << 16) | (SChar << 8) | SChar, // Output the character at the top of the stack [TLSS]
Wrn = (TChar << 24) | (LChar << 16) | (SChar << 8) | TChar, // Output the number at the top of the stack [TLST]
Rec = (TChar << 24) | (LChar << 16) | (TChar << 8) | SChar, // Read a character and place it in the location given by the top of the stack [TLTS]
Ren = (TChar << 24) | (LChar << 16) | (TChar << 8) | TChar, // Read a number and place it in the location given by the top of the stack [TLTT]
}
#endregion
private readonly string source;
private readonly List<OpParam> ops = new List<OpParam>();
// Specific situation flags for compiler use
internal bool hasCbStack, hasSwap;
public string Source {
get { return source; }
}
public IList<OpParam> Instructions {
get { return ops.AsReadOnly(); }
}
private Parser() { }
public Parser(string source) {
this.source = source;
}
public void Parse() {
bool requireParam = false, requireSign = false;
int op = 0, parameter = 0;
bool negativeParam = false;
foreach(char c in source) {
switch(c) {
case SChar:
if(requireSign) {
negativeParam = false;
requireSign = false;
continue;
} else if(requireParam) {
parameter = parameter << 1;
continue;
}
break;
case TChar:
if(requireSign) {
negativeParam = true;
requireSign = false;
continue;
} else if(requireParam) {
parameter = (parameter << 1) | 1;
continue;
}
break;
case LChar:
if(requireParam) {
if(negativeParam) {
parameter = (int)(parameter | 0x80000000);
negativeParam = false;
}
if(AddOp(ConvertIMPOpCode(op), parameter))
return;
requireParam = false;
requireSign = false;
op = 0;
parameter = 0;
continue;
}
break;
default: continue;
}
op = (op << 8) | c;
if(CheckAndAddOp(ref op, out requireParam, out requireSign))
return;
}
}
private bool CheckAndAddOp(ref int rawOpCode, out bool requireParam, out bool signed) {
InstOpCode op = ConvertIMPOpCode(rawOpCode);
switch(op) {
case InstOpCode.Unknown:
requireParam = false;
signed = false;
return false;
case InstOpCode.Pus:
requireParam = true;
signed = true;
return false;
case InstOpCode.Mrk:
case InstOpCode.Cas:
case InstOpCode.Jmp:
case InstOpCode.Jmz:
case InstOpCode.Jmn:
requireParam = true;
signed = false;
return false;
default:
requireParam = false;
signed = false;
rawOpCode = 0;
return AddOp(op, 0);
}
}
private bool AddOp(InstOpCode opCode, int parameter) {
ops.Add(new OpParam {
op = opCode,
parameter = parameter
});
switch(opCode) {
case InstOpCode.End:
return true;
case InstOpCode.Cas:
case InstOpCode.Ens:
hasCbStack = true;
break;
case InstOpCode.Swp:
hasSwap = true;
break;
}
return false;
}
private static InstOpCode ConvertIMPOpCode(int opCode) {
switch((RawIMPOpCodes)opCode) {
case RawIMPOpCodes.Pus: return InstOpCode.Pus;
case RawIMPOpCodes.Dup: return InstOpCode.Dup;
case RawIMPOpCodes.Swp: return InstOpCode.Swp;
case RawIMPOpCodes.Pop: return InstOpCode.Pop;
case RawIMPOpCodes.Add: return InstOpCode.Add;
case RawIMPOpCodes.Sub: return InstOpCode.Sub;
case RawIMPOpCodes.Mul: return InstOpCode.Mul;
case RawIMPOpCodes.Div: return InstOpCode.Div;
case RawIMPOpCodes.Mod: return InstOpCode.Mod;
case RawIMPOpCodes.Sto: return InstOpCode.Sto;
case RawIMPOpCodes.Ret: return InstOpCode.Ret;
case RawIMPOpCodes.Mrk: return InstOpCode.Mrk;
case RawIMPOpCodes.Cas: return InstOpCode.Cas;
case RawIMPOpCodes.Jmp: return InstOpCode.Jmp;
case RawIMPOpCodes.Jmz: return InstOpCode.Jmz;
case RawIMPOpCodes.Jmn: return InstOpCode.Jmn;
case RawIMPOpCodes.Ens: return InstOpCode.Ens;
case RawIMPOpCodes.End: return InstOpCode.End;
case RawIMPOpCodes.Wrc: return InstOpCode.Wrc;
case RawIMPOpCodes.Wrn: return InstOpCode.Wrn;
case RawIMPOpCodes.Rec: return InstOpCode.Rec;
case RawIMPOpCodes.Ren: return InstOpCode.Ren;
default: return InstOpCode.Unknown;
}
}
}
}
using System.Collections.Generic;
namespace Whitespace.Utils {
public class Heap<T> {
private readonly Dictionary<int, T> collection = new Dictionary<int, T>();
private int nextIdx = 0;
public T this[int address] {
get {
T entry;
collection.TryGetValue(address, out entry);
return entry;
}
set {
collection[address] = value;
if(nextIdx <= address)
nextIdx = address + 1;
}
}
public int Store(T value) {
collection[nextIdx] = value;
return nextIdx++;
}
}
}
using System.IO;
namespace Whitespace.Utils {
public static class Helper {
public static int ReadInt32(TextReader textReader) {
int result;
while(!int.TryParse(textReader.ReadLine(), out result)) ;
return result;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment