Last active
October 26, 2021 09:56
-
-
Save JLChnToZ/42bade0f3d4b9e671bcedaafe1000c86 to your computer and use it in GitHub Desktop.
Whitespace Language Compiler for .NET
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.IO; | |
using System.Collections.Generic; | |
using System.Reflection; | |
using System.Reflection.Emit; | |
using Whitespace.Utils; | |
namespace Whitespace { | |
public delegate void CompiledMethod(TextReader input, TextWriter output); | |
public sealed class Compiler { | |
#region Prefetched Reflection Info | |
private static readonly Type stackType = typeof(Stack<int>); | |
private static readonly Type heapType = typeof(Heap<int>); | |
private static readonly Type helperType = typeof(Helper); | |
private static readonly Type textReaderType = typeof(TextReader); | |
private static readonly Type textWriterType = typeof(TextWriter); | |
private static readonly Type[] intParams = new[] { typeof(int) }; | |
private static readonly Type[] charParams = new[] { typeof(char) }; | |
private static readonly Type[] textReaderParams = new[] { typeof(TextReader) }; | |
private static readonly MethodInfo stackPopFn = stackType.GetMethod("Pop", Type.EmptyTypes); | |
private static readonly MethodInfo stackPushFn = stackType.GetMethod("Push", intParams); | |
private static readonly PropertyInfo heapIndexer = heapType.GetProperty("Item", intParams); | |
private static readonly MethodInfo heapStoreFn = heapType.GetMethod("Store", intParams); | |
private static readonly MethodInfo heapStoreSpecificFn = heapIndexer.GetSetMethod(); | |
private static readonly MethodInfo heapRestoreFn = heapIndexer.GetGetMethod(); | |
private static readonly MethodInfo readCharFn = textReaderType.GetMethod("Read", Type.EmptyTypes); | |
private static readonly MethodInfo readIntFn = helperType.GetMethod("ReadInteger", textReaderParams); | |
private static readonly MethodInfo writeCharFn = textWriterType.GetMethod("Write", charParams); | |
private static readonly MethodInfo writeIntFn = textWriterType.GetMethod("Write", intParams); | |
#endregion | |
#region Local Variables | |
private readonly Parser parser; | |
private readonly ILGenerator il; | |
private readonly Dictionary<int, Label> labels = new Dictionary<int, Label>(); | |
private readonly List<Label> returnLabels = new List<Label>(); | |
private LocalBuilder swap1, swap2, cbStack, heap; | |
private Label returnSelLabel, defaultReturnLabel; | |
#endregion | |
#region Constructors | |
private Compiler() { } | |
public Compiler(Parser parser, ILGenerator il) { | |
this.il = il; | |
this.parser = parser; | |
} | |
#endregion | |
#region Compiler | |
private void Compile() { | |
il.Emit(OpCodes.Nop); | |
swap1 = il.DeclareLocal(typeof(int)); | |
if(parser.hasSwap) | |
swap2 = il.DeclareLocal(typeof(int)); | |
if(parser.hasCbStack) { | |
cbStack = il.DeclareLocal(stackType); | |
returnSelLabel = il.DefineLabel(); | |
defaultReturnLabel = il.DefineLabel(); | |
il.Emit(OpCodes.Newobj, stackType.GetConstructor(Type.EmptyTypes)); | |
il.Emit(OpCodes.Stloc, cbStack); | |
} | |
heap = il.DeclareLocal(heapType); | |
il.Emit(OpCodes.Newobj, heapType.GetConstructor(Type.EmptyTypes)); | |
il.Emit(OpCodes.Stloc, heap); | |
il.Emit(OpCodes.Nop); | |
foreach(OpParam op in parser.Instructions) | |
Emit(op.op, op.parameter); | |
if(parser.hasCbStack) { | |
il.Emit(OpCodes.Br, defaultReturnLabel); | |
il.MarkLabel(returnSelLabel); | |
il.Emit(OpCodes.Ldloc, cbStack); | |
il.Emit(OpCodes.Call, stackPopFn); | |
returnLabels.Add(defaultReturnLabel); | |
il.Emit(OpCodes.Switch, returnLabels.ToArray()); | |
il.MarkLabel(defaultReturnLabel); | |
il.Emit(OpCodes.Nop); | |
} | |
il.Emit(OpCodes.Ret); | |
} | |
private void Emit(InstOpCode op, int parameter) { | |
switch(op) { | |
case InstOpCode.Pus: | |
il.Emit(OpCodes.Ldc_I4, parameter); | |
break; | |
case InstOpCode.Dup: | |
il.Emit(OpCodes.Dup); | |
break; | |
case InstOpCode.Swp: | |
il.Emit(OpCodes.Stloc, swap2); | |
EmitSwap(swap2); | |
break; | |
case InstOpCode.Pop: | |
il.Emit(OpCodes.Pop); | |
break; | |
case InstOpCode.Add: | |
il.Emit(OpCodes.Add); | |
break; | |
case InstOpCode.Sub: | |
il.Emit(OpCodes.Sub); | |
break; | |
case InstOpCode.Mul: | |
il.Emit(OpCodes.Mul); | |
break; | |
case InstOpCode.Div: | |
il.Emit(OpCodes.Div); | |
break; | |
case InstOpCode.Mod: | |
il.Emit(OpCodes.Rem); | |
break; | |
case InstOpCode.Sto: | |
EmitHeapStoreFn(); | |
break; | |
case InstOpCode.Ret: | |
EmitSwap(heap); | |
il.Emit(OpCodes.Call, heapRestoreFn); | |
break; | |
case InstOpCode.Mrk: | |
il.MarkLabel(GetLabel(parameter)); | |
break; | |
case InstOpCode.Cas: | |
int returnId; | |
Label returnLabel = GetReturnLabel(out returnId); | |
il.Emit(OpCodes.Ldloc, cbStack); | |
il.Emit(OpCodes.Ldc_I4, returnId); | |
il.Emit(OpCodes.Call, stackPushFn); | |
il.Emit(OpCodes.Br, GetLabel(parameter)); | |
il.MarkLabel(returnLabel); | |
break; | |
case InstOpCode.Jmp: | |
il.Emit(OpCodes.Br, GetLabel(parameter)); | |
break; | |
case InstOpCode.Jmz: | |
il.Emit(OpCodes.Brfalse, GetLabel(parameter)); | |
break; | |
case InstOpCode.Jmn: | |
il.Emit(OpCodes.Ldc_I4_0); | |
il.Emit(OpCodes.Clt); | |
il.Emit(OpCodes.Brtrue, GetLabel(parameter)); | |
break; | |
case InstOpCode.Ens: | |
il.Emit(OpCodes.Br, returnSelLabel); | |
break; | |
case InstOpCode.End: | |
il.Emit(OpCodes.Ret); | |
break; | |
case InstOpCode.Wrc: | |
EmitSwap(OpCodes.Ldarg_1); | |
il.Emit(OpCodes.Callvirt, writeCharFn); | |
break; | |
case InstOpCode.Wrn: | |
EmitSwap(OpCodes.Ldarg_1); | |
il.Emit(OpCodes.Callvirt, writeIntFn); | |
break; | |
case InstOpCode.Rec: | |
EmitSwap(heap); | |
il.Emit(OpCodes.Ldarg_0); | |
il.Emit(OpCodes.Callvirt, readCharFn); | |
il.Emit(OpCodes.Call, heapStoreSpecificFn); | |
break; | |
case InstOpCode.Ren: | |
EmitSwap(heap); | |
il.Emit(OpCodes.Ldarg_0); | |
il.Emit(OpCodes.Call, readIntFn); | |
il.Emit(OpCodes.Call, heapStoreSpecificFn); | |
break; | |
} | |
} | |
private void EmitHeapStoreFn() { | |
EmitSwap(heap); | |
il.Emit(OpCodes.Call, heapStoreFn); | |
} | |
private void EmitSwap(OpCode ldOpcode) { | |
il.Emit(OpCodes.Stloc, swap1); | |
il.Emit(ldOpcode); | |
il.Emit(OpCodes.Ldloc, swap1); | |
} | |
private void EmitSwap(LocalBuilder loc) { | |
il.Emit(OpCodes.Stloc, swap1); | |
il.Emit(OpCodes.Ldloc, loc); | |
il.Emit(OpCodes.Ldloc, swap1); | |
} | |
#endregion | |
#region Helpers | |
private Label GetLabel(int id) { | |
Label label; | |
if(!labels.TryGetValue(id, out label)) { | |
label = il.DefineLabel(); | |
labels.Add(id, label); | |
} | |
return label; | |
} | |
private Label GetReturnLabel(out int returnId) { | |
Label label = il.DefineLabel(); | |
returnId = returnLabels.Count; | |
returnLabels.Add(label); | |
return label; | |
} | |
#endregion | |
#region Public API | |
// Generated method's parameters: Compiled(TextReader reader, TextWriter writer); | |
public static void Compile(string source, ILGenerator il) { | |
if(source == null) | |
throw new ArgumentNullException("source"); | |
if(il == null) | |
throw new ArgumentNullException("il"); | |
Parser parser = new Parser(source); | |
Compiler compiler = new Compiler(parser, il); | |
parser.Parse(); | |
compiler.Compile(); | |
} | |
public static CompiledMethod Compile(string source) { | |
if(source == null) | |
throw new ArgumentNullException("source"); | |
DynamicMethod method = new DynamicMethod("_Compiled_", null, new[] { | |
textReaderType, textWriterType | |
}); | |
method.DefineParameter(0, ParameterAttributes.In, "input"); | |
method.DefineParameter(1, ParameterAttributes.In, "output"); | |
Parser parser = new Parser(source); | |
Compiler compiler = new Compiler(parser, method.GetILGenerator()); | |
parser.Parse(); | |
compiler.Compile(); | |
return method.CreateDelegate(typeof(CompiledMethod)) as CompiledMethod; | |
} | |
#endregion | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections.Generic; | |
namespace Whitespace { | |
public enum InstOpCode: byte { | |
Unknown, | |
Pus, Dup, Swp, Pop, | |
Add, Sub, Mul, Div, Mod, | |
Sto, Ret, | |
Mrk, Cas, Jmp, Jmz, Jmn, Ens, End, Wrc, Wrn, Rec, Ren | |
} | |
public struct OpParam { | |
public InstOpCode op; | |
public int parameter; | |
} | |
public class Parser { | |
#region Constants | |
private const char SChar = ' '; // S = Space | |
private const char TChar = '\t'; // T = Tab | |
private const char LChar = '\n'; // L = Line Feed | |
// Raw packed op codes sequence | |
private enum RawIMPOpCodes { | |
// Stack Manipulation [S*] | |
Pus = (SChar << 8) | SChar, // Push the number onto the stack [SS] | |
Dup = (SChar << 16) | (LChar << 8) | SChar, // Duplicate the top item on the stack [SLS] | |
Swp = (SChar << 16) | (LChar << 8) | TChar, // Swap the top two items on the stack [SLT] | |
Pop = (SChar << 16) | (LChar << 8) | LChar, // Discard the top item on the stack [SLL] | |
// Arithmetic [TS*] | |
Add = (TChar << 24) | (SChar << 16) | (SChar << 8) | SChar, // Addition [TSSS] | |
Sub = (TChar << 24) | (SChar << 16) | (SChar << 8) | TChar, // Subtraction [TSST] | |
Mul = (TChar << 24) | (SChar << 16) | (SChar << 8) | LChar, // Multiplication [TSSL] | |
Div = (TChar << 24) | (SChar << 16) | (TChar << 8) | SChar, // Integer Division [TSTS] | |
Mod = (TChar << 24) | (SChar << 16) | (TChar << 8) | TChar, // Modulo [TSTT] | |
// Heap Access [TT*] | |
Sto = (TChar << 16) | (TChar << 8) | SChar, // Store [TTS] | |
Ret = (TChar << 16) | (TChar << 8) | TChar, // Retrieve [TTT] | |
// Flow Control [L*] | |
Mrk = (LChar << 16) | (SChar << 8) | SChar, // Mark location in the program [LSS] | |
Cas = (LChar << 16) | (SChar << 8) | TChar, // Call a subroutine [LST] | |
Jmp = (LChar << 16) | (SChar << 8) | LChar, // Jump unconditionally to a label [LSL] | |
Jmz = (LChar << 16) | (TChar << 8) | SChar, // Jump to a label if the top of the stack is zero [LTS] | |
Jmn = (LChar << 16) | (TChar << 8) | TChar, // Jump to a label if the top of the stack is negative [LTT] | |
Ens = (LChar << 16) | (TChar << 8) | LChar, // End a subroutine and transfer control back to the caller [LTL] | |
End = (LChar << 16) | (LChar << 8) | LChar, // End the program [LLL] | |
// I/O [TL*] | |
Wrc = (TChar << 24) | (LChar << 16) | (SChar << 8) | SChar, // Output the character at the top of the stack [TLSS] | |
Wrn = (TChar << 24) | (LChar << 16) | (SChar << 8) | TChar, // Output the number at the top of the stack [TLST] | |
Rec = (TChar << 24) | (LChar << 16) | (TChar << 8) | SChar, // Read a character and place it in the location given by the top of the stack [TLTS] | |
Ren = (TChar << 24) | (LChar << 16) | (TChar << 8) | TChar, // Read a number and place it in the location given by the top of the stack [TLTT] | |
} | |
#endregion | |
private readonly string source; | |
private readonly List<OpParam> ops = new List<OpParam>(); | |
// Specific situation flags for compiler use | |
internal bool hasCbStack, hasSwap; | |
public string Source { | |
get { return source; } | |
} | |
public IList<OpParam> Instructions { | |
get { return ops.AsReadOnly(); } | |
} | |
private Parser() { } | |
public Parser(string source) { | |
this.source = source; | |
} | |
public void Parse() { | |
bool requireParam = false, requireSign = false; | |
int op = 0, parameter = 0; | |
bool negativeParam = false; | |
foreach(char c in source) { | |
switch(c) { | |
case SChar: | |
if(requireSign) { | |
negativeParam = false; | |
requireSign = false; | |
continue; | |
} else if(requireParam) { | |
parameter = parameter << 1; | |
continue; | |
} | |
break; | |
case TChar: | |
if(requireSign) { | |
negativeParam = true; | |
requireSign = false; | |
continue; | |
} else if(requireParam) { | |
parameter = (parameter << 1) | 1; | |
continue; | |
} | |
break; | |
case LChar: | |
if(requireParam) { | |
if(negativeParam) { | |
parameter = (int)(parameter | 0x80000000); | |
negativeParam = false; | |
} | |
if(AddOp(ConvertIMPOpCode(op), parameter)) | |
return; | |
requireParam = false; | |
requireSign = false; | |
op = 0; | |
parameter = 0; | |
continue; | |
} | |
break; | |
default: continue; | |
} | |
op = (op << 8) | c; | |
if(CheckAndAddOp(ref op, out requireParam, out requireSign)) | |
return; | |
} | |
} | |
private bool CheckAndAddOp(ref int rawOpCode, out bool requireParam, out bool signed) { | |
InstOpCode op = ConvertIMPOpCode(rawOpCode); | |
switch(op) { | |
case InstOpCode.Unknown: | |
requireParam = false; | |
signed = false; | |
return false; | |
case InstOpCode.Pus: | |
requireParam = true; | |
signed = true; | |
return false; | |
case InstOpCode.Mrk: | |
case InstOpCode.Cas: | |
case InstOpCode.Jmp: | |
case InstOpCode.Jmz: | |
case InstOpCode.Jmn: | |
requireParam = true; | |
signed = false; | |
return false; | |
default: | |
requireParam = false; | |
signed = false; | |
rawOpCode = 0; | |
return AddOp(op, 0); | |
} | |
} | |
private bool AddOp(InstOpCode opCode, int parameter) { | |
ops.Add(new OpParam { | |
op = opCode, | |
parameter = parameter | |
}); | |
switch(opCode) { | |
case InstOpCode.End: | |
return true; | |
case InstOpCode.Cas: | |
case InstOpCode.Ens: | |
hasCbStack = true; | |
break; | |
case InstOpCode.Swp: | |
hasSwap = true; | |
break; | |
} | |
return false; | |
} | |
private static InstOpCode ConvertIMPOpCode(int opCode) { | |
switch((RawIMPOpCodes)opCode) { | |
case RawIMPOpCodes.Pus: return InstOpCode.Pus; | |
case RawIMPOpCodes.Dup: return InstOpCode.Dup; | |
case RawIMPOpCodes.Swp: return InstOpCode.Swp; | |
case RawIMPOpCodes.Pop: return InstOpCode.Pop; | |
case RawIMPOpCodes.Add: return InstOpCode.Add; | |
case RawIMPOpCodes.Sub: return InstOpCode.Sub; | |
case RawIMPOpCodes.Mul: return InstOpCode.Mul; | |
case RawIMPOpCodes.Div: return InstOpCode.Div; | |
case RawIMPOpCodes.Mod: return InstOpCode.Mod; | |
case RawIMPOpCodes.Sto: return InstOpCode.Sto; | |
case RawIMPOpCodes.Ret: return InstOpCode.Ret; | |
case RawIMPOpCodes.Mrk: return InstOpCode.Mrk; | |
case RawIMPOpCodes.Cas: return InstOpCode.Cas; | |
case RawIMPOpCodes.Jmp: return InstOpCode.Jmp; | |
case RawIMPOpCodes.Jmz: return InstOpCode.Jmz; | |
case RawIMPOpCodes.Jmn: return InstOpCode.Jmn; | |
case RawIMPOpCodes.Ens: return InstOpCode.Ens; | |
case RawIMPOpCodes.End: return InstOpCode.End; | |
case RawIMPOpCodes.Wrc: return InstOpCode.Wrc; | |
case RawIMPOpCodes.Wrn: return InstOpCode.Wrn; | |
case RawIMPOpCodes.Rec: return InstOpCode.Rec; | |
case RawIMPOpCodes.Ren: return InstOpCode.Ren; | |
default: return InstOpCode.Unknown; | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections.Generic; | |
namespace Whitespace.Utils { | |
public class Heap<T> { | |
private readonly Dictionary<int, T> collection = new Dictionary<int, T>(); | |
private int nextIdx = 0; | |
public T this[int address] { | |
get { | |
T entry; | |
collection.TryGetValue(address, out entry); | |
return entry; | |
} | |
set { | |
collection[address] = value; | |
if(nextIdx <= address) | |
nextIdx = address + 1; | |
} | |
} | |
public int Store(T value) { | |
collection[nextIdx] = value; | |
return nextIdx++; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.IO; | |
namespace Whitespace.Utils { | |
public static class Helper { | |
public static int ReadInt32(TextReader textReader) { | |
int result; | |
while(!int.TryParse(textReader.ReadLine(), out result)) ; | |
return result; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment