Skip to content

Instantly share code, notes, and snippets.

@pervognsen
Last active December 19, 2023 20:46
Show Gist options
  • Star 13 You must be signed in to star a gist
  • Fork 7 You must be signed in to fork a gist
  • Save pervognsen/3bd8c715953900a3dddbd35d88c4f323 to your computer and use it in GitHub Desktop.
Save pervognsen/3bd8c715953900a3dddbd35d88c4f323 to your computer and use it in GitHub Desktop.
#define _CRT_SECURE_NO_WARNINGS
#include <string.h>
#include <stdio.h>
#include <windows.h>
#pragma warning (disable: 4146)
#include <stdint.h>
#ifdef _DEBUG
#define Assert(x) \
if (!(x)) { MessageBoxA(0, #x, "Assertion Failure", MB_OK); __debugbreak(); }
#else
#define Assert(x)
#endif
bool IsPowerOfTwo(uint32_t value) {
return value != 0 && (value & (value - 1)) == 0;
}
uint32_t Log2(uint32_t value) {
uint32_t n = 0;
while (value > 1) {
value /= 2;
n++;
}
return n;
}
// Emitter
enum {
RAX = 0,
RCX = 1,
RDX = 2,
RBX = 3,
RSP = 4,
RBP = 5,
RSI = 6,
RDI = 7,
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15
};
typedef uint8_t Register;
enum Scale {
X1 = 0,
X2 = 1,
X4 = 2,
X8 = 3
};
enum Mode {
INDIRECT = 0,
INDIRECT_BYTE_DISPLACED = 1,
INDIRECT_DISPLACED = 2,
DIRECT = 3,
};
enum ConditionCode {
O,
NO,
B,
NB,
E,
NE,
NA,
A,
S,
NS,
P,
NP,
L,
NL,
NG,
G,
NAE = B,
C = B,
AE = NB,
NC = NB,
Z = E,
NZ = NE,
BE = NA,
NBE = A,
PE = P,
PO = NP,
NGE = L,
GE = NL,
LE = NG,
NLE = G
};
uint8_t *data;
uint8_t *code;
uint8_t *next_code;
void DumpCode(char *filename) {
FILE *file = fopen(filename, "wb");
fwrite(code, next_code - code, 1, file);
fclose(file);
}
void Emit(uint8_t value) {
*next_code = value;
next_code++;
}
void Emit4(uint32_t value) {
Emit(value & 0xFF);
Emit((value >> 8) & 0xFF);
Emit((value >> 16) & 0xFF);
Emit((value >> 24) & 0xFF);
}
void Emit8(uint64_t value) {
Emit(value & 0xFF);
Emit((value >> 8) & 0xFF);
Emit((value >> 16) & 0xFF);
Emit((value >> 24) & 0xFF);
Emit((value >> 32) & 0xFF);
Emit((value >> 40) & 0xFF);
Emit((value >> 48) & 0xFF);
Emit((value >> 56) & 0xFF);
}
void EmitModRxRm(uint8_t mod, uint8_t rx, uint8_t rm) {
Assert(mod < 4);
Assert(rx < 16);
Assert(rm < 16);
Emit((mod << 6) | ((rx & 7) << 3) | (rm & 7));
}
void EmitRexIndexed(uint8_t rx, uint8_t base, uint8_t index) {
Emit(0x48 | (base >> 3) | ((index >> 3) << 1) | ((rx >> 3) << 2));
}
void EmitRex(uint8_t rx, uint8_t base) {
EmitRexIndexed(rx, base, 0);
}
// op rax, rcx
// rx = RAX, reg = RCX
void EmitDirect(uint8_t rx, Register reg) {
EmitModRxRm(DIRECT, rx, reg);
}
// op rax, [rcx]
// rx = RAX, base = RCX
void EmitIndirect(uint8_t rx, Register base) {
Assert((base & 7) != RSP);
Assert((base & 7) != RBP);
EmitModRxRm(INDIRECT, rx, base);
}
// op rax, [rip + 0x12345678]
// rx = RAX, displacement = 0x12345678
void EmitIndirectDisplacedRip(uint8_t rx, int32_t displacement) {
EmitModRxRm(INDIRECT, rx, RBP);
Emit4(displacement);
}
// op rax, [rcx + 0x12]
// rx = RAX, base = RCX, displacement = 0x12
void EmitIndirectByteDisplaced(uint8_t rx, Register base, int8_t displacement) {
Assert((base & 7) != RSP);
EmitModRxRm(INDIRECT_BYTE_DISPLACED, rx, base);
Emit(displacement);
}
// op rax, [rcx + 0x12345678]
// rx = RAX, base = RCX, displacement = 0x12345678
void EmitIndirectDisplaced(uint8_t rx, Register base, int32_t displacement) {
Assert((base & 7) != RSP);
EmitModRxRm(INDIRECT_DISPLACED, rx, base);
Emit4(displacement);
}
// op rax, [rcx + 4*rdx]
// rx = RAX, base = RCX, index = RDX, scale = X4
void EmitIndirectIndexed(uint8_t rx, Register base, Register index, Scale scale) {
Assert((base & 7) != RBP);
EmitModRxRm(INDIRECT, rx, RSP);
EmitModRxRm(scale, index, base);
}
// op rax, [rcx + 4*rdx + 0x12]
// rx = RAX, base = RCX, index = RDX, scale = X4, displacement = 0x12
void EmitIndirectIndexedByteDisplaced(uint8_t rx, Register base, Register index, Scale scale, int8_t displacement) {
EmitModRxRm(INDIRECT_BYTE_DISPLACED, rx, RSP);
EmitModRxRm(scale, index, base);
Emit(displacement);
}
// op rax, [rcx + 4*rdx + 0x12345678]
// rx = RAX, base = RCX, index = RDX, scale = X4, displacement = 0x12345678
void EmitIndirectIndexedDisplaced(uint8_t rx, Register base, Register index, Scale scale, int32_t displacement) {
EmitModRxRm(INDIRECT_DISPLACED, rx, RSP);
EmitModRxRm(scale, index, base);
Emit4(displacement);
}
// op rax, [0x12345678]
// rx = RAX, displacement = 0x12345678
void EmitDisplaced(uint8_t rx, int32_t displacement) {
EmitModRxRm(INDIRECT, rx, RSP);
EmitModRxRm(X1, RSP, RBP);
Emit4(displacement);
}
#define EMIT_I(operation, source_immediate) \
EmitRex(0, 0); \
Emit_##operation##_I(); \
Emit4(source_immediate)
#define EMIT_MOV64_R_I(destination, source_immediate) \
EmitRex(0, destination); \
Emit(0xB8 + (destination & 7)); \
Emit8(source_immediate)
#define EMIT_MOV64_RAX_OFF(source_offset) \
EmitRex(0, 0); \
Emit(0xA1); \
Emit8(source_offset)
#define EMIT_MOV64_OFF_RAX(destination_offset) \
EmitRex(0, 0); \
Emit(0xA3); \
Emit8(destination_offset)
#define EMIT_R_R(operation, destination, source) \
EmitRex(destination, source); \
Emit_##operation##_R(); \
EmitDirect(destination, source)
#define EMIT_R_RIPD(operation, destination, source_displacement) \
EmitRex(destination, 0); \
Emit_##operation##_R(); \
EmitIndirectDisplacedRip(destination, source_displacement);
#define EMIT_R_D(operation, destination, source_displacement) \
EmitRex(destination, 0); \
Emit_##operation##_R(); \
EmitDisplaced(destination, source_displacement)
#define EMIT_R_M(operation, destination, source) \
EmitRex(destination, source); \
Emit_##operation##_R(); \
EmitIndirect(destination, source)
#define EMIT_R_MD1(operation, destination, source, source_displacement) \
EmitRex(destination, source); \
Emit_##operation##_R(); \
EmitIndirectByteDisplaced(destination, source, source_displacement)
#define EMIT_R_MD(operation, destination, source, source_displacement) \
EmitRex(destination, source); \
Emit_##operation##_R(); \
EmitIndirectDisplaced(destination, source, source_displacement)
#define EMIT_R_SIB(operation, destination, source_base, source_scale, source_index) \
EmitRexIndexed(destination, source_base, source_index); \
Emit_##operation##_R(); \
EmitIndirectIndexed(destination, source_base, source_index, source_scale)
#define EMIT_R_SIBD1(operation, destination, source_base, source_scale, source_index, source_displacement) \
EmitRexIndexed(destination, source_base, source_index); \
Emit_##operation##_R(); \
EmitIndirectIndexedByteDisplaced(destination, source_base, source_index, source_scale, source_displacement)
#define EMIT_R_SIBD(operation, destination, source_base, source_scale, source_index, source_displacement) \
EmitRexIndexed(destination, source_base, source_index); \
Emit_##operation##_R(); \
EmitIndirectIndexedDisplaced(destination, source_base, source_index, source_scale, source_displacement)
#define EMIT_M_R(operation, destination, source) \
EmitRex(source, destination); \
Emit_##operation##_M(); \
EmitIndirect(source, destination)
#define EMIT_D_R(operation, destination_displacement, source) \
EmitRex(source, 0); \
Emit_##operation##_M(); \
EmitDisplaced(source, destination_displacement)
#define EMIT_RIPD_R(operation, destination_displacement, source) \
EmitRex(source, 0); \
Emit_##operation##_M(); \
EmitIndirectDisplacedRip(source, destination_displacement);
#define EMIT_MD1_R(operation, destination, destination_displacement, source) \
EmitRex(source, destination); \
Emit_##operation##_M(); \
EmitIndirectByteDisplaced(source, destination, destination_displacement)
#define EMIT_MD_R(operation, destination, destination_displacement, source) \
EmitRex(source, destination); \
Emit_##operation##_M(); \
EmitIndirectDisplaced(source, destination, destination_displacement)
#define EMIT_SIB_R(operation, destination_base, destination_scale, destination_index, source) \
EmitRexIndexed(source, destination_base, destination_index); \
Emit_##operation##_M(); \
EmitIndirectIndexed(source, destination_base, destination_index, destination_scale)
#define EMIT_SIBD1_R(operation, destination_base, destination_scale, destination_index, destination_displacement, source) \
EmitRexIndexed(source, destination_base, destination_index); \
Emit_##operation##_M(); \
EmitIndirectIndexedByteDisplaced(source, destination_base, destination_index, destination_scale, destination_displacement)
#define EMIT_SIBD_R(operation, destination_base, destination_scale, destination_index, destination_displacement, source) \
EmitRexIndexed(source, destination_base, destination_index); \
Emit_##operation##_M(); \
EmitIndirectIndexedDisplaced(source, destination_base, destination_index, destination_scale, destination_displacement)
#define EMIT_R_I(operation, destination, source_immediate) \
EmitRex(0, destination); \
Emit_##operation##_I(); \
EmitDirect(extension_##operation##_I, destination); \
Emit4(source_immediate)
#define EMIT_R_I1(operation, destination, source_immediate) \
EmitRex(0, destination); \
Emit_##operation##_I1(); \
EmitDirect(extension_##operation##_I1, destination); \
Emit(source_immediate)
#define EMIT_M_I(operation, destination, source_immediate) \
EmitRex(0, destination); \
Emit_##operation##_I(); \
EmitIndirect(extension_##operation##_I, destination); \
Emit4(source_immediate)
#define EMIT_D_I(operation, destination_displacement, source_immediate) \
EmitRex(0, 0); \
Emit_##operation##_I(); \
EmitDisplaced(extension_##operation##_I, destination_displacement); \
Emit4(source_immediate)
#define EMIT_RIPD_I(operation, destination_displacement, source_immediate) \
EmitRex(0, 0); \
Emit_##operation##_I(); \
EmitIndirectDisplacedRip(extension_##operation##_I, destination_displacement); \
Emit4(source_immediate)
#define EMIT_MD1_I(operation, destination, destination_displacement, source_immediate) \
EmitRex(0, destination); \
Emit_##operation##_I(); \
EmitIndirectByteDisplaced(extension_##operation##_I, destination, destination_displacement); \
Emit4(source_immediate)
#define EMIT_MD_I(operation, destination, destination_displacement, source_immediate) \
EmitRex(0, destination); \
Emit_##operation##_I(); \
EmitIndirectDisplaced(extension_##operation##_I, destination, destination_displacement); \
Emit4(source_immediate)
#define EMIT_SIB_I(operation, destination_base, destination_scale, destination_index, source_immediate) \
EmitRexIndexed(0, destination_base, destination_index); \
Emit_##operation##_I(); \
EmitIndirectIndexed(extension_##operation##_I, destination_base, destination_index, destination_scale); \
Emit4(source_immediate)
#define EMIT_SIBD1_I(operation, destination_base, destination_scale, destination_index, destination_displacement, source_immediate) \
EmitRexIndexed(0, destination_base, destination_index); \
Emit_##operation##_I(); \
EmitIndirectIndexedByteDisplaced(extension_##operation##_I, destination_base, destination_index, destination_scale, destination_displacement); \
Emit4(source_immediate)
#define EMIT_SIBD_I(operation, destination_base, destination_scale, destination_index, destination_displacement, source_immediate) \
EmitRexIndexed(0, destination_base, destination_index); \
Emit_##operation##_I(); \
EmitIndirectIndexedDisplaced(extension_##operation##_I, destination_base, destination_index, destination_scale, destination_displacement); \
Emit4(source_immediate)
#define EMIT_X_R(operation, source) \
EmitRex(0, source); \
Emit_##operation##_X(); \
EmitDirect(extension_##operation##_X, source)
#define EMIT_X_RIPD(operation, source_displacement) \
EmitRex(0, 0); \
Emit_##operation##_X(); \
EmitIndirectDisplacedRip(extension_##operation##_X, source_displacement);
#define EMIT_X_D(operation, source_displacement) \
EmitRex(0, 0); \
Emit_##operation##_X(); \
EmitDisplaced(extension_##operation##_X, source_displacement)
#define EMIT_X_M(operation, source) \
EmitRex(0, source); \
Emit_##operation##_X(); \
EmitIndirect(extension_##operation##_X, source)
#define EMIT_X_MD1(operation, source, source_displacement) \
EmitRex(0, source); \
Emit_##operation##_X(); \
EmitIndirectByteDisplaced(extension_##operation##_X, source, source_displacement)
#define EMIT_X_MD(operation, source, source_displacement) \
EmitRex(0, source); \
Emit_##operation##_X(); \
EmitIndirectDisplaced(extension_##operation##_X, source, source_displacement)
#define EMIT_X_SIB(operation, source_base, source_scale, source_index) \
EmitRexIndexed(0, source_base, source_index); \
Emit_##operation##_X(); \
EmitIndirectIndexed(extension_##operation##_X, source_base, source_index, source_scale)
#define EMIT_X_SIBD1(operation, source_base, source_scale, source_index, source_displacement) \
EmitRexIndexed(0, source_base, source_index); \
Emit_##operation##_X(); \
EmitIndirectIndexedByteDisplaced(extension_##operation##_X, source_base, source_index, source_scale, source_displacement)
#define EMIT_X_SIBD(operation, source_base, source_scale, source_index, source_displacement) \
EmitRexIndexed(0, source_base, source_index); \
Emit_##operation##_X(); \
EmitIndirectIndexedDisplaced(extension_##operation##_X, source_base, source_index, source_scale, source_displacement)
#define EMIT_C_I(operation, condition_code, source_immediate) \
Emit_##operation##_C_I(condition_code); \
Emit4(source_immediate)
#define OPERATION_1_R(operation, opcode) \
void Emit_##operation##_R() { \
Emit(opcode); \
}
#define OPERATION_1_M(operation, opcode) \
void Emit_##operation##_M() { \
Emit(opcode); \
}
#define OPERATION_1_I(operation, opcode, extension) \
void Emit_##operation##_I() { \
Emit(opcode); \
} \
enum { extension_##operation##_I = extension };
#define OPERATION_1_I1(operation, opcode, extension) \
void Emit_##operation##_I1() { \
Emit(opcode); \
} \
enum { extension_##operation##_I1 = extension };
#define OPERATION_1_X(operation, opcode, extension) \
void Emit_##operation##_X() { \
Emit(opcode); \
} \
enum { extension_##operation##_X = extension };
#define OPERATION_2_C_I(operation, opcode) \
void Emit_##operation##_C_I(ConditionCode condition_code) { \
Emit(0x0F); \
Emit(opcode + condition_code); \
}
OPERATION_1_R(MOV, 0x8B)
OPERATION_1_M(MOV, 0x89)
OPERATION_1_I(MOVSX, 0xC7, 0x00)
OPERATION_1_I1(SHL, 0xC1, 0x04)
OPERATION_1_I1(SHR, 0xC1, 0x05)
OPERATION_1_R(ADD, 0x03)
OPERATION_1_M(ADD, 0x01)
OPERATION_1_I(ADD, 0x81, 0x00)
OPERATION_1_R(SUB, 0x2B)
OPERATION_1_M(SUB, 0x29)
OPERATION_1_I(SUB, 0x81, 0x05)
OPERATION_1_X(MUL, 0xF7, 0x04)
OPERATION_1_X(DIV, 0xF7, 0x06)
OPERATION_1_I(CMP, 0x81, 0x07)
OPERATION_1_I(JMP, 0xE9, 0x00)
OPERATION_2_C_I(J, 0x80)
// Emitter tests
void Test_Emitter() {
EMIT_MOV64_R_I(RBX, 0x12345678deadbeefull);
EMIT_MOV64_RAX_OFF(0x12345678deadbeefull);
EMIT_MOV64_OFF_RAX(0x12345678deadbeefull);
EMIT_R_R(MOV, RAX, R10);
EMIT_M_R(MOV, RAX, R10);
EMIT_I(JMP, 0x1234);
EMIT_C_I(J, NZ, 0x1234);
EMIT_C_I(J, NZ, 0x1234);
EMIT_C_I(J, NZ, 0x1234);
EMIT_D_I(ADD, 0x1234578, 0xDEADBEEF);
EMIT_RIPD_I(ADD, 0x1234578, 0xDEADBEEF);
for (uint8_t d = RAX; d <= R15; d++) {
Register destination = (Register)d;
EMIT_X_R(MUL, destination);
if ((destination & 7) != RSP) {
EMIT_X_MD1(MUL, destination, 0x12);
EMIT_X_MD(MUL, destination, 0x12345678);
if ((destination & 7) != RBP) {
EMIT_X_M(MUL, destination);
EMIT_X_SIB(MUL, destination, X4, R8);
EMIT_X_SIBD1(MUL, destination, X4, R8, 0x12);
EMIT_X_SIBD(MUL, destination, X4, R8, 0x12345678);
}
}
EMIT_R_I(ADD, destination, 0xDEADBEEF);
if ((destination & 7) != RSP) {
EMIT_MD1_I(ADD, destination, 0x12, 0xDEADBEEF);
EMIT_MD_I(ADD, destination, 0x12345678, 0xDEAFBEEF);
if ((destination & 7) != RBP) {
EMIT_SIB_I(ADD, destination, X4, R8, 0xDEADBEEF);
}
}
EMIT_R_RIPD(ADD, destination, 0x1235678);
EMIT_R_D(ADD, destination, 0x1235678);
EMIT_RIPD_R(ADD, 0x1235678, destination);
EMIT_D_R(ADD, 0x1235678, destination);
for (uint8_t s = RAX; s <= R15; s++) {
Register source = (Register)s;
EMIT_R_R(ADD, destination, source);
if ((source & 7) != RBP) {
EMIT_R_SIB(ADD, destination, source, X4, destination);
EMIT_R_SIBD1(ADD, destination, source, X4, destination, 0x12);
EMIT_R_SIBD(ADD, destination, source, X4, destination, 0x12345678);
}
if ((destination & 7) != RBP) {
EMIT_SIB_R(ADD, destination, X4, source, source);
EMIT_SIBD1_R(ADD, destination, X4, source, 0x12, source);
EMIT_SIBD_R(ADD, destination, X4, source, 0x12345678, source);
}
if ((source & 7) != RSP && (source & 7) != RBP) {
EMIT_R_M(ADD, destination, source);
EMIT_R_MD1(ADD, destination, source, 0x12);
EMIT_R_MD(ADD, destination, source, 0x12345678);
}
if ((destination & 7) != RSP && (destination & 7) != RBP) {
EMIT_M_R(ADD, destination, source);
EMIT_MD1_R(ADD, destination, 0x12, source);
EMIT_MD_R(ADD, destination, 0x12345678, source);
}
if ((source & 7) == RSP) {
EMIT_R_SIB(ADD, destination, source, X1, RSP);
#if 0
EmitRex(destination, source);
EmitAddToRegister();
EmitIndirectIndexed(destination, source, RSP, X1);
#endif
}
}
}
DumpCode("d:\\be\\test.bin");
}
// String table
// TODO: replace with something beter
uint64_t HashString(char *string, size_t length) {
static const uint64_t fnv_prime = 0x100000001b3ull;
static const uint64_t fnv_initializer = 0xcbf29ce484222325ull;
uint64_t hash = fnv_initializer;
for (size_t i = 0; i < length; i++) {
hash ^= string[i];
hash *= fnv_prime;
}
return hash;
}
struct HashedString {
uint64_t hash;
char *string;
};
enum {
MAX_HASHED_STRINGS = 1024,
MAX_STRING_DATA = 1024 * 1024
};
char string_data[MAX_STRING_DATA];
char *next_string_data = string_data;
HashedString hashed_strings[MAX_HASHED_STRINGS];
size_t hashed_strings_count;
char *InternString(char *string, size_t length) {
uint64_t hash = HashString(string, length);
for (size_t i = 0; i < hashed_strings_count; i++) {
if (hashed_strings[i].hash == hash && strncmp(hashed_strings[i].string, string, length) == 0) {
return hashed_strings[i].string;
}
}
Assert(hashed_strings_count < MAX_HASHED_STRINGS);
Assert(next_string_data + length + 1 < string_data + MAX_STRING_DATA);
char *interned_string = next_string_data;
memcpy(interned_string, string, length);
interned_string[length] = 0;
next_string_data += length + 1;
hashed_strings[hashed_strings_count].hash = hash;
hashed_strings[hashed_strings_count].string = interned_string;
hashed_strings_count++;
return interned_string;
}
// Symbol tables
enum TypeKind {
TYPE_UINT64,
TYPE_INT64,
TYPE_UINT32,
TYPE_INT32,
TYPE_UINT8,
TYPE_INT8,
TYPE_ARRAY,
TYPE_AGGREGATE
};
struct Type;
struct Field {
Type *type;
uint32_t offset;
};
struct Type {
TypeKind kind;
uint32_t size;
union {
struct {
Type *element;
size_t length;
} array;
struct {
Field *fields;
size_t length;
} aggregate;
};
};
Type type_uint64 = {TYPE_UINT64, sizeof(uint64_t)};
enum SymbolKind {
SYMBOL_GLOBAL_VARIABLE,
SYMBOL_LOCAL_VARIABLE
};
struct Symbol {
SymbolKind kind;
char *name;
Type *type;
uint32_t offset;
};
enum {
MAX_GLOBAL_VARIABLES = 1024,
MAX_LOCAL_VARIABLES = 1024
};
Symbol global_variables[MAX_GLOBAL_VARIABLES];
size_t global_variables_count;
uint32_t global_variables_offset;
Symbol *PushGlobalVariable(char *name, Type *type) {
Assert(global_variables_count < MAX_GLOBAL_VARIABLES);
Symbol *global_variable = global_variables + global_variables_count;
global_variable->kind = SYMBOL_GLOBAL_VARIABLE;
global_variable->name = name;
global_variable->type = type;
global_variable->offset = global_variables_offset;
global_variables_offset += type->size;
global_variables_count++;
return global_variable;
}
Symbol local_variables[MAX_LOCAL_VARIABLES];
size_t local_variables_count;
uint32_t local_variables_offset;
Symbol *PushLocalVariable(char *name, Type *type) {
Assert(local_variables_count < MAX_LOCAL_VARIABLES);
Symbol *local_variable = local_variables + local_variables_count;
local_variable->kind = SYMBOL_LOCAL_VARIABLE;
local_variable->name = name;
local_variable->type = type;
local_variable->offset = local_variables_offset;
local_variables_offset += type->size;
local_variables_count++;
return local_variable;
}
void PopLocalVariables(size_t new_count) {
Assert(new_count >= 0);
Assert(new_count <= local_variables_count);
if (new_count < local_variables_count) {
local_variables_offset = local_variables[new_count].offset;
}
local_variables_count = new_count;
}
Symbol *FindSymbol(char *name) {
for (int64_t i = local_variables_count - 1; i >= 0; i--) {
if (local_variables[i].name == name) {
return local_variables + i;
}
}
for (int64_t i = global_variables_count; i >= 0; i--) {
if (global_variables[i].name == name) {
return global_variables + i;
}
}
return 0;
}
// Reader
char *string_if;
char *string_else;
char *string_while;
char *string_uint64;
char current_character;
char *remaining_characters;
void ReadCharacter() {
Assert(current_character);
current_character = *remaining_characters;
remaining_characters++;
}
enum {
TOKEN_EOF = 0,
LAST_LITERAL_TOKEN = 127,
TOKEN_INTEGER,
TOKEN_IDENTIFIER,
TOKEN_PRINT,
TOKEN_IF,
TOKEN_ELSE,
TOKEN_WHILE
};
typedef uint8_t Token;
Token current_token;
uint32_t current_token_integer;
char *current_token_identifier;
void ConsumeToken() {
retry:
switch (current_character) {
case 0:
current_token = TOKEN_EOF;
break;
case ' ':
case '\n':
case '\r':
case '\t':
case '\v':
ReadCharacter();
goto retry;
break;
case 'a':
case 'A':
case 'b':
case 'B':
case 'c':
case 'C':
case 'd':
case 'D':
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
case 'h':
case 'H':
case 'i':
case 'I':
case 'j':
case 'J':
case 'k':
case 'K':
case 'l':
case 'L':
case 'm':
case 'M':
case 'n':
case 'N':
case 'o':
case 'O':
case 'p':
case 'P':
case 'q':
case 'Q':
case 'r':
case 'R':
case 's':
case 'S':
case 't':
case 'T':
case 'u':
case 'U':
case 'v':
case 'V':
case 'w':
case 'W':
case 'x':
case 'X':
case 'y':
case 'Y':
case 'z':
case 'Z': {
char *start = remaining_characters - 1;
ReadCharacter();
while (isalnum(current_character) || current_character == '_') {
ReadCharacter();
}
char *end = remaining_characters - 1;
char *string = InternString(start, end - start);
if (string == string_if) {
current_token = TOKEN_IF;
} else if (string == string_else) {
current_token = TOKEN_ELSE;
} else if (string == string_while) {
current_token = TOKEN_WHILE;
} else {
current_token = TOKEN_IDENTIFIER;
current_token_identifier = string;
}
break;
}
case '=':
case ';':
case ':':
case '{':
case '}':
case '(':
case ')':
case '+':
case '-':
case '*':
case '/':
case '^':
current_token = (Token)current_character;
ReadCharacter();
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
current_token = TOKEN_INTEGER;
current_token_integer = current_character - '0';
ReadCharacter();
while (isdigit(current_character)) {
current_token_integer *= 10;
current_token_integer += current_character - '0';
ReadCharacter();
}
break;
default:
Assert(0);
}
}
void ExpectToken(Token expected_token) {
Assert(current_token == expected_token);
ConsumeToken();
}
uint32_t free_register_mask;
void InitializeFreeRegisters() {
Register available_registers[] = {RCX, RBX, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15};
for (size_t i = 0; i < sizeof(available_registers) / sizeof(*available_registers); i++) {
free_register_mask |= 1 << available_registers[i];
}
}
Register AllocateRegister() {
Assert(free_register_mask != 0);
DWORD free_register;
_BitScanForward(&free_register,free_register_mask);
free_register_mask &= ~(1 << free_register);
return (Register)free_register;
}
void FreeRegister(Register allocated_register) {
Assert((free_register_mask & (1 << allocated_register)) == 0);
free_register_mask |= 1 << allocated_register;
}
enum OperandKind {
OPERAND_NULL,
OPERAND_FRAME_OFFSET,
OPERAND_REGISTER,
OPERAND_IMMEDIATE,
OPERAND_ADDRESS
};
struct Operand {
OperandKind kind;
Type *type;
union {
Register operand_register;
uint32_t operand_immediate;
uint32_t operand_frame_offset;
uint32_t operand_address;
};
};
void AllocateOperandRegister(Operand *operand) {
if (operand->kind != OPERAND_REGISTER) {
operand->kind = OPERAND_REGISTER;
operand->operand_register = AllocateRegister();
}
}
void StealOperandRegister(Operand *destination, Operand *source) {
Assert(source->kind == OPERAND_REGISTER);
destination->kind = OPERAND_REGISTER;
destination->operand_register = source->operand_register;
source->kind = OPERAND_NULL;
}
void FreeOperand(Operand *operand) {
if (operand->kind == OPERAND_REGISTER) {
FreeRegister(operand->operand_register);
}
operand->kind = OPERAND_NULL;
}
void PatchWithOperandAddress(uint32_t offset, Operand *operand) {
Assert(operand->kind == OPERAND_ADDRESS);
*(int32_t *)(next_code - 4 * offset) = (int32_t)(code - next_code - 8) - (uint32_t)operand->operand_address;
}
void EmitOperandToRegister(Operand *operand, Register target_register) {
if (operand->kind == OPERAND_IMMEDIATE) {
EMIT_R_I(MOVSX, target_register, operand->operand_immediate);
} else if (operand->kind == OPERAND_FRAME_OFFSET) {
Assert(operand->operand_frame_offset <= INT8_MAX);
EMIT_R_MD1(MOV, target_register, RBP, operand->operand_frame_offset);
} else if (operand->kind == OPERAND_ADDRESS) {
EMIT_R_RIPD(MOV, target_register, 0);
PatchWithOperandAddress(1, operand);
} else if (operand->kind == OPERAND_REGISTER) {
if (operand->operand_register != target_register) {
EMIT_R_R(MOV, target_register, operand->operand_register);
}
}
}
void EnsureOperandHasRegister(Operand *operand) {
if (operand->kind != OPERAND_REGISTER) {
Register operand_register = AllocateRegister();
EmitOperandToRegister(operand, operand_register);
operand->kind = OPERAND_REGISTER;
operand->operand_register = operand_register;
}
}
void ParseExpression(Operand *destination);
void ParseAtom(Operand *destination) {
if (current_token == TOKEN_IDENTIFIER) {
Symbol *symbol = FindSymbol(current_token_identifier);
Assert(symbol);
ConsumeToken();
if (symbol->kind == SYMBOL_LOCAL_VARIABLE) {
destination->kind = OPERAND_FRAME_OFFSET;
destination->operand_frame_offset = symbol->offset;
} else {
destination->kind = OPERAND_ADDRESS;
destination->operand_address = symbol->offset;
}
} else if (current_token == TOKEN_INTEGER) {
ConsumeToken();
destination->kind = OPERAND_IMMEDIATE;
destination->operand_immediate = current_token_integer;
} else if (current_token == '(') {
ConsumeToken();
ParseExpression(destination);
ExpectToken(')');
} else {
Assert(0);
}
}
void EmitMultiply(Operand *destination, Operand *operand) {
if (destination->kind == OPERAND_IMMEDIATE && operand->kind == OPERAND_IMMEDIATE) {
destination->operand_immediate *= operand->operand_immediate;
} else if (operand->kind == OPERAND_IMMEDIATE && IsPowerOfTwo(operand->operand_immediate)) {
EnsureOperandHasRegister(destination);
EMIT_R_I1(SHL, destination->operand_register, Log2(operand->operand_immediate));
} else if (destination->kind == OPERAND_IMMEDIATE && IsPowerOfTwo(destination->operand_immediate)) {
EnsureOperandHasRegister(operand);
EMIT_R_I1(SHL, operand->operand_register, Log2(destination->operand_immediate));
StealOperandRegister(destination, operand);
} else {
EmitOperandToRegister(destination, RAX);
if (operand->kind == OPERAND_FRAME_OFFSET) {
Assert(operand->operand_frame_offset <= INT8_MAX);
EMIT_X_MD1(MUL, RBP, operand->operand_frame_offset);
} else if (operand->kind == OPERAND_ADDRESS) {
EMIT_X_RIPD(MUL, 0);
PatchWithOperandAddress(1, operand);
} else {
EnsureOperandHasRegister(operand);
EMIT_X_R(MUL, operand->operand_register);
}
AllocateOperandRegister(destination);
EMIT_R_R(MOV, destination->operand_register, RAX);
}
}
void EmitDivide(Operand *destination, Operand *operand) {
if (destination->kind == OPERAND_IMMEDIATE && operand->kind == OPERAND_IMMEDIATE) {
destination->operand_immediate /= operand->operand_immediate;
} else if (operand->kind == OPERAND_IMMEDIATE && IsPowerOfTwo(operand->operand_immediate)) {
EnsureOperandHasRegister(destination);
EMIT_R_I1(SHR, destination->operand_register, Log2(operand->operand_immediate));
} else {
EmitOperandToRegister(destination, RAX);
if (operand->kind == OPERAND_FRAME_OFFSET) {
Assert(operand->operand_frame_offset <= INT8_MAX);
EMIT_X_MD1(DIV, RBP, operand->operand_frame_offset);
} else if (operand->kind == OPERAND_ADDRESS) {
EMIT_X_RIPD(DIV, 0);
PatchWithOperandAddress(1, operand);
} else {
EnsureOperandHasRegister(operand);
EMIT_X_R(DIV, operand->operand_register);
}
AllocateOperandRegister(destination);
EMIT_R_R(MOV, destination->operand_register, RAX);
}
}
void ParseTerm(Operand *destination) {
ParseAtom(destination);
while (current_token == '*' || current_token == '/') {
Token operator_token = current_token;
ConsumeToken();
Operand operand;
ParseTerm(&operand);
if (operator_token == '*') {
EmitMultiply(destination, &operand);
} else {
EmitDivide(destination, &operand);
}
FreeOperand(&operand);
}
}
void EmitAdd(Operand *destination, Operand *operand) {
if (destination->kind == OPERAND_IMMEDIATE && operand->kind == OPERAND_IMMEDIATE) {
destination->operand_immediate += operand->operand_immediate;
} else if (destination->kind == OPERAND_IMMEDIATE) {
EnsureOperandHasRegister(operand);
EMIT_R_I(ADD, operand->operand_register, destination->operand_immediate);
destination->kind = OPERAND_REGISTER;
destination->operand_register = operand->operand_register;
} else {
EnsureOperandHasRegister(destination);
if (operand->kind == OPERAND_IMMEDIATE) {
EMIT_R_I(ADD, destination->operand_register, operand->operand_immediate);
} else if (operand->kind == OPERAND_FRAME_OFFSET) {
Assert(operand->operand_frame_offset <= INT8_MAX);
EMIT_R_MD1(ADD, destination->operand_register, RBP, operand->operand_frame_offset);
} else if (operand->kind == OPERAND_ADDRESS) {
EMIT_R_RIPD(ADD, destination->operand_register, 0);
PatchWithOperandAddress(1, operand);
} else {
Assert(operand->kind == OPERAND_REGISTER);
EMIT_R_R(ADD, destination->operand_register, operand->operand_register);
}
}
}
void EmitSubtract(Operand *destination, Operand *operand) {
if (destination->kind == OPERAND_IMMEDIATE && operand->kind == OPERAND_IMMEDIATE) {
destination->operand_immediate -= operand->operand_immediate;
} else if (destination->kind == OPERAND_IMMEDIATE) {
EnsureOperandHasRegister(operand);
EMIT_R_I(ADD, operand->operand_register, -destination->operand_immediate);
destination->kind = OPERAND_REGISTER;
destination->operand_register = operand->operand_register;
} else {
EnsureOperandHasRegister(destination);
if (operand->kind == OPERAND_IMMEDIATE) {
EMIT_R_I(SUB, destination->operand_register, operand->operand_immediate);
} else if (operand->kind == OPERAND_FRAME_OFFSET) {
Assert(operand->operand_frame_offset <= INT8_MAX);
EMIT_R_MD1(SUB, destination->operand_register, RBP, operand->operand_frame_offset);
} else if (operand->kind == OPERAND_ADDRESS) {
EMIT_R_RIPD(SUB, destination->operand_register, 0);
PatchWithOperandAddress(1, operand);
} else {
Assert(operand->kind == OPERAND_REGISTER);
EMIT_R_R(SUB, destination->operand_register, operand->operand_register);
}
}
}
void ParseExpression(Operand *destination) {
ParseTerm(destination);
while (current_token == '+' || current_token == '-') {
Token operator_token = current_token;
ConsumeToken();
Operand operand;
ParseTerm(&operand);
if (operator_token == '+') {
EmitAdd(destination, &operand);
} else {
EmitSubtract(destination, &operand);
}
FreeOperand(&operand);
}
}
Type *ParseType() {
if (current_token == TOKEN_IDENTIFIER) {
if (current_token_identifier == string_uint64) {
ConsumeToken();
return &type_uint64;
}
}
return 0;
}
void ParseStatementBlock();
void ParseStatement() {
if (current_token == TOKEN_PRINT) {
ConsumeToken();
Operand operand;
ParseExpression(&operand);
// printf("%d", rcx)
// rcx = ["%d"], rdx = operand result
FreeOperand(&operand);
ExpectToken(';');
} else if (current_token == TOKEN_IF) {
ConsumeToken();
ExpectToken('(');
Operand operand;
ParseExpression(&operand);
ExpectToken(')');
EnsureOperandHasRegister(&operand);
EMIT_R_I(CMP, operand.operand_register, 0);
EMIT_C_I(J, Z, 0);
uint8_t *end_of_if_jump = next_code;
uint8_t *if_jump_immediate_address = next_code - 4;
FreeOperand(&operand);
ParseStatementBlock();
if (current_token == TOKEN_ELSE) {
ConsumeToken();
EMIT_I(JMP, 0);
*(uint32_t *)if_jump_immediate_address = (uint32_t)(next_code - end_of_if_jump);
uint8_t *end_of_else_jump = next_code;
uint8_t *else_jump_immediate_address = next_code - 4;
ParseStatementBlock();
*(uint32_t *)else_jump_immediate_address = (uint32_t)(next_code - end_of_else_jump);
} else {
*(uint32_t *)if_jump_immediate_address = (uint32_t)(next_code - end_of_if_jump);
}
} else if (current_token == TOKEN_WHILE) {
ConsumeToken();
uint8_t *start_of_while = next_code;
ExpectToken('(');
Operand operand;
ParseExpression(&operand);
ExpectToken(')');
EnsureOperandHasRegister(&operand);
EMIT_R_I(CMP, operand.operand_register, 0);
EMIT_C_I(J, Z, 0);
uint8_t *end_of_while_jump = next_code;
uint8_t *while_jump_immediate_address = next_code - 4;
FreeOperand(&operand);
ParseStatementBlock();
EMIT_I(JMP, 0);
*(uint32_t *)(next_code - 4) = (int32_t)(start_of_while - next_code);
*(uint32_t *)while_jump_immediate_address = (uint32_t)(next_code - end_of_while_jump);
} else if (current_token == TOKEN_IDENTIFIER) {
char *identifier = current_token_identifier;
ConsumeToken();
if (current_token == '=') {
ConsumeToken();
Symbol *symbol = FindSymbol(identifier);
Assert(symbol);
if (symbol->kind == SYMBOL_LOCAL_VARIABLE) {
uint32_t local_variable_offset = symbol->offset;
Operand operand;
ParseExpression(&operand);
if (operand.kind == OPERAND_IMMEDIATE) {
EMIT_MD1_I(MOVSX, RBP, local_variable_offset, operand.operand_immediate);
} else {
EnsureOperandHasRegister(&operand);
EMIT_MD1_R(MOV, RBP, local_variable_offset, operand.operand_register);
}
FreeOperand(&operand);
} else {
Assert(symbol->kind == SYMBOL_GLOBAL_VARIABLE);
Operand variable;
variable.kind = OPERAND_ADDRESS;
variable.operand_address = symbol->offset;
Operand operand;
ParseExpression(&operand);
if (operand.kind == OPERAND_IMMEDIATE) {
EMIT_RIPD_I(MOVSX, 0, operand.operand_immediate);
PatchWithOperandAddress(2, &variable);
} else {
EnsureOperandHasRegister(&operand);
EMIT_RIPD_R(MOV, 0, operand.operand_register);
PatchWithOperandAddress(1, &variable);
}
FreeOperand(&operand);
}
} else {
ExpectToken(':');
Type *type = ParseType();
PushLocalVariable(identifier, type);
}
ExpectToken(';');
} else {
Assert(0);
}
}
void ParseStatementBlock() {
uint64_t initial_local_variables_count = local_variables_count;
ExpectToken('{');
while (current_token != '}') {
ParseStatement();
}
ConsumeToken();
PopLocalVariables(initial_local_variables_count);
}
void ParseModule() {
while (current_token != '{') {
ExpectToken(TOKEN_IDENTIFIER);
char *identifier = current_token_identifier;
ExpectToken(':');
Type *type = ParseType();
PushGlobalVariable(identifier, type);
ExpectToken(';');
}
EMIT_MOV64_R_I(RBP, (uint64_t)data);
ParseStatementBlock();
}
void OpenFileForParsing(const char *filename) {
HANDLE file = CreateFileA(filename, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
Assert(file != INVALID_HANDLE_VALUE);
DWORD file_size = GetFileSize(file, 0);
HANDLE file_mapping = CreateFileMappingA(file, 0, PAGE_WRITECOPY, 0, 0, 0);
Assert(file_mapping);
char *file_memory = (char *)MapViewOfFileEx(file_mapping, FILE_MAP_COPY, 0, 0, 0, 0);
Assert(file_memory);
Assert(file_size > 0);
Assert(file_memory[file_size - 1] == '\n');
file_memory[file_size - 1] = 0;
current_character = *file_memory;
remaining_characters = file_memory + 1;
ConsumeToken();
}
void InitializeCompiler() {
data = (uint8_t *)VirtualAlloc(0, 1024 * 1024 * 1024, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
code = data + 4096;
next_code = code;
InitializeFreeRegisters();
string_if = InternString("if", 2);
string_else = InternString("else", 4);
string_while = InternString("while", 5);
string_uint64 = InternString("uint64", 6);
}
void CompileFile(char *source_filename, char *binary_filename) {
LARGE_INTEGER timer_frequency;
QueryPerformanceFrequency(&timer_frequency);
LARGE_INTEGER start_time;
QueryPerformanceCounter(&start_time);
OpenFileForParsing(source_filename);
ParseModule();
LARGE_INTEGER end_time;
QueryPerformanceCounter(&end_time);
uint64_t duration = end_time.QuadPart - start_time.QuadPart;
char temp[32];
sprintf(temp, "Time: %.2fs\n", (float)duration / (float)timer_frequency.QuadPart);
OutputDebugStringA(temp);
DumpCode(binary_filename);
}
int main(int argc, char **argv) {
InitializeCompiler();
// CompileFile("d:\\be\\fib.be", "d:\\be\\fib.bin");
CompileFile("d:\\be\\huge_fib.be", "d:\\be\\huge_fib.bin");
// CompileFile("d:\\be\\test.be", "d:\\be\\test.bin");
// CompileFile("d:\\be\\huge.be", "d:\\be\\huge.bin");
#if 0
void (*dummy_function)() = (void(*)())code;
dummy_function();
#endif
return 0;
}
@SHATSON
Copy link

SHATSON commented Apr 20, 2018

brings an error of compatibility of char and const char

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment