Skip to content

Instantly share code, notes, and snippets.

@adragomir
Forked from pervognsen/be_x64.cpp
Created August 13, 2017 13:42
Show Gist options
  • Save adragomir/ddc180719819725ba3788f6da613d654 to your computer and use it in GitHub Desktop.
Save adragomir/ddc180719819725ba3788f6da613d654 to your computer and use it in GitHub Desktop.
#define _CRT_SECURE_NO_WARNINGS
#include <string.h>
#include <stdio.h>
#include <windows.h>
#pragma warning (disable: 4146)
#include <stdint.h>
#ifdef _DEBUG
#define Assert(x) \
if (!(x)) { MessageBoxA(0, #x, "Assertion Failure", MB_OK); __debugbreak(); }
#else
#define Assert(x)
#endif
bool IsPowerOfTwo(uint32_t value) {
return value != 0 && (value & (value - 1)) == 0;
}
uint32_t Log2(uint32_t value) {
uint32_t n = 0;
while (value > 1) {
value /= 2;
n++;
}
return n;
}
// Emitter
enum {
RAX = 0,
RCX = 1,
RDX = 2,
RBX = 3,
RSP = 4,
RBP = 5,
RSI = 6,
RDI = 7,
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15
};
typedef uint8_t Register;
enum Scale {
X1 = 0,
X2 = 1,
X4 = 2,
X8 = 3
};
enum Mode {
INDIRECT = 0,
INDIRECT_BYTE_DISPLACED = 1,
INDIRECT_DISPLACED = 2,
DIRECT = 3,
};
enum ConditionCode {
O,
NO,
B,
NB,
E,
NE,
NA,
A,
S,
NS,
P,
NP,
L,
NL,
NG,
G,
NAE = B,
C = B,
AE = NB,
NC = NB,
Z = E,
NZ = NE,
BE = NA,
NBE = A,
PE = P,
PO = NP,
NGE = L,
GE = NL,
LE = NG,
NLE = G
};
uint8_t *data;
uint8_t *code;
uint8_t *next_code;
void DumpCode(char *filename) {
FILE *file = fopen(filename, "wb");
fwrite(code, next_code - code, 1, file);
fclose(file);
}
void Emit(uint8_t value) {
*next_code = value;
next_code++;
}
void Emit4(uint32_t value) {
Emit(value & 0xFF);
Emit((value >> 8) & 0xFF);
Emit((value >> 16) & 0xFF);
Emit((value >> 24) & 0xFF);
}
void Emit8(uint64_t value) {
Emit(value & 0xFF);
Emit((value >> 8) & 0xFF);
Emit((value >> 16) & 0xFF);
Emit((value >> 24) & 0xFF);
Emit((value >> 32) & 0xFF);
Emit((value >> 40) & 0xFF);
Emit((value >> 48) & 0xFF);
Emit((value >> 56) & 0xFF);
}
void EmitModRxRm(uint8_t mod, uint8_t rx, uint8_t rm) {
Assert(mod < 4);
Assert(rx < 16);
Assert(rm < 16);
Emit((mod << 6) | ((rx & 7) << 3) | (rm & 7));
}
void EmitRexIndexed(uint8_t rx, uint8_t base, uint8_t index) {
Emit(0x48 | (base >> 3) | ((index >> 3) << 1) | ((rx >> 3) << 2));
}
void EmitRex(uint8_t rx, uint8_t base) {
EmitRexIndexed(rx, base, 0);
}
// op rax, rcx
// rx = RAX, reg = RCX
void EmitDirect(uint8_t rx, Register reg) {
EmitModRxRm(DIRECT, rx, reg);
}
// op rax, [rcx]
// rx = RAX, base = RCX
void EmitIndirect(uint8_t rx, Register base) {
Assert((base & 7) != RSP);
Assert((base & 7) != RBP);
EmitModRxRm(INDIRECT, rx, base);
}
// op rax, [rip + 0x12345678]
// rx = RAX, displacement = 0x12345678
void EmitIndirectDisplacedRip(uint8_t rx, int32_t displacement) {
EmitModRxRm(INDIRECT, rx, RBP);
Emit4(displacement);
}
// op rax, [rcx + 0x12]
// rx = RAX, base = RCX, displacement = 0x12
void EmitIndirectByteDisplaced(uint8_t rx, Register base, int8_t displacement) {
Assert((base & 7) != RSP);
EmitModRxRm(INDIRECT_BYTE_DISPLACED, rx, base);
Emit(displacement);
}
// op rax, [rcx + 0x12345678]
// rx = RAX, base = RCX, displacement = 0x12345678
void EmitIndirectDisplaced(uint8_t rx, Register base, int32_t displacement) {
Assert((base & 7) != RSP);
EmitModRxRm(INDIRECT_DISPLACED, rx, base);
Emit4(displacement);
}
// op rax, [rcx + 4*rdx]
// rx = RAX, base = RCX, index = RDX, scale = X4
void EmitIndirectIndexed(uint8_t rx, Register base, Register index, Scale scale) {
Assert((base & 7) != RBP);
EmitModRxRm(INDIRECT, rx, RSP);
EmitModRxRm(scale, index, base);
}
// op rax, [rcx + 4*rdx + 0x12]
// rx = RAX, base = RCX, index = RDX, scale = X4, displacement = 0x12
void EmitIndirectIndexedByteDisplaced(uint8_t rx, Register base, Register index, Scale scale, int8_t displacement) {
EmitModRxRm(INDIRECT_BYTE_DISPLACED, rx, RSP);
EmitModRxRm(scale, index, base);
Emit(displacement);
}
// op rax, [rcx + 4*rdx + 0x12345678]
// rx = RAX, base = RCX, index = RDX, scale = X4, displacement = 0x12345678
void EmitIndirectIndexedDisplaced(uint8_t rx, Register base, Register index, Scale scale, int32_t displacement) {
EmitModRxRm(INDIRECT_DISPLACED, rx, RSP);
EmitModRxRm(scale, index, base);
Emit4(displacement);
}
// op rax, [0x12345678]
// rx = RAX, displacement = 0x12345678
void EmitDisplaced(uint8_t rx, int32_t displacement) {
EmitModRxRm(INDIRECT, rx, RSP);
EmitModRxRm(X1, RSP, RBP);
Emit4(displacement);
}
#define EMIT_I(operation, source_immediate) \
EmitRex(0, 0); \
Emit_##operation##_I(); \
Emit4(source_immediate)
#define EMIT_MOV64_R_I(destination, source_immediate) \
EmitRex(0, destination); \
Emit(0xB8 + (destination & 7)); \
Emit8(source_immediate)
#define EMIT_MOV64_RAX_OFF(source_offset) \
EmitRex(0, 0); \
Emit(0xA1); \
Emit8(source_offset)
#define EMIT_MOV64_OFF_RAX(destination_offset) \
EmitRex(0, 0); \
Emit(0xA3); \
Emit8(destination_offset)
#define EMIT_R_R(operation, destination, source) \
EmitRex(destination, source); \
Emit_##operation##_R(); \
EmitDirect(destination, source)
#define EMIT_R_RIPD(operation, destination, source_displacement) \
EmitRex(destination, 0); \
Emit_##operation##_R(); \
EmitIndirectDisplacedRip(destination, source_displacement);
#define EMIT_R_D(operation, destination, source_displacement) \
EmitRex(destination, 0); \
Emit_##operation##_R(); \
EmitDisplaced(destination, source_displacement)
#define EMIT_R_M(operation, destination, source) \
EmitRex(destination, source); \
Emit_##operation##_R(); \
EmitIndirect(destination, source)
#define EMIT_R_MD1(operation, destination, source, source_displacement) \
EmitRex(destination, source); \
Emit_##operation##_R(); \
EmitIndirectByteDisplaced(destination, source, source_displacement)
#define EMIT_R_MD(operation, destination, source, source_displacement) \
EmitRex(destination, source); \
Emit_##operation##_R(); \
EmitIndirectDisplaced(destination, source, source_displacement)
#define EMIT_R_SIB(operation, destination, source_base, source_scale, source_index) \
EmitRexIndexed(destination, source_base, source_index); \
Emit_##operation##_R(); \
EmitIndirectIndexed(destination, source_base, source_index, source_scale)
#define EMIT_R_SIBD1(operation, destination, source_base, source_scale, source_index, source_displacement) \
EmitRexIndexed(destination, source_base, source_index); \
Emit_##operation##_R(); \
EmitIndirectIndexedByteDisplaced(destination, source_base, source_index, source_scale, source_displacement)
#define EMIT_R_SIBD(operation, destination, source_base, source_scale, source_index, source_displacement) \
EmitRexIndexed(destination, source_base, source_index); \
Emit_##operation##_R(); \
EmitIndirectIndexedDisplaced(destination, source_base, source_index, source_scale, source_displacement)
#define EMIT_M_R(operation, destination, source) \
EmitRex(source, destination); \
Emit_##operation##_M(); \
EmitIndirect(source, destination)
#define EMIT_D_R(operation, destination_displacement, source) \
EmitRex(source, 0); \
Emit_##operation##_M(); \
EmitDisplaced(source, destination_displacement)
#define EMIT_RIPD_R(operation, destination_displacement, source) \
EmitRex(source, 0); \
Emit_##operation##_M(); \
EmitIndirectDisplacedRip(source, destination_displacement);
#define EMIT_MD1_R(operation, destination, destination_displacement, source) \
EmitRex(source, destination); \
Emit_##operation##_M(); \
EmitIndirectByteDisplaced(source, destination, destination_displacement)
#define EMIT_MD_R(operation, destination, destination_displacement, source) \
EmitRex(source, destination); \
Emit_##operation##_M(); \
EmitIndirectDisplaced(source, destination, destination_displacement)
#define EMIT_SIB_R(operation, destination_base, destination_scale, destination_index, source) \
EmitRexIndexed(source, destination_base, destination_index); \
Emit_##operation##_M(); \
EmitIndirectIndexed(source, destination_base, destination_index, destination_scale)
#define EMIT_SIBD1_R(operation, destination_base, destination_scale, destination_index, destination_displacement, source) \
EmitRexIndexed(source, destination_base, destination_index); \
Emit_##operation##_M(); \
EmitIndirectIndexedByteDisplaced(source, destination_base, destination_index, destination_scale, destination_displacement)
#define EMIT_SIBD_R(operation, destination_base, destination_scale, destination_index, destination_displacement, source) \
EmitRexIndexed(source, destination_base, destination_index); \
Emit_##operation##_M(); \
EmitIndirectIndexedDisplaced(source, destination_base, destination_index, destination_scale, destination_displacement)
#define EMIT_R_I(operation, destination, source_immediate) \
EmitRex(0, destination); \
Emit_##operation##_I(); \
EmitDirect(extension_##operation##_I, destination); \
Emit4(source_immediate)
#define EMIT_R_I1(operation, destination, source_immediate) \
EmitRex(0, destination); \
Emit_##operation##_I1(); \
EmitDirect(extension_##operation##_I1, destination); \
Emit(source_immediate)
#define EMIT_M_I(operation, destination, source_immediate) \
EmitRex(0, destination); \
Emit_##operation##_I(); \
EmitIndirect(extension_##operation##_I, destination); \
Emit4(source_immediate)
#define EMIT_D_I(operation, destination_displacement, source_immediate) \
EmitRex(0, 0); \
Emit_##operation##_I(); \
EmitDisplaced(extension_##operation##_I, destination_displacement); \
Emit4(source_immediate)
#define EMIT_RIPD_I(operation, destination_displacement, source_immediate) \
EmitRex(0, 0); \
Emit_##operation##_I(); \
EmitIndirectDisplacedRip(extension_##operation##_I, destination_displacement); \
Emit4(source_immediate)
#define EMIT_MD1_I(operation, destination, destination_displacement, source_immediate) \
EmitRex(0, destination); \
Emit_##operation##_I(); \
EmitIndirectByteDisplaced(extension_##operation##_I, destination, destination_displacement); \
Emit4(source_immediate)
#define EMIT_MD_I(operation, destination, destination_displacement, source_immediate) \
EmitRex(0, destination); \
Emit_##operation##_I(); \
EmitIndirectDisplaced(extension_##operation##_I, destination, destination_displacement); \
Emit4(source_immediate)
#define EMIT_SIB_I(operation, destination_base, destination_scale, destination_index, source_immediate) \
EmitRexIndexed(0, destination_base, destination_index); \
Emit_##operation##_I(); \
EmitIndirectIndexed(extension_##operation##_I, destination_base, destination_index, destination_scale); \
Emit4(source_immediate)
#define EMIT_SIBD1_I(operation, destination_base, destination_scale, destination_index, destination_displacement, source_immediate) \
EmitRexIndexed(0, destination_base, destination_index); \
Emit_##operation##_I(); \
EmitIndirectIndexedByteDisplaced(extension_##operation##_I, destination_base, destination_index, destination_scale, destination_displacement); \
Emit4(source_immediate)
#define EMIT_SIBD_I(operation, destination_base, destination_scale, destination_index, destination_displacement, source_immediate) \
EmitRexIndexed(0, destination_base, destination_index); \
Emit_##operation##_I(); \
EmitIndirectIndexedDisplaced(extension_##operation##_I, destination_base, destination_index, destination_scale, destination_displacement); \
Emit4(source_immediate)
#define EMIT_X_R(operation, source) \
EmitRex(0, source); \
Emit_##operation##_X(); \
EmitDirect(extension_##operation##_X, source)
#define EMIT_X_RIPD(operation, source_displacement) \
EmitRex(0, 0); \
Emit_##operation##_X(); \
EmitIndirectDisplacedRip(extension_##operation##_X, source_displacement);
#define EMIT_X_D(operation, source_displacement) \
EmitRex(0, 0); \
Emit_##operation##_X(); \
EmitDisplaced(extension_##operation##_X, source_displacement)
#define EMIT_X_M(operation, source) \
EmitRex(0, source); \
Emit_##operation##_X(); \
EmitIndirect(extension_##operation##_X, source)
#define EMIT_X_MD1(operation, source, source_displacement) \
EmitRex(0, source); \
Emit_##operation##_X(); \
EmitIndirectByteDisplaced(extension_##operation##_X, source, source_displacement)
#define EMIT_X_MD(operation, source, source_displacement) \
EmitRex(0, source); \
Emit_##operation##_X(); \
EmitIndirectDisplaced(extension_##operation##_X, source, source_displacement)
#define EMIT_X_SIB(operation, source_base, source_scale, source_index) \
EmitRexIndexed(0, source_base, source_index); \
Emit_##operation##_X(); \
EmitIndirectIndexed(extension_##operation##_X, source_base, source_index, source_scale)
#define EMIT_X_SIBD1(operation, source_base, source_scale, source_index, source_displacement) \
EmitRexIndexed(0, source_base, source_index); \
Emit_##operation##_X(); \
EmitIndirectIndexedByteDisplaced(extension_##operation##_X, source_base, source_index, source_scale, source_displacement)
#define EMIT_X_SIBD(operation, source_base, source_scale, source_index, source_displacement) \
EmitRexIndexed(0, source_base, source_index); \
Emit_##operation##_X(); \
EmitIndirectIndexedDisplaced(extension_##operation##_X, source_base, source_index, source_scale, source_displacement)
#define EMIT_C_I(operation, condition_code, source_immediate) \
Emit_##operation##_C_I(condition_code); \
Emit4(source_immediate)
#define OPERATION_1_R(operation, opcode) \
void Emit_##operation##_R() { \
Emit(opcode); \
}
#define OPERATION_1_M(operation, opcode) \
void Emit_##operation##_M() { \
Emit(opcode); \
}
#define OPERATION_1_I(operation, opcode, extension) \
void Emit_##operation##_I() { \
Emit(opcode); \
} \
enum { extension_##operation##_I = extension };
#define OPERATION_1_I1(operation, opcode, extension) \
void Emit_##operation##_I1() { \
Emit(opcode); \
} \
enum { extension_##operation##_I1 = extension };
#define OPERATION_1_X(operation, opcode, extension) \
void Emit_##operation##_X() { \
Emit(opcode); \
} \
enum { extension_##operation##_X = extension };
#define OPERATION_2_C_I(operation, opcode) \
void Emit_##operation##_C_I(ConditionCode condition_code) { \
Emit(0x0F); \
Emit(opcode + condition_code); \
}
OPERATION_1_R(MOV, 0x8B)
OPERATION_1_M(MOV, 0x89)
OPERATION_1_I(MOVSX, 0xC7, 0x00)
OPERATION_1_I1(SHL, 0xC1, 0x04)
OPERATION_1_I1(SHR, 0xC1, 0x05)
OPERATION_1_R(ADD, 0x03)
OPERATION_1_M(ADD, 0x01)
OPERATION_1_I(ADD, 0x81, 0x00)
OPERATION_1_R(SUB, 0x2B)
OPERATION_1_M(SUB, 0x29)
OPERATION_1_I(SUB, 0x81, 0x05)
OPERATION_1_X(MUL, 0xF7, 0x04)
OPERATION_1_X(DIV, 0xF7, 0x06)
OPERATION_1_I(CMP, 0x81, 0x07)
OPERATION_1_I(JMP, 0xE9, 0x00)
OPERATION_2_C_I(J, 0x80)
// Emitter tests
void Test_Emitter() {
EMIT_MOV64_R_I(RBX, 0x12345678deadbeefull);
EMIT_MOV64_RAX_OFF(0x12345678deadbeefull);
EMIT_MOV64_OFF_RAX(0x12345678deadbeefull);
EMIT_R_R(MOV, RAX, R10);
EMIT_M_R(MOV, RAX, R10);
EMIT_I(JMP, 0x1234);
EMIT_C_I(J, NZ, 0x1234);
EMIT_C_I(J, NZ, 0x1234);
EMIT_C_I(J, NZ, 0x1234);
EMIT_D_I(ADD, 0x1234578, 0xDEADBEEF);
EMIT_RIPD_I(ADD, 0x1234578, 0xDEADBEEF);
for (uint8_t d = RAX; d <= R15; d++) {
Register destination = (Register)d;
EMIT_X_R(MUL, destination);
if ((destination & 7) != RSP) {
EMIT_X_MD1(MUL, destination, 0x12);
EMIT_X_MD(MUL, destination, 0x12345678);
if ((destination & 7) != RBP) {
EMIT_X_M(MUL, destination);
EMIT_X_SIB(MUL, destination, X4, R8);
EMIT_X_SIBD1(MUL, destination, X4, R8, 0x12);
EMIT_X_SIBD(MUL, destination, X4, R8, 0x12345678);
}
}
EMIT_R_I(ADD, destination, 0xDEADBEEF);
if ((destination & 7) != RSP) {
EMIT_MD1_I(ADD, destination, 0x12, 0xDEADBEEF);
EMIT_MD_I(ADD, destination, 0x12345678, 0xDEAFBEEF);
if ((destination & 7) != RBP) {
EMIT_SIB_I(ADD, destination, X4, R8, 0xDEADBEEF);
}
}
EMIT_R_RIPD(ADD, destination, 0x1235678);
EMIT_R_D(ADD, destination, 0x1235678);
EMIT_RIPD_R(ADD, 0x1235678, destination);
EMIT_D_R(ADD, 0x1235678, destination);
for (uint8_t s = RAX; s <= R15; s++) {
Register source = (Register)s;
EMIT_R_R(ADD, destination, source);
if ((source & 7) != RBP) {
EMIT_R_SIB(ADD, destination, source, X4, destination);
EMIT_R_SIBD1(ADD, destination, source, X4, destination, 0x12);
EMIT_R_SIBD(ADD, destination, source, X4, destination, 0x12345678);
}
if ((destination & 7) != RBP) {
EMIT_SIB_R(ADD, destination, X4, source, source);
EMIT_SIBD1_R(ADD, destination, X4, source, 0x12, source);
EMIT_SIBD_R(ADD, destination, X4, source, 0x12345678, source);
}
if ((source & 7) != RSP && (source & 7) != RBP) {
EMIT_R_M(ADD, destination, source);
EMIT_R_MD1(ADD, destination, source, 0x12);
EMIT_R_MD(ADD, destination, source, 0x12345678);
}
if ((destination & 7) != RSP && (destination & 7) != RBP) {
EMIT_M_R(ADD, destination, source);
EMIT_MD1_R(ADD, destination, 0x12, source);
EMIT_MD_R(ADD, destination, 0x12345678, source);
}
if ((source & 7) == RSP) {
EMIT_R_SIB(ADD, destination, source, X1, RSP);
#if 0
EmitRex(destination, source);
EmitAddToRegister();
EmitIndirectIndexed(destination, source, RSP, X1);
#endif
}
}
}
DumpCode("d:\\be\\test.bin");
}
// String table
// TODO: replace with something beter
uint64_t HashString(char *string, size_t length) {
static const uint64_t fnv_prime = 0x100000001b3ull;
static const uint64_t fnv_initializer = 0xcbf29ce484222325ull;
uint64_t hash = fnv_initializer;
for (size_t i = 0; i < length; i++) {
hash ^= string[i];
hash *= fnv_prime;
}
return hash;
}
struct HashedString {
uint64_t hash;
char *string;
};
enum {
MAX_HASHED_STRINGS = 1024,
MAX_STRING_DATA = 1024 * 1024
};
char string_data[MAX_STRING_DATA];
char *next_string_data = string_data;
HashedString hashed_strings[MAX_HASHED_STRINGS];
size_t hashed_strings_count;
char *InternString(char *string, size_t length) {
uint64_t hash = HashString(string, length);
for (size_t i = 0; i < hashed_strings_count; i++) {
if (hashed_strings[i].hash == hash && strncmp(hashed_strings[i].string, string, length) == 0) {
return hashed_strings[i].string;
}
}
Assert(hashed_strings_count < MAX_HASHED_STRINGS);
Assert(next_string_data + length + 1 < string_data + MAX_STRING_DATA);
char *interned_string = next_string_data;
memcpy(interned_string, string, length);
interned_string[length] = 0;
next_string_data += length + 1;
hashed_strings[hashed_strings_count].hash = hash;
hashed_strings[hashed_strings_count].string = interned_string;
hashed_strings_count++;
return interned_string;
}
// Symbol tables
enum TypeKind {
TYPE_UINT64,
TYPE_INT64,
TYPE_UINT32,
TYPE_INT32,
TYPE_UINT8,
TYPE_INT8,
TYPE_ARRAY,
TYPE_AGGREGATE
};
struct Type;
struct Field {
Type *type;
uint32_t offset;
};
struct Type {
TypeKind kind;
uint32_t size;
union {
struct {
Type *element;
size_t length;
} array;
struct {
Field *fields;
size_t length;
} aggregate;
};
};
Type type_uint64 = {TYPE_UINT64, sizeof(uint64_t)};
enum SymbolKind {
SYMBOL_GLOBAL_VARIABLE,
SYMBOL_LOCAL_VARIABLE
};
struct Symbol {
SymbolKind kind;
char *name;
Type *type;
uint32_t offset;
};
enum {
MAX_GLOBAL_VARIABLES = 1024,
MAX_LOCAL_VARIABLES = 1024
};
Symbol global_variables[MAX_GLOBAL_VARIABLES];
size_t global_variables_count;
uint32_t global_variables_offset;
Symbol *PushGlobalVariable(char *name, Type *type) {
Assert(global_variables_count < MAX_GLOBAL_VARIABLES);
Symbol *global_variable = global_variables + global_variables_count;
global_variable->kind = SYMBOL_GLOBAL_VARIABLE;
global_variable->name = name;
global_variable->type = type;
global_variable->offset = global_variables_offset;
global_variables_offset += type->size;
global_variables_count++;
return global_variable;
}
Symbol local_variables[MAX_LOCAL_VARIABLES];
size_t local_variables_count;
uint32_t local_variables_offset;
Symbol *PushLocalVariable(char *name, Type *type) {
Assert(local_variables_count < MAX_LOCAL_VARIABLES);
Symbol *local_variable = local_variables + local_variables_count;
local_variable->kind = SYMBOL_LOCAL_VARIABLE;
local_variable->name = name;
local_variable->type = type;
local_variable->offset = local_variables_offset;
local_variables_offset += type->size;
local_variables_count++;
return local_variable;
}
void PopLocalVariables(size_t new_count) {
Assert(new_count >= 0);
Assert(new_count <= local_variables_count);
if (new_count < local_variables_count) {
local_variables_offset = local_variables[new_count].offset;
}
local_variables_count = new_count;
}
Symbol *FindSymbol(char *name) {
for (int64_t i = local_variables_count - 1; i >= 0; i--) {
if (local_variables[i].name == name) {
return local_variables + i;
}
}
for (int64_t i = global_variables_count; i >= 0; i--) {
if (global_variables[i].name == name) {
return global_variables + i;
}
}
return 0;
}
// Reader
char *string_if;
char *string_else;
char *string_while;
char *string_uint64;
char current_character;
char *remaining_characters;
void ReadCharacter() {
Assert(current_character);
current_character = *remaining_characters;
remaining_characters++;
}
enum {
TOKEN_EOF = 0,
LAST_LITERAL_TOKEN = 127,
TOKEN_INTEGER,
TOKEN_IDENTIFIER,
TOKEN_PRINT,
TOKEN_IF,
TOKEN_ELSE,
TOKEN_WHILE
};
typedef uint8_t Token;
Token current_token;
uint32_t current_token_integer;
char *current_token_identifier;
void ConsumeToken() {
retry:
switch (current_character) {
case 0:
current_token = TOKEN_EOF;
break;
case ' ':
case '\n':
case '\r':
case '\t':
case '\v':
ReadCharacter();
goto retry;
break;
case 'a':
case 'A':
case 'b':
case 'B':
case 'c':
case 'C':
case 'd':
case 'D':
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
case 'h':
case 'H':
case 'i':
case 'I':
case 'j':
case 'J':
case 'k':
case 'K':
case 'l':
case 'L':
case 'm':
case 'M':
case 'n':
case 'N':
case 'o':
case 'O':
case 'p':
case 'P':
case 'q':
case 'Q':
case 'r':
case 'R':
case 's':
case 'S':
case 't':
case 'T':
case 'u':
case 'U':
case 'v':
case 'V':
case 'w':
case 'W':
case 'x':
case 'X':
case 'y':
case 'Y':
case 'z':
case 'Z': {
char *start = remaining_characters - 1;
ReadCharacter();
while (isalnum(current_character) || current_character == '_') {
ReadCharacter();
}
char *end = remaining_characters - 1;
char *string = InternString(start, end - start);
if (string == string_if) {
current_token = TOKEN_IF;
} else if (string == string_else) {
current_token = TOKEN_ELSE;
} else if (string == string_while) {
current_token = TOKEN_WHILE;
} else {
current_token = TOKEN_IDENTIFIER;
current_token_identifier = string;
}
break;
}
case '=':
case ';':
case ':':
case '{':
case '}':
case '(':
case ')':
case '+':
case '-':
case '*':
case '/':
case '^':
current_token = (Token)current_character;
ReadCharacter();
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
current_token = TOKEN_INTEGER;
current_token_integer = current_character - '0';
ReadCharacter();
while (isdigit(current_character)) {
current_token_integer *= 10;
current_token_integer += current_character - '0';
ReadCharacter();
}
break;
default:
Assert(0);
}
}
void ExpectToken(Token expected_token) {
Assert(current_token == expected_token);
ConsumeToken();
}
uint32_t free_register_mask;
void InitializeFreeRegisters() {
Register available_registers[] = {RCX, RBX, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15};
for (size_t i = 0; i < sizeof(available_registers) / sizeof(*available_registers); i++) {
free_register_mask |= 1 << available_registers[i];
}
}
Register AllocateRegister() {
Assert(free_register_mask != 0);
DWORD free_register;
_BitScanForward(&free_register,free_register_mask);
free_register_mask &= ~(1 << free_register);
return (Register)free_register;
}
void FreeRegister(Register allocated_register) {
Assert((free_register_mask & (1 << allocated_register)) == 0);
free_register_mask |= 1 << allocated_register;
}
enum OperandKind {
OPERAND_NULL,
OPERAND_FRAME_OFFSET,
OPERAND_REGISTER,
OPERAND_IMMEDIATE,
OPERAND_ADDRESS
};
struct Operand {
OperandKind kind;
Type *type;
union {
Register operand_register;
uint32_t operand_immediate;
uint32_t operand_frame_offset;
uint32_t operand_address;
};
};
void AllocateOperandRegister(Operand *operand) {
if (operand->kind != OPERAND_REGISTER) {
operand->kind = OPERAND_REGISTER;
operand->operand_register = AllocateRegister();
}
}
void StealOperandRegister(Operand *destination, Operand *source) {
Assert(source->kind == OPERAND_REGISTER);
destination->kind = OPERAND_REGISTER;
destination->operand_register = source->operand_register;
source->kind = OPERAND_NULL;
}
void FreeOperand(Operand *operand) {
if (operand->kind == OPERAND_REGISTER) {
FreeRegister(operand->operand_register);
}
operand->kind = OPERAND_NULL;
}
void PatchWithOperandAddress(uint32_t offset, Operand *operand) {
Assert(operand->kind == OPERAND_ADDRESS);
*(int32_t *)(next_code - 4 * offset) = (int32_t)(code - next_code - 8) - (uint32_t)operand->operand_address;
}
void EmitOperandToRegister(Operand *operand, Register target_register) {
if (operand->kind == OPERAND_IMMEDIATE) {
EMIT_R_I(MOVSX, target_register, operand->operand_immediate);
} else if (operand->kind == OPERAND_FRAME_OFFSET) {
Assert(operand->operand_frame_offset <= INT8_MAX);
EMIT_R_MD1(MOV, target_register, RBP, operand->operand_frame_offset);
} else if (operand->kind == OPERAND_ADDRESS) {
EMIT_R_RIPD(MOV, target_register, 0);
PatchWithOperandAddress(1, operand);
} else if (operand->kind == OPERAND_REGISTER) {
if (operand->operand_register != target_register) {
EMIT_R_R(MOV, target_register, operand->operand_register);
}
}
}
void EnsureOperandHasRegister(Operand *operand) {
if (operand->kind != OPERAND_REGISTER) {
Register operand_register = AllocateRegister();
EmitOperandToRegister(operand, operand_register);
operand->kind = OPERAND_REGISTER;
operand->operand_register = operand_register;
}
}
void ParseExpression(Operand *destination);
void ParseAtom(Operand *destination) {
if (current_token == TOKEN_IDENTIFIER) {
Symbol *symbol = FindSymbol(current_token_identifier);
Assert(symbol);
ConsumeToken();
if (symbol->kind == SYMBOL_LOCAL_VARIABLE) {
destination->kind = OPERAND_FRAME_OFFSET;
destination->operand_frame_offset = symbol->offset;
} else {
destination->kind = OPERAND_ADDRESS;
destination->operand_address = symbol->offset;
}
} else if (current_token == TOKEN_INTEGER) {
ConsumeToken();
destination->kind = OPERAND_IMMEDIATE;
destination->operand_immediate = current_token_integer;
} else if (current_token == '(') {
ConsumeToken();
ParseExpression(destination);
ExpectToken(')');
} else {
Assert(0);
}
}
void EmitMultiply(Operand *destination, Operand *operand) {
if (destination->kind == OPERAND_IMMEDIATE && operand->kind == OPERAND_IMMEDIATE) {
destination->operand_immediate *= operand->operand_immediate;
} else if (operand->kind == OPERAND_IMMEDIATE && IsPowerOfTwo(operand->operand_immediate)) {
EnsureOperandHasRegister(destination);
EMIT_R_I1(SHL, destination->operand_register, Log2(operand->operand_immediate));
} else if (destination->kind == OPERAND_IMMEDIATE && IsPowerOfTwo(destination->operand_immediate)) {
EnsureOperandHasRegister(operand);
EMIT_R_I1(SHL, operand->operand_register, Log2(destination->operand_immediate));
StealOperandRegister(destination, operand);
} else {
EmitOperandToRegister(destination, RAX);
if (operand->kind == OPERAND_FRAME_OFFSET) {
Assert(operand->operand_frame_offset <= INT8_MAX);
EMIT_X_MD1(MUL, RBP, operand->operand_frame_offset);
} else if (operand->kind == OPERAND_ADDRESS) {
EMIT_X_RIPD(MUL, 0);
PatchWithOperandAddress(1, operand);
} else {
EnsureOperandHasRegister(operand);
EMIT_X_R(MUL, operand->operand_register);
}
AllocateOperandRegister(destination);
EMIT_R_R(MOV, destination->operand_register, RAX);
}
}
void EmitDivide(Operand *destination, Operand *operand) {
if (destination->kind == OPERAND_IMMEDIATE && operand->kind == OPERAND_IMMEDIATE) {
destination->operand_immediate /= operand->operand_immediate;
} else if (operand->kind == OPERAND_IMMEDIATE && IsPowerOfTwo(operand->operand_immediate)) {
EnsureOperandHasRegister(destination);
EMIT_R_I1(SHR, destination->operand_register, Log2(operand->operand_immediate));
} else {
EmitOperandToRegister(destination, RAX);
if (operand->kind == OPERAND_FRAME_OFFSET) {
Assert(operand->operand_frame_offset <= INT8_MAX);
EMIT_X_MD1(DIV, RBP, operand->operand_frame_offset);
} else if (operand->kind == OPERAND_ADDRESS) {
EMIT_X_RIPD(DIV, 0);
PatchWithOperandAddress(1, operand);
} else {
EnsureOperandHasRegister(operand);
EMIT_X_R(DIV, operand->operand_register);
}
AllocateOperandRegister(destination);
EMIT_R_R(MOV, destination->operand_register, RAX);
}
}
void ParseTerm(Operand *destination) {
ParseAtom(destination);
while (current_token == '*' || current_token == '/') {
Token operator_token = current_token;
ConsumeToken();
Operand operand;
ParseTerm(&operand);
if (operator_token == '*') {
EmitMultiply(destination, &operand);
} else {
EmitDivide(destination, &operand);
}
FreeOperand(&operand);
}
}
void EmitAdd(Operand *destination, Operand *operand) {
if (destination->kind == OPERAND_IMMEDIATE && operand->kind == OPERAND_IMMEDIATE) {
destination->operand_immediate += operand->operand_immediate;
} else if (destination->kind == OPERAND_IMMEDIATE) {
EnsureOperandHasRegister(operand);
EMIT_R_I(ADD, operand->operand_register, destination->operand_immediate);
destination->kind = OPERAND_REGISTER;
destination->operand_register = operand->operand_register;
} else {
EnsureOperandHasRegister(destination);
if (operand->kind == OPERAND_IMMEDIATE) {
EMIT_R_I(ADD, destination->operand_register, operand->operand_immediate);
} else if (operand->kind == OPERAND_FRAME_OFFSET) {
Assert(operand->operand_frame_offset <= INT8_MAX);
EMIT_R_MD1(ADD, destination->operand_register, RBP, operand->operand_frame_offset);
} else if (operand->kind == OPERAND_ADDRESS) {
EMIT_R_RIPD(ADD, destination->operand_register, 0);
PatchWithOperandAddress(1, operand);
} else {
Assert(operand->kind == OPERAND_REGISTER);
EMIT_R_R(ADD, destination->operand_register, operand->operand_register);
}
}
}
void EmitSubtract(Operand *destination, Operand *operand) {
if (destination->kind == OPERAND_IMMEDIATE && operand->kind == OPERAND_IMMEDIATE) {
destination->operand_immediate -= operand->operand_immediate;
} else if (destination->kind == OPERAND_IMMEDIATE) {
EnsureOperandHasRegister(operand);
EMIT_R_I(ADD, operand->operand_register, -destination->operand_immediate);
destination->kind = OPERAND_REGISTER;
destination->operand_register = operand->operand_register;
} else {
EnsureOperandHasRegister(destination);
if (operand->kind == OPERAND_IMMEDIATE) {
EMIT_R_I(SUB, destination->operand_register, operand->operand_immediate);
} else if (operand->kind == OPERAND_FRAME_OFFSET) {
Assert(operand->operand_frame_offset <= INT8_MAX);
EMIT_R_MD1(SUB, destination->operand_register, RBP, operand->operand_frame_offset);
} else if (operand->kind == OPERAND_ADDRESS) {
EMIT_R_RIPD(SUB, destination->operand_register, 0);
PatchWithOperandAddress(1, operand);
} else {
Assert(operand->kind == OPERAND_REGISTER);
EMIT_R_R(SUB, destination->operand_register, operand->operand_register);
}
}
}
void ParseExpression(Operand *destination) {
ParseTerm(destination);
while (current_token == '+' || current_token == '-') {
Token operator_token = current_token;
ConsumeToken();
Operand operand;
ParseTerm(&operand);
if (operator_token == '+') {
EmitAdd(destination, &operand);
} else {
EmitSubtract(destination, &operand);
}
FreeOperand(&operand);
}
}
Type *ParseType() {
if (current_token == TOKEN_IDENTIFIER) {
if (current_token_identifier == string_uint64) {
ConsumeToken();
return &type_uint64;
}
}
return 0;
}
void ParseStatementBlock();
void ParseStatement() {
if (current_token == TOKEN_PRINT) {
ConsumeToken();
Operand operand;
ParseExpression(&operand);
// printf("%d", rcx)
// rcx = ["%d"], rdx = operand result
FreeOperand(&operand);
ExpectToken(';');
} else if (current_token == TOKEN_IF) {
ConsumeToken();
ExpectToken('(');
Operand operand;
ParseExpression(&operand);
ExpectToken(')');
EnsureOperandHasRegister(&operand);
EMIT_R_I(CMP, operand.operand_register, 0);
EMIT_C_I(J, Z, 0);
uint8_t *end_of_if_jump = next_code;
uint8_t *if_jump_immediate_address = next_code - 4;
FreeOperand(&operand);
ParseStatementBlock();
if (current_token == TOKEN_ELSE) {
ConsumeToken();
EMIT_I(JMP, 0);
*(uint32_t *)if_jump_immediate_address = (uint32_t)(next_code - end_of_if_jump);
uint8_t *end_of_else_jump = next_code;
uint8_t *else_jump_immediate_address = next_code - 4;
ParseStatementBlock();
*(uint32_t *)else_jump_immediate_address = (uint32_t)(next_code - end_of_else_jump);
} else {
*(uint32_t *)if_jump_immediate_address = (uint32_t)(next_code - end_of_if_jump);
}
} else if (current_token == TOKEN_WHILE) {
ConsumeToken();
uint8_t *start_of_while = next_code;
ExpectToken('(');
Operand operand;
ParseExpression(&operand);
ExpectToken(')');
EnsureOperandHasRegister(&operand);
EMIT_R_I(CMP, operand.operand_register, 0);
EMIT_C_I(J, Z, 0);
uint8_t *end_of_while_jump = next_code;
uint8_t *while_jump_immediate_address = next_code - 4;
FreeOperand(&operand);
ParseStatementBlock();
EMIT_I(JMP, 0);
*(uint32_t *)(next_code - 4) = (int32_t)(start_of_while - next_code);
*(uint32_t *)while_jump_immediate_address = (uint32_t)(next_code - end_of_while_jump);
} else if (current_token == TOKEN_IDENTIFIER) {
char *identifier = current_token_identifier;
ConsumeToken();
if (current_token == '=') {
ConsumeToken();
Symbol *symbol = FindSymbol(identifier);
Assert(symbol);
if (symbol->kind == SYMBOL_LOCAL_VARIABLE) {
uint32_t local_variable_offset = symbol->offset;
Operand operand;
ParseExpression(&operand);
if (operand.kind == OPERAND_IMMEDIATE) {
EMIT_MD1_I(MOVSX, RBP, local_variable_offset, operand.operand_immediate);
} else {
EnsureOperandHasRegister(&operand);
EMIT_MD1_R(MOV, RBP, local_variable_offset, operand.operand_register);
}
FreeOperand(&operand);
} else {
Assert(symbol->kind == SYMBOL_GLOBAL_VARIABLE);
Operand variable;
variable.kind = OPERAND_ADDRESS;
variable.operand_address = symbol->offset;
Operand operand;
ParseExpression(&operand);
if (operand.kind == OPERAND_IMMEDIATE) {
EMIT_RIPD_I(MOVSX, 0, operand.operand_immediate);
PatchWithOperandAddress(2, &variable);
} else {
EnsureOperandHasRegister(&operand);
EMIT_RIPD_R(MOV, 0, operand.operand_register);
PatchWithOperandAddress(1, &variable);
}
FreeOperand(&operand);
}
} else {
ExpectToken(':');
Type *type = ParseType();
PushLocalVariable(identifier, type);
}
ExpectToken(';');
} else {
Assert(0);
}
}
void ParseStatementBlock() {
uint64_t initial_local_variables_count = local_variables_count;
ExpectToken('{');
while (current_token != '}') {
ParseStatement();
}
ConsumeToken();
PopLocalVariables(initial_local_variables_count);
}
void ParseModule() {
while (current_token != '{') {
ExpectToken(TOKEN_IDENTIFIER);
char *identifier = current_token_identifier;
ExpectToken(':');
Type *type = ParseType();
PushGlobalVariable(identifier, type);
ExpectToken(';');
}
EMIT_MOV64_R_I(RBP, (uint64_t)data);
ParseStatementBlock();
}
void OpenFileForParsing(const char *filename) {
HANDLE file = CreateFileA(filename, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
Assert(file != INVALID_HANDLE_VALUE);
DWORD file_size = GetFileSize(file, 0);
HANDLE file_mapping = CreateFileMappingA(file, 0, PAGE_WRITECOPY, 0, 0, 0);
Assert(file_mapping);
char *file_memory = (char *)MapViewOfFileEx(file_mapping, FILE_MAP_COPY, 0, 0, 0, 0);
Assert(file_memory);
Assert(file_size > 0);
Assert(file_memory[file_size - 1] == '\n');
file_memory[file_size - 1] = 0;
current_character = *file_memory;
remaining_characters = file_memory + 1;
ConsumeToken();
}
void InitializeCompiler() {
data = (uint8_t *)VirtualAlloc(0, 1024 * 1024 * 1024, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
code = data + 4096;
next_code = code;
InitializeFreeRegisters();
string_if = InternString("if", 2);
string_else = InternString("else", 4);
string_while = InternString("while", 5);
string_uint64 = InternString("uint64", 6);
}
void CompileFile(char *source_filename, char *binary_filename) {
LARGE_INTEGER timer_frequency;
QueryPerformanceFrequency(&timer_frequency);
LARGE_INTEGER start_time;
QueryPerformanceCounter(&start_time);
OpenFileForParsing(source_filename);
ParseModule();
LARGE_INTEGER end_time;
QueryPerformanceCounter(&end_time);
uint64_t duration = end_time.QuadPart - start_time.QuadPart;
char temp[32];
sprintf(temp, "Time: %.2fs\n", (float)duration / (float)timer_frequency.QuadPart);
OutputDebugStringA(temp);
DumpCode(binary_filename);
}
int main(int argc, char **argv) {
InitializeCompiler();
// CompileFile("d:\\be\\fib.be", "d:\\be\\fib.bin");
CompileFile("d:\\be\\huge_fib.be", "d:\\be\\huge_fib.bin");
// CompileFile("d:\\be\\test.be", "d:\\be\\test.bin");
// CompileFile("d:\\be\\huge.be", "d:\\be\\huge.bin");
#if 0
void (*dummy_function)() = (void(*)())code;
dummy_function();
#endif
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment