Skip to content

Instantly share code, notes, and snippets.

@Papierkorb
Last active April 14, 2018 14:19
Show Gist options
  • Save Papierkorb/aceccb75c1cd949daabfa6ad5e7b5a96 to your computer and use it in GitHub Desktop.
Save Papierkorb/aceccb75c1cd949daabfa6ad5e7b5a96 to your computer and use it in GitHub Desktop.
The humble beginnings of an AMD64 JIT (Without any libraries)
/* Compile: $ g++ -std=c++11 -o amd64 amd64.cpp */
#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <cstring>
#include <sys/mman.h>
// Helper enums so we don't have to poke in bytes directly.
// You can find the documentation of all of these in the *free* AMD64 specification:
// "AMD64 Architecture Programmer's Manual, Volume 3, General-Purpose and System Instructions"
// Individual bits of a REX prefix byte. A complete prefix is formed by OR-ing
// `Prefix` with whichever flag bits are required (see the `Opcode` enum).
enum class RexField : uint8_t {
  B = 0x01,
  X = 0x02,
  R = 0x04,
  /** If set, operand is 64-Bit */
  W = 0x08,
  /** Constant part of the prefix byte; the flag bits above are OR-ed into it. */
  Prefix = 0x40,
};
// Raw bytes that get emitted into the instruction stream.
// The trailing "/0" and "/r" notes follow the notation of the AMD64 manual and
// describe what the ModRM byte following the opcode has to carry.
enum Opcode {
  // Single-byte instructions
  RET = 0xC3,
  INC_RegMem64 = 0xFF,       // /0
  MOV_RegMem64_Reg64 = 0x89, // /r

  // REX prefixes: the fixed prefix bits combined with exactly one flag
  REX_R = static_cast<uint8_t>(RexField::Prefix)
        | static_cast<uint8_t>(RexField::R),
  REX_W = static_cast<uint8_t>(RexField::Prefix)
        | static_cast<uint8_t>(RexField::W),
};
// 3-bit register numbers as they are encoded into a ModRM byte.
// "Caller"/"Callee" states who is responsible for saving the register across a
// call. R8..R15 reuse the numbers 0..7; they are told apart from the legacy
// registers by the REX:B prefix bit.
enum Register {
  // Legacy registers
  RAX = 0, /* Result, Caller */
  RCX = 1, /* 4th Arg, Caller */
  RDX = 2, /* 3rd Arg, Caller */
  RBX = 3, /* Callee */
  RSI = 6, /* 2nd Arg, Caller */
  RDI = 7, /* 1st Arg, Caller */

  // Extended registers, all of them need REX:B
  R8  = 0, /* 5th Arg, Caller */
  R9  = 1, /* 6th Arg, Caller */
  R10 = 2, /* Caller */
  R11 = 3, /* Caller */
  R12 = 4, /* Callee */
  R13 = 5, /* Callee */
  R14 = 6, /* Callee */
  R15 = 7, /* Callee */
};
// Packs the three ModRM fields into one byte:
//
//   bit   7 6 | 5 4 3 | 2 1 0
//        [mod] [ reg ] [ r/m ]
//
// Only the low 2 bits of `mod` and the low 3 bits of `reg` and `rm` are used;
// any higher bits are masked off. Kept as a single return expression so the
// function stays a valid C++11 constexpr function.
static constexpr uint8_t modrm(uint8_t mod, uint8_t reg, uint8_t rm) {
  return static_cast<uint8_t>(
      (rm & 0x07) | ((reg & 0x07) << 3) | ((mod & 0x03) << 6));
}
// ModRM byte for a register-to-register operation: `mod` is fixed to 0b11,
// `source` is placed in the reg field and `target` in the r/m field.
static constexpr uint8_t reg2reg(Register source, Register target) {
  return modrm(0x3,
               static_cast<uint8_t>(source),
               static_cast<uint8_t>(target));
}
// ModRM byte for an instruction operating on a single register: `mod` is fixed
// to 0b11, `group` fills the reg field (the "/0" style digit noted next to the
// opcode) and `target` fills the r/m field.
static constexpr uint8_t reg(uint8_t group, Register target) {
  return modrm(0x3, group, static_cast<uint8_t>(target));
}
// Assembles a tiny function equivalent to `uint64_t f(uint64_t v) { return v + 1; }`
// from raw machine code bytes, copies it into executable memory, calls it and
// prints the result.
int main() {
  // Static function shellcode. If you can't find the right opcodes easily,
  // here's a trick:
  // 1. Write a `foo.as` file with assembler instructions for GAS ("GNU Assembler")
  // 2. Assemble and then disassemble it:
  // $ as -o foo foo.as && objdump -S foo
  //
  // The argument arrives in %rdi (1st Arg) and the result is handed back in
  // %rax (Result), see the `Register` enum.
  const uint8_t shellcode[] = {
    REX_W, INC_RegMem64, reg(0, RDI),             // [REX:W] INC/0 %rdi
    REX_W, MOV_RegMem64_Reg64, reg2reg(RDI, RAX), // [REX:W] MOV/r %rdi, %rax
    RET,                                          // RETQ
  };

  // Size of the mapping we request; the code must fit into it.
  constexpr size_t kMappingSize = 4096;
  static_assert(sizeof(shellcode) <= kMappingSize, "shellcode does not fit into the mapping");

  // Acquire RWX-able memory ("Don't try this at home")
  // In a proper JIT you would mmap() the memory RX or RW only, but *never* RWX.
  // To switch between RX and RW use mprotect().
  //
  // Before writing to the memory you'd mprotect(RW) it, and before calling,
  // you'd mprotect(RX) it. This can get tricky with multi-threading!
  //
  // Arguments, in order:
  //   nullptr                            - "Place it anywhere"
  //   kMappingSize                       - We want a page of memory
  //   PROT_READ | PROT_WRITE | PROT_EXEC - Make it readable, writable AND executable
  //   MAP_PRIVATE | MAP_ANONYMOUS        - Just private memory, from no backing file
  //   -1, 0                              - No file descriptor, no offset either
  void *mapping = mmap(nullptr, kMappingSize, PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

  // It's always a good idea to check for errors. This example could e.g. fail
  // if your system rejects requests for RWX memory!
  // Check *before* touching the pointer in any way.
  if (mapping == MAP_FAILED) {
    perror("mmap");
    abort();
  }

  uint8_t *ptr = static_cast<uint8_t *>(mapping);
  // %p expects a `void *` argument.
  printf("ptr = %p\n", static_cast<void *>(ptr));

  // Copy shellcode
  memcpy(ptr, shellcode, sizeof(shellcode));

  // Cast to function pointer
  using Func = uint64_t(*)(uint64_t); // `uint64_t func(uint64_t v)`
  Func inc1 = reinterpret_cast<Func>(ptr);

  // Call! The result is 64 bits wide and unsigned, so it must not be printed
  // with %d; convert explicitly to a type with a matching format specifier.
  printf("inc1(5) = %llu\n", static_cast<unsigned long long>(inc1(5)));

  // Give the memory back; `inc1` must not be called past this point.
  if (munmap(mapping, kMappingSize) != 0) {
    perror("munmap");
    return EXIT_FAILURE;
  }

  return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment