Skip to content

Instantly share code, notes, and snippets.

@RealNeGate
Last active February 15, 2024 10:59
Show Gist options
  • Save RealNeGate/428a274496ce9852e06f9db1852b6cc2 to your computer and use it in GitHub Desktop.
Save RealNeGate/428a274496ce9852e06f9db1852b6cc2 to your computer and use it in GitHub Desktop.
// I tried not to do anything too non-portable, so it should be possible to run
// this on Mac or Linux... probably... even then, you can't use the obj files there
//
// once you have the obj file you should be able to do:
//   link YOUROBJ.obj /defaultlib:libcmt
//                    ^^^^^^^^^^^^^^^^^^
//                    linking against crt
#define _CRT_SECURE_NO_WARNINGS
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define STB_DS_IMPLEMENTATION
#include "stb_ds.h"
// stb_ds dynamic arrays are plain element pointers; this alias only marks the
// pointers that get grown with arrput() and measured with arrlen()
#define DynArray(T) T*
// Section characteristics: flag combinations stored in
// COFF_SectionHeader.characteristics
// IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ | IMAGE_SCN_ALIGN_16BYTES
#define COFF_CHARACTERISTICS_TEXT 0x60500020u
// IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_WRITE | IMAGE_SCN_MEM_READ
#define COFF_CHARACTERISTICS_DATA 0xC0000040u
// IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ
#define COFF_CHARACTERISTICS_RODATA 0x40000040u
// IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_WRITE | IMAGE_SCN_MEM_READ | IMAGE_SCN_ALIGN_16BYTES
#define COFF_CHARACTERISTICS_BSS 0xC0500080u
// IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_DISCARDABLE
// (the 0x00100000 bit is the 1-byte alignment flag; 8-byte alignment would be 0x00400000)
#define COFF_CHARACTERISTICS_CV 0x42100040u
// Symbol storage classes (COFF_Symbol.storage_class)
#define IMAGE_SYM_CLASS_EXTERNAL 0x0002
#define IMAGE_SYM_CLASS_STATIC 0x0003
#define IMAGE_SYM_CLASS_LABEL 0x0006
#define IMAGE_SYM_CLASS_FILE 0x0067
// File header flag (COFF_FileHeader.characteristics)
#define IMAGE_FILE_LINE_NUMS_STRIPPED 0x0004
// x64 relocation types (COFF_ImageReloc.Type); only REL32 is emitted by this file
#define IMAGE_REL_AMD64_ADDR64 0x0001
#define IMAGE_REL_AMD64_ADDR32 0x0002
#define IMAGE_REL_AMD64_ADDR32NB 0x0003
#define IMAGE_REL_AMD64_REL32 0x0004
#define IMAGE_REL_AMD64_SECTION 0x000A
#define IMAGE_REL_AMD64_SECREL 0x000B
// NOTE(review): not referenced anywhere in this file
#define MD5_HASHBYTES 16
// On-disk COFF section header. main() writes one of these per section,
// directly after the file header. The layout must stay exactly 40 bytes
// (checked by the static_assert below), so do not reorder or resize fields.
typedef struct COFF_SectionHeader {
// section name stored inline in the header
char name[8];
// never set by this writer, stays zero
union {
uint32_t physical_address;
uint32_t virtual_size;
} misc;
// never set by this writer, stays zero
uint32_t virtual_address;
// size in bytes of the section contents stored in the file
uint32_t raw_data_size;
// file offset of the section contents
uint32_t raw_data_pos;
// file offset of this section's COFF_ImageReloc array
uint32_t pointer_to_reloc;
// never set by this writer, stays zero
uint32_t pointer_to_lineno;
// number of COFF_ImageReloc entries found at pointer_to_reloc
uint16_t num_reloc;
// never set by this writer, stays zero
uint16_t num_lineno;
// section flags, see the COFF_CHARACTERISTICS_* values
uint32_t characteristics;
} COFF_SectionHeader;
static_assert(sizeof(COFF_SectionHeader) == 40, "COFF Section header size != 40 bytes");
// On-disk COFF file header; it is the very first thing main() writes.
// The layout must stay exactly 20 bytes (checked by the static_assert below).
typedef struct COFF_FileHeader {
// target machine, one of the COFF_MACHINE_* values
uint16_t machine;
// number of COFF_SectionHeader records following this header
uint16_t num_sections;
// main() stores time(NULL) at the moment the file is generated
uint32_t timestamp;
// file offset of the symbol table
uint32_t symbol_table;
// number of 18-byte symbol table records, auxiliary records included
uint32_t symbol_count;
// never set by this writer, stays zero
uint16_t optional_header_size;
// file-level flags, e.g. IMAGE_FILE_LINE_NUMS_STRIPPED
uint16_t characteristics;
} COFF_FileHeader;
static_assert(sizeof(COFF_FileHeader) == 20, "COFF File header size != 20 bytes");
// NOTE: Symbols, relocations, and line numbers are 2 byte packed
#pragma pack(push,2)
// On-disk relocation record. It relies on the surrounding 2-byte packing to be
// exactly 10 bytes (checked by the static_assert below).
typedef struct COFF_ImageReloc {
union {
// byte offset inside the owning section of the value to patch
uint32_t VirtualAddress;
// alternative view of the same 4 bytes; never used in this file
uint32_t RelocCount;
};
// zero-based index of the symbol table record the relocation refers to
uint32_t SymbolTableIndex;
// relocation kind, one of the IMAGE_REL_AMD64_* values
uint16_t Type;
} COFF_ImageReloc;
static_assert(sizeof(COFF_ImageReloc) == 10, "COFF Image Relocation size != 10 bytes");
// On-disk symbol table record. It relies on the surrounding 2-byte packing to
// be exactly 18 bytes (checked by the static_assert below).
typedef struct COFF_Symbol {
union {
// name stored inline when it is short enough
uint8_t short_name[8];
// long form: long_name[0] is 0 and long_name[1] is the name's offset
// inside the string table
uint32_t long_name[2];
};
// for symbols defined in a section: byte offset inside that section
uint32_t value;
// 1-based section number; 0 marks a symbol that is defined elsewhere
int16_t section_number;
// never set by this writer, stays zero
uint16_t type;
// one of the IMAGE_SYM_CLASS_* values
uint8_t storage_class;
// how many auxiliary records directly follow this record
uint8_t aux_symbols_count;
} COFF_Symbol;
static_assert(sizeof(COFF_Symbol) == 18, "COFF Symbol size != 18 bytes");
// Auxiliary record that main() writes right after each section symbol. It has
// the same size as a COFF_Symbol (18 bytes, checked below) because both live
// in the same table. This writer only fills in length, reloc_count and number;
// everything else stays zero.
typedef struct COFF_AuxSectionSymbol {
uint32_t length; // section length in bytes (the section's raw data size)
uint16_t reloc_count; // number of relocation entries of the section
uint16_t lineno_count; // number of line numbers
uint32_t checksum; // checksum for communal data
int16_t number; // 1-based section number to associate with
uint8_t selection; // communal selection type
uint8_t reserved;
int16_t high_bits; // high bits of the section number
} COFF_AuxSectionSymbol;
static_assert(sizeof(COFF_AuxSectionSymbol) == 18, "COFF Aux Section Symbol size != 18 bytes");
#pragma pack(pop)
// Values for COFF_FileHeader.machine. Only COFF_MACHINE_AMD64 is used by this
// file; the generated machine code is x64.
enum {
COFF_MACHINE_AMD64 = 0x8664, // AMD64 (K8)
COFF_MACHINE_ARM64 = 0xAA64, // ARM64 Little-Endian
};
// Zero-based index of a record in the COFF symbol table. Every section owns
// two consecutive records (its symbol and the auxiliary record), so the
// section at array index i has symbol table index 2*i.
typedef int SymbolTableIndex;
// In-memory description of a section, collected in section_headers and turned
// into a COFF_SectionHeader plus file contents by main().
typedef struct {
// section name; has to fit the 8-byte name field of the section header
const char* name;
// section flags, see the COFF_CHARACTERISTICS_* values
uint32_t characteristics;
// file offset of the relocation records, assigned during layout in main()
uint32_t relocation_pos;
// stb_ds array of relocations; NULL means the section has none
DynArray(COFF_ImageReloc) relocations;
// file offset of the contents, assigned during layout in main()
uint32_t raw_data_pos;
// size of raw_data in bytes
uint32_t raw_data_size;
// section contents; the generators point this at static arrays
const uint8_t* raw_data;
} Section;
// In-memory description of a non-section symbol (a function, an import, ...),
// collected in symbols and turned into a COFF_Symbol by main().
typedef struct {
// symbol name; names of 8 or more characters end up in the string table
const char* name;
// byte offset of the symbol inside its section
uint32_t value;
// 1-based section number, or 0 when the symbol is defined elsewhere
uint32_t section_num;
// one of the IMAGE_SYM_CLASS_* values
uint32_t storage_class;
} Symbol;
// Non-section symbols in the order they are written to the symbol table,
// which is directly after all the section symbols.
static DynArray(Symbol) symbols;
// Every section of the object file; array index + 1 is the COFF section number.
static DynArray(Section) section_headers;
// Appends one relocation record to the relocation list of section `s`.
static void add_reloc(Section* s, const COFF_ImageReloc rel) {
    // arrput may move the array while growing it, so work on a local copy of
    // the pointer and store the (possibly new) address back afterwards
    COFF_ImageReloc* relocs = s->relocations;
    arrput(relocs, rel);
    s->relocations = relocs;
}
// Appends a non-section symbol to the global symbol list. Its symbol table
// index is decided by its position in that list.
static void add_symbol(const Symbol sym) {
    Symbol entry = sym;
    arrput(symbols, entry);
}
// this small .text has some simple relocations to compile an extended
// hello world program
static SymbolTableIndex generate_text_section(int rdata_section_num, int normal_symbol_base) {
static const uint8_t contents[] = {
0x48, 0x8D, 0x0D, 0x05, 0x00, 0x00, 0x00, // lea rcx, [.rdata + 5]
0xBA, 0x2A, 0x00, 0x00, 0x00, // mov rdx, 42
0xE9, 0x00, 0x00, 0x00, 0x00, // jmp printf
};
Section s = {
.name = ".text",
.characteristics = COFF_CHARACTERISTICS_TEXT,
.raw_data = contents,
.raw_data_size = sizeof(contents)
};
// Apply a relocation to the Hello string
add_reloc(&s, (COFF_ImageReloc){
.Type = IMAGE_REL_AMD64_REL32,
.SymbolTableIndex = rdata_section_num,
// This refers to the spot within the text section that
// the relocation will happen to, relocations add onto
// the value that's already there
.VirtualAddress = 3
});
add_reloc(&s, (COFF_ImageReloc){
.Type = IMAGE_REL_AMD64_REL32,
// we're referring to the printf external symbol
.SymbolTableIndex = normal_symbol_base+1,
// this is the CALL instruction's rip relative address
.VirtualAddress = 13
});
arrput(section_headers, s);
// it's zero based and there's two symbols per section
SymbolTableIndex text_section = (arrlen(section_headers)-1)*2;
// there's a distinction between symbol table indices and
// section numbers, i explain the section headers in the symbol
// output code at the bottom
int text_section_num = arrlen(section_headers);
// this is symbol normal_symbol_base+0
add_symbol((Symbol) {
.name = "main",
// the value means the byte offset at which the
// function is found in the text section
.value = 0,
.section_num = text_section_num,
// the external storage means it's visible beyond this TU
.storage_class = IMAGE_SYM_CLASS_EXTERNAL
});
// this is symbol normal_symbol_base+1
add_symbol((Symbol) {
.name = "printf",
// this symbol is importing something that's defined
// elsewhere, section_number = 0 means it's not bound
// to any section here
.value = 0,
.section_num = 0,
// the external storage means it's visible beyond this TU
.storage_class = IMAGE_SYM_CLASS_EXTERNAL
});
return text_section;
}
// Builds the .rdata section that holds the printf format string and appends
// it to section_headers.
//
// Returns the symbol table index of the new section's symbol.
static SymbolTableIndex generate_rdata_section(void) {
    // The 5 extra bytes at the start are there on purpose: they show what a
    // relocation looks like when it points somewhere past the start of a
    // section. The LEA in generate_text_section carries the matching offset
    // of 5. sizeof includes the terminating NUL that printf needs.
    static const uint8_t contents[] = "_x_x_Hello, Agent %d!";

    Section s = {
        .name = ".rdata",
        .characteristics = COFF_CHARACTERISTICS_RODATA,
        .raw_data = contents,
        .raw_data_size = sizeof(contents)
    };
    arrput(section_headers, s);

    // symbol table indices are zero based and there are two records per section
    return (arrlen(section_headers) - 1) * 2;
}
// Writes `size` bytes starting at `data` to `file`.
// Returns 1 on success and 0 on a short write. Writing nothing always
// succeeds, so a NULL `data` with a size of 0 is fine (for example a section
// without relocations).
static int write_bytes(FILE* file, const void* data, size_t size) {
    return size == 0 || fwrite(data, 1, size, file) == size;
}

// Fills an 8-byte COFF name field (section header name or symbol short name).
// The field is NUL padded; a name of exactly 8 characters fills it completely
// and is stored without a terminator, as the format allows.
static void set_short_name(void* field, const char* name) {
    size_t len = strlen(name);
    // all the names used here are small like .text, anything longer would
    // need the string table
    assert(len <= 8);
    if (len > 8) {
        // builds without asserts truncate instead of overflowing the field
        len = 8;
    }
    memset(field, 0, 8);
    memcpy(field, name, len);
}

// Generates a small x64 COFF object file at the path given in argv[1].
//
// File layout, in order: file header, section headers, raw data of every
// section, relocations of every section, symbol table, string table.
//
// Returns 0 on success and 1 when the path is missing, the file cannot be
// opened or the file cannot be written completely.
int main(int argc, char** argv) {
    if (argc < 2) {
        printf("Expected output path for .obj file\n");
        return 1;
    }

    FILE* file = fopen(argv[1], "wb");
    if (!file) {
        printf("Could not open '%s' for writing\n", argv[1]);
        return 1;
    }

    // Assemble some machine code and data. .rdata has to exist first because
    // .text refers to its section symbol. Two sections give four section
    // symbol records (indices 0-3), so the normal symbols start at index 4.
    int normal_symbol_base = 4;
    int rdata_symbol = generate_rdata_section();
    generate_text_section(rdata_symbol, normal_symbol_base);

    // Convert our abstraction over COFF into file contents
    size_t section_count = arrlen(section_headers);
    size_t symbol_count = arrlen(symbols);

    // normal symbols like functions and imports start right after our section
    // symbols, i dont think this is necessarily a rule more like a convention
    assert(section_count * 2 == (size_t) normal_symbol_base);

    // The file header is at the start and just gives some basic
    // data on where the important tables are
    COFF_FileHeader header = {
        // in this example the machine code is x64
        .machine = COFF_MACHINE_AMD64,
        .num_sections = (uint16_t) section_count,
        .timestamp = (uint32_t) time(NULL),
        // we fill in this value during layout
        .symbol_table = 0,
        // there's 2 records per section (the auxiliary record counts)
        .symbol_count = (uint32_t) ((2 * section_count) + symbol_count),
        .characteristics = IMAGE_FILE_LINE_NUMS_STRIPPED
    };

    // layout: decide the file offset of everything before writing anything
    uint32_t string_table_pos;
    {
        size_t pos = sizeof(COFF_FileHeader) + (section_count * sizeof(COFF_SectionHeader));

        // raw data
        for (size_t i = 0; i < section_count; i++) {
            section_headers[i].raw_data_pos = (uint32_t) pos;
            pos += section_headers[i].raw_data_size;
        }

        // relocations, sections without any still get a position, it just
        // doesn't matter what it is
        for (size_t i = 0; i < section_count; i++) {
            section_headers[i].relocation_pos = (uint32_t) pos;
            pos += arrlen(section_headers[i].relocations) * sizeof(COFF_ImageReloc);
        }

        // we'll place the symbol table at the end, directly after the symbol
        // table is the string table which is where longer symbol names will
        // be placed
        header.symbol_table = (uint32_t) pos;
        string_table_pos = (uint32_t) (pos + (header.symbol_count * sizeof(COFF_Symbol)));
    }

    // Cleared by the first failing write. The position asserts below are
    // layout checks, so they only apply while every write has succeeded.
    int ok = 1;

    // write the COFF headers
    ok &= write_bytes(file, &header, sizeof(header));
    for (size_t i = 0; i < section_count; i++) {
        COFF_SectionHeader sec = {
            .raw_data_size = section_headers[i].raw_data_size,
            .raw_data_pos = section_headers[i].raw_data_pos,
            .pointer_to_reloc = section_headers[i].relocation_pos,
            .num_reloc = (uint16_t) arrlen(section_headers[i].relocations),
            .characteristics = section_headers[i].characteristics
        };
        set_short_name(sec.name, section_headers[i].name);
        ok &= write_bytes(file, &sec, sizeof(sec));
    }

    // write out raw data
    for (size_t i = 0; i < section_count; i++) {
        assert(!ok || ftell(file) == (long) section_headers[i].raw_data_pos);
        ok &= write_bytes(file, section_headers[i].raw_data, section_headers[i].raw_data_size);
    }

    // relocations
    for (size_t i = 0; i < section_count; i++) {
        assert(!ok || ftell(file) == (long) section_headers[i].relocation_pos);
        size_t reloc_bytes = arrlen(section_headers[i].relocations) * sizeof(COFF_ImageReloc);
        ok &= write_bytes(file, section_headers[i].relocations, reloc_bytes);
    }

    // section symbols, two records each
    assert(!ok || ftell(file) == (long) header.symbol_table);
    for (size_t i = 0; i < section_count; i++) {
        // section number 0 is kinda a NULL section so we skip it when
        // labeling our sections, relocations and symbols use this number
        // to refer to the sections so we wanna be consistent
        int16_t section_number = (int16_t) (i + 1);

        COFF_Symbol sym = {
            .section_number = section_number,
            // section symbols have static storage because
            // every separate translation unit will have their
            // own separate copy
            .storage_class = IMAGE_SYM_CLASS_STATIC,
            // auxiliary records just add on to the data these symbols
            // tell us, in this case since it's a section we wanna add
            // extra data that says how many relocations and how big
            // the section itself is
            .aux_symbols_count = 1
        };
        set_short_name(sym.short_name, section_headers[i].name);
        ok &= write_bytes(file, &sym, sizeof(sym));

        // Write the auxiliary section record
        COFF_AuxSectionSymbol aux = {
            .length = section_headers[i].raw_data_size,
            .reloc_count = (uint16_t) arrlen(section_headers[i].relocations),
            // for odd reasons the section number is duplicated in
            // the symbol and the aux
            .number = section_number
        };
        ok &= write_bytes(file, &aux, sizeof(aux));
    }

    // this is where our normal looking symbols go, imports,
    // functions, globals. symbols here can be longer than 8
    // characters which means that we use the long name format
    // and place the actual string into the string table.
    // The size starts at 4 because the table begins with its own 4-byte size.
    uint32_t string_table_size = 4;
    for (size_t i = 0; i < symbol_count; i++) {
        COFF_Symbol sym = {
            .value = symbols[i].value,
            .section_number = (int16_t) symbols[i].section_num,
            .storage_class = (uint8_t) symbols[i].storage_class
        };

        const char* name = symbols[i].name;
        size_t name_len = strlen(name);
        if (name_len >= 8) {
            sym.long_name[0] = 0;                 // this value is 0 for the long names
            sym.long_name[1] = string_table_size; // and this is the position in the string table
            // reserve the space in the string table, terminator included
            string_table_size += (uint32_t) (name_len + 1);
        } else {
            set_short_name(sym.short_name, name);
        }
        ok &= write_bytes(file, &sym, sizeof(sym));
    }

    // String table
    // First 4 bytes are the size of the string table, then
    // it's all just null terminated strings. Walking the symbols again in
    // the same order reproduces the offsets handed out above.
    assert(!ok || ftell(file) == (long) string_table_pos);
    ok &= write_bytes(file, &string_table_size, sizeof(string_table_size));
    for (size_t i = 0; i < symbol_count; i++) {
        const char* name = symbols[i].name;
        size_t name_len = strlen(name);
        if (name_len >= 8) {
            ok &= write_bytes(file, name, name_len + 1);
        }
    }

    // fclose flushes the buffered data, so it can be the first call that
    // notices a failed write
    if (fclose(file) != 0) {
        ok = 0;
    }
    if (!ok) {
        printf("Could not write '%s'\n", argv[1]);
        return 1;
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment