zevweiss/think.c
Created November 16, 2024 20:24

/* ---------------- think.c ---------------- */
#include <sys/mman.h>
#include "common.h"
#include "think.h"
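
/*
 * Note: "common.h" is not part of this gist. From its use below it is assumed
 * to provide at least xmalloc()/xfree() (checked allocation wrappers),
 * repbytes() (fill a buffer by repeating a byte pattern) and ARR_LEN() (array
 * element count), and presumably pulls in the standard headers used here
 * (stdio.h, stdlib.h, string.h, stdint.h, assert.h, unistd.h) plus _GNU_SOURCE
 * for mempcpy().
 */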

/*
 * insn think params:
 * - total bytes
 * - nop size
 * - number of nops per block
 *   - -1 ~> "all"
 *   - 0 ~> "jmp -> jmp -> jmp..."
 *   - 1 ~> "nop jmp -> nop jmp -> nop jmp..."
 */
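
/*
 * Concretely, with nopsize = 4 and nops_per_block = 2, each generated block is
 *
 *   48 8b 3f             mov    (%rdi),%rdi    (or a 3-byte nop once the data
 *                                               footprint has been covered)
 *   0f 1f 40 00          nopl   0x0(%rax)
 *   0f 1f 40 00          nopl   0x0(%rax)
 *   e9 xx xx xx xx       jmp    <next block in the shuffled chain>
 *
 * for a block size of 3 + 2*4 + 5 = 16 bytes.
 */
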
#if !defined(__x86_64__) && !defined(__i386__)
#error Only supported on x86
#endif
/* Source: http://www.felixcloutier.com/x86/NOP.html */
static const char* nop_bytes[] = {
        [1] = "\x90",
        [2] = "\x66\x90",
        [3] = "\x0f\x1f\x00",
        [4] = "\x0f\x1f\x40\x00",
        [5] = "\x0f\x1f\x44\x00\x00",
        [6] = "\x66\x0f\x1f\x44\x00\x00",
        [7] = "\x0f\x1f\x80\x00\x00\x00\x00",
        [8] = "\x0f\x1f\x84\x00\x00\x00\x00\x00",
        [9] = "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
};

/* Size of a reasonable-range jump instruction */
#define JMPSIZE 5
/* And the opcode thereof */
#define JMPCODE 0xe9
/* int3 opcode as a filler byte */
#define TRAPCODE 0xcc
#define CACHELINE_SIZE 64

union cacheline {
        union cacheline* next;
        char pad[CACHELINE_SIZE];
};
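
/*
 * Data-think work is a pointer chase: init_think() links these cache-line-sized
 * nodes into a randomly-ordered chain via ->next, and the generated code walks
 * that chain with one dependent load per line.
 */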

/*
 * Return a randomly-ordered malloc()ed array of N unsigned longs, with each
 * value in [0, N) appearing once.
 */
static unsigned long* shuffled_range(unsigned long n)
{
        unsigned long i, j, tmp;
        unsigned long* seq = xmalloc(n * sizeof(*seq));

        for (i = 0; i < n; i++)
                seq[i] = i;

        for (i = 0; n && i < n - 1; i++) {
                j = i + (random() % (n - i));
                tmp = seq[i];
                seq[i] = seq[j];
                seq[j] = tmp;
        }

        return seq;
}

static const char ret_code[] = { 0xc3, }; /* retq */
static const char thinkload_code[] = { 0x48, 0x8b, 0x3f, }; /* mov (%rdi), %rdi */

/*
 * A tight little loop to be copied into the generated think code to clean up
 * any data think work that remains after instruction think is done.
 */
asm(".local dthink_tail\n"
    "dthink_tail:\n\t"
    "movq (%rdi), %rdi\n\t"
    "testq %rdi, %rdi\n\t"
    "jne dthink_tail\n\t"
    ".local dthink_tail_end\n"
    "dthink_tail_end:");
extern char dthink_tail[], dthink_tail_end[];
#define DTHINK_TAIL_LEN (dthink_tail_end - dthink_tail)

struct thinkwork {
        size_t i, d;
};

static inline size_t thinkcode_blksize(const struct thinkparams* p)
{
        return sizeof(thinkload_code) + (p->nops_per_block * p->nopsize) + JMPSIZE;
}

static void fill_think_block(const struct thinkparams* p, struct thinkwork* remaining,
                             void* blkaddr, void* next)
{
        size_t ldsize = sizeof(thinkload_code);
        size_t blksize = thinkcode_blksize(p);
        void* nopaddr = mempcpy(blkaddr, remaining->d ? thinkload_code : nop_bytes[ldsize],
                                ldsize);
        void* jmpaddr = repbytes(nopaddr, p->nops_per_block * p->nopsize,
                                 nop_bytes[p->nopsize], p->nopsize);

        *(uint8_t*)jmpaddr = JMPCODE;
        *(int32_t*)(jmpaddr + 1) = next - (jmpaddr + JMPSIZE);

        remaining->d = remaining->d >= CACHELINE_SIZE ? remaining->d - CACHELINE_SIZE : 0;

        /* This should only be called as many times as we have code blocks to generate */
        assert(remaining->i >= blksize);
        remaining->i -= blksize;
}

static void gen_thinkcode(struct thinkparams* p)
{
        void* tailloc;
        void* blkaddr;
        void* nextblkaddr;
        void* first_jmpdst;
        void* shufbuf;
        unsigned long i, numblks, shufblks;
        size_t blksize;
        unsigned long* blkorder = NULL;
        struct thinkwork remaining = {
                .i = p->insn_footprint,
                .d = p->data_footprint,
        };

        /*
         * First instruction of dthink_tail should match what we're generating
         * in the ithink blocks.
         */
        assert(!memcmp(thinkload_code, dthink_tail, sizeof(thinkload_code)));

        tailloc = p->thinkcode + p->insn_footprint;
        blksize = thinkcode_blksize(p);
        numblks = p->insn_footprint / blksize;

        /*
         * Physically-first block (block zero) is always execution-first as
         * well, so don't shuffle it.
         */
        shufblks = numblks ? numblks - 1 : 0;

        if (shufblks) {
                shufbuf = p->thinkcode + blksize;
                blkorder = shuffled_range(shufblks);

                /* Make block zero jump to the first shuffled one */
                first_jmpdst = shufbuf + (blkorder[0] * blksize);
                fill_think_block(p, &remaining, p->thinkcode, first_jmpdst);

                for (i = 0; i < shufblks - 1; i++) {
                        blkaddr = shufbuf + (blkorder[i] * blksize);
                        nextblkaddr = shufbuf + (blkorder[i + 1] * blksize);
                        fill_think_block(p, &remaining, blkaddr, nextblkaddr);
                }

                /* Make the (execution-order) last block jump to the tail code */
                blkaddr = shufbuf + (blkorder[shufblks - 1] * blksize);
                fill_think_block(p, &remaining, blkaddr, tailloc);
        } else if (numblks) /* single block jumps straight to tail code */
                fill_think_block(p, &remaining, p->thinkcode, tailloc);

        /* If there's data left over, insert the cleanup tail */
        if (remaining.d)
                tailloc = mempcpy(tailloc, dthink_tail, DTHINK_TAIL_LEN);

        /* And finally poke in the return instruction. */
        memcpy(tailloc, ret_code, sizeof(ret_code));

        free(blkorder);
}

static void check_params(const struct thinkparams* p)
{
        if (p->nopsize < 1 || p->nopsize >= ARR_LEN(nop_bytes)) {
                fprintf(stderr, "Invalid nop size %d (must be in [1,%lu])\n",
                        p->nopsize, ARR_LEN(nop_bytes) - 1);
                exit(1);
        }
        if (p->nops_per_block < 0) {
                fprintf(stderr, "Invalid nop count %d (must be non-negative)\n",
                        p->nops_per_block);
                exit(1);
        }
}

void init_think(struct thinkparams* p)
{
        size_t mapbytes, i;
        unsigned long* idxs;
        size_t iblksize, dblksize, codesize;
        unsigned long niblks, ndblks;

        check_params(p);

        if (!p->insn_footprint && !p->data_footprint) {
                p->thinkcode = NULL;
                return;
        }

        /* Round code and data footprints up to a multiple of unit size */
        iblksize = thinkcode_blksize(p);
        dblksize = sizeof(union cacheline);
        if (p->insn_footprint % iblksize)
                p->insn_footprint += iblksize - (p->insn_footprint % iblksize);
        if (p->data_footprint % dblksize)
                p->data_footprint += dblksize - (p->data_footprint % dblksize);

        niblks = p->insn_footprint / iblksize;
        ndblks = p->data_footprint / dblksize;

        if (ndblks) {
                p->data_buf = xmalloc(ndblks * dblksize);
                /*
                 * Build up a randomly-ordered chain of data-dependent loads:
                 * first get a randomized array of indices...
                 */
                idxs = shuffled_range(ndblks);
                /* ...then link the list in that order. */
                for (i = 0; i < ndblks - 1; i++)
                        p->data_buf[idxs[i]].next = &p->data_buf[idxs[i + 1]];
                p->data_buf[idxs[ndblks - 1]].next = NULL;
                p->data_chain_head = &p->data_buf[idxs[0]];
                xfree(idxs);
        }

        codesize = p->insn_footprint + sizeof(ret_code);

        /*
         * niblks >= ndblks is fine (we just insert a nop instead of a load),
         * but ndblks > niblks means we need to insert the data cleanup
         * tail...
         */
        if (ndblks > niblks)
                codesize += DTHINK_TAIL_LEN;

        mapbytes = (codesize / getpagesize()) * getpagesize();
        if (codesize % getpagesize())
                mapbytes += getpagesize();

        p->thinkcode = mmap(NULL, mapbytes, PROT_READ|PROT_WRITE|PROT_EXEC,
                            MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (p->thinkcode == MAP_FAILED) {
                perror("insn_think_buf mmap");
                exit(1);
        }

        gen_thinkcode(p);
}

void do_think(const struct thinkparams* p)
{
        if (p->thinkcode)
                ((void (*)(void*))p->thinkcode)(p->data_chain_head);
}

/* ---------------- think.h ---------------- */

#ifndef THINK_H
#define THINK_H

struct thinkparams {
        size_t data_footprint, insn_footprint;
        int nopsize, nops_per_block;
        union cacheline* data_buf;
        union cacheline* data_chain_head;
        void* thinkcode;
};

void init_think(struct thinkparams* p);
void do_think(const struct thinkparams* p);

#endif /* THINK_H */
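
/*
 * Example driver (not part of the gist): a hypothetical standalone program
 * sketching how the API above might be used. The footprint and nop parameters
 * are arbitrary illustrative values; it assumes think.c is linked in along
 * with whatever common.h provides.
 */
#include <stdlib.h>
#include "think.h"

int main(void)
{
        struct thinkparams p = {
                .insn_footprint = 64 * 1024,  /* ~64KB of generated nop/jmp blocks */
                .data_footprint = 256 * 1024, /* ~256KB pointer-chase buffer */
                .nopsize = 4,                 /* 4-byte nops */
                .nops_per_block = 2,          /* two nops between load and jmp */
        };
        int i;

        srandom(12345);         /* shuffled_range() uses random() */
        init_think(&p);         /* JIT the think code and build the data chain */

        for (i = 0; i < 1000; i++)
                do_think(&p);   /* one pass of instruction+data "thinking" */

        return 0;
}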