Skip to content

Instantly share code, notes, and snippets.

@numinit
Last active Jan 25, 2021
Embed
What would you like to do?
Dumps in-memory text segments by single-stepping with ptrace. Usage: dtext <program> [args...]
/* Dumps in-memory text segments by single-stepping with ptrace.
* Usage: dtext <program> [args...]
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdlib.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <string.h>
#include <signal.h>
#include <stddef.h>
#include <inttypes.h>
#include <elf.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/user.h>
#include <sys/syscall.h>
/* Size in bytes of one memory page; the ELF header search walks in these units. */
#define PAGESIZE 0x1000ULL
/* Mask that clears the in-page offset bits of a 64-bit address. */
#define PAGEMASK (~(PAGESIZE - 1ULL))
/* Sentinel address returned by helpers in this file to signal failure. */
#define INVALID_PAGE 0ULL
/* three ring buffers for the Elven-kings under sky.clear.rice.edu */
/* The bytes 0x7f 'E' 'L' 'F' read as one little-endian 32-bit word. */
#define ELF_MAGIC 0x464c457fUL
/* A 64-bit register value or address in the traced process. */
typedef uint64_t reg_t;

/* An address range that has already been dumped.
 * Both bounds are inclusive: a program counter pc lies inside the segment
 * when start <= pc && pc <= end.
 */
typedef struct segment {
    reg_t start;
    reg_t end;
} segment_t;
/* Logs an informational message to stderr.
 *
 * fmt and the variadic arguments follow printf conventions. The formatted
 * message is prefixed with "[I] " and terminated with a newline.
 */
static void info(const char *fmt, ...) {
    va_list args;

    fputs("[I] ", stderr);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fputc('\n', stderr);
}
/* Logs an error message to stderr together with the current errno.
 *
 * fmt and the variadic arguments follow printf conventions. The output has
 * the form "[E] <message>: <errno> (<strerror text>)" followed by a newline.
 *
 * errno is captured on entry because the stdio calls that print the prefix
 * and the message may themselves modify it; it is restored before returning
 * so the caller still observes the value that was reported.
 */
static void error(const char *fmt, ...) {
    const int saved_errno = errno;
    va_list va;

    va_start(va, fmt);
    fprintf(stderr, "[E] ");
    vfprintf(stderr, fmt, va);
    fprintf(stderr, ": %d (%s)\n", saved_errno, strerror(saved_errno));
    va_end(va);

    errno = saved_errno;
}
/* Blocks until the child `pid` changes state.
 *
 * Returns normally when the child has merely stopped (for example after a
 * single step). If the child has terminated, this function does not return:
 *  - normal exit: logs the exit code and exits this process with that code;
 *  - killed by a signal: logs the signal number and exits with status 0.
 *    dump() deliberately kills the child with SIGKILL once it is finished, so
 *    termination by signal is treated as normal completion.
 * If waitpid() itself fails, the error is logged and this process exits with
 * EXIT_FAILURE, because the child's state can no longer be observed.
 */
static void wait_for(pid_t pid) {
    int status = 0;
    pid_t waited;

    // A signal handler returning may interrupt the wait; simply retry.
    do {
        waited = waitpid(pid, &status, 0);
    } while (waited == -1 && errno == EINTR);

    if (waited == -1) {
        error("waitpid");
        exit(EXIT_FAILURE);
    }

    if (WIFEXITED(status)) {
        int code = WEXITSTATUS(status);
        info("child exited with code %d, our work here is done", code);
        exit(code);
    }

    if (WIFSIGNALED(status)) {
        // WEXITSTATUS is only meaningful after a normal exit; report the
        // terminating signal instead.
        info("child was killed by signal %d, our work here is done",
             WTERMSIG(status));
        exit(EXIT_SUCCESS);
    }
}
/* Reads one word from address `addr` of the traced process `pid`.
 *
 * PTRACE_PEEKTEXT is attempted first and PTRACE_PEEKDATA second. Because -1
 * is a legitimate word value, failure is detected through errno, which is
 * cleared before every attempt.
 *
 * Returns 0 and stores the word in *out on success. Returns -1 (leaving *out
 * untouched and errno set by the last attempt) when both requests fail,
 * which most likely means a mapping boundary was reached.
 */
static int peek(pid_t pid, reg_t addr, reg_t *out) {
    const int requests[] = { PTRACE_PEEKTEXT, PTRACE_PEEKDATA };

    for (size_t attempt = 0; attempt < sizeof(requests) / sizeof(requests[0]); attempt++) {
        errno = 0;
        long word = ptrace(requests[attempt], pid, (void *)addr, NULL);
        if (word != -1 || errno == 0) {
            *out = (reg_t)word;
            return 0;
        }
    }

    return -1;
}
static reg_t get_pc(pid_t pid) {
struct user_regs_struct regs;
ptrace(PTRACE_GETREGS, pid, NULL, &regs);
return (reg_t)regs.rip;
}
/* Locates the ELF header of the image the traced process is executing.
 *
 * Starting from the page that contains the current program counter, the
 * search moves backward one page at a time until a page beginning with the
 * ELF magic is found.
 *
 * Returns the address of that page, or INVALID_PAGE (after logging the
 * error) as soon as a page cannot be read.
 */
static reg_t get_text_base(pid_t pid) {
    // Page-align the current instruction pointer.
    reg_t page = get_pc(pid) & PAGEMASK;

    for (;; page -= PAGESIZE) {
        reg_t word;

        if (peek(pid, page, &word) != 0) {
            error("ptrace");
            return INVALID_PAGE;
        }
        if ((word & 0xffffffffUL) == ELF_MAGIC) {
            // Found the ELF header.
            return page;
        }
        // Not here; the loop step backs up one page.
    }
}
/* Copies up to `size` bytes starting at `base` in the traced process `pid`
 * to `f`.
 *
 * Memory is read one word at a time. Reading stops early, without this being
 * treated as an error, at the first word that cannot be read. Exactly the
 * requested number of bytes is written: a trailing partial word is truncated
 * instead of being written in full. `f` may be NULL, in which case the
 * memory is only probed and nothing is written.
 *
 * Returns the address of the last byte of the page containing the last byte
 * covered (or of the page containing `base` if nothing could be read), or
 * INVALID_PAGE if writing to `f` failed.
 */
static reg_t dump_data(pid_t pid, reg_t base, reg_t size, FILE *f) {
    reg_t ptr = base;   // one past the last byte covered so far
    reg_t dumped = 0;

    while (dumped < size) {
        reg_t data;
        const reg_t remaining = size - dumped;
        const size_t chunk =
            remaining < sizeof(data) ? (size_t)remaining : sizeof(data);

        if (peek(pid, ptr, &data) != 0) {
            // Unreadable memory: treat it as the end of the mapped region.
            break;
        }
        // The in-memory bytes of `data` are the tracee's bytes in address
        // order, so the first `chunk` bytes are the ones still wanted.
        if (f && fwrite(&data, 1, chunk, f) != chunk) {
            error("fwrite");
            return INVALID_PAGE;
        }
        ptr += chunk;
        dumped += chunk;
    }

    // Round up to the end of the page holding the last covered byte. Using
    // `ptr` itself would claim one page too many whenever the data ends
    // exactly on a page boundary.
    const reg_t last = dumped > 0 ? ptr - 1 : base;
    return (last & PAGEMASK) + PAGESIZE - 1;
}
/* Dumps one memory range of the traced process to a file and records it.
 *
 * The range of `size` bytes at `vaddr` is written to the file
 * "<bn>-<vaddr as 16 hex digits>.elf". On success the range is appended to
 * the array *segments_ptr (grown with realloc) and *num_segments_ptr is
 * incremented.
 *
 * The file name is built in a heap buffer sized to fit, so a long `bn` can
 * never truncate the address suffix and make two segments share a file.
 *
 * Returns 0 on success and -1 on failure. On failure *segments_ptr and
 * *num_segments_ptr are left unchanged.
 */
static int dump_segment(const char *bn, reg_t vaddr, reg_t size,
                        segment_t **segments_ptr, size_t *num_segments_ptr,
                        pid_t pid) {
    int rc = -1;
    int name_len;
    int close_rc;
    char *filename = NULL;
    FILE *f = NULL;
    reg_t final = INVALID_PAGE;
    size_t count = 0;
    segment_t *grown = NULL;

    name_len = snprintf(NULL, 0, "%s-%016" PRIx64 ".elf", bn, vaddr);
    if (name_len < 0) {
        error("snprintf");
        return -1;
    }
    filename = malloc((size_t)name_len + 1);
    if (filename == NULL) {
        error("malloc");
        return -1;
    }
    snprintf(filename, (size_t)name_len + 1, "%s-%016" PRIx64 ".elf", bn, vaddr);

    f = fopen(filename, "wb");
    if (f == NULL) {
        error("fopen");
        goto out;
    }

    info("dumping segment @ 0x%016" PRIx64 " to %s...", vaddr, filename);
    final = dump_data(pid, vaddr, size, f);
    // fclose flushes buffered data, so its result decides whether the file
    // is really complete.
    close_rc = fclose(f);
    if (final == INVALID_PAGE) {
        info("failed to dump data");
        goto out;
    }
    if (close_rc != 0) {
        error("fclose");
        goto out;
    }
    info("success!");

    // Add a segment
    info("adding segment 0x%016" PRIx64 "..0x%016" PRIx64, vaddr, final);
    count = *num_segments_ptr + 1;
    grown = realloc(*segments_ptr, sizeof(*grown) * count);
    if (grown == NULL) {
        // The caller's array is still valid and still owned by the caller.
        error("realloc");
        goto out;
    }
    grown[count - 1].start = vaddr;
    grown[count - 1].end = final;
    *segments_ptr = grown;
    *num_segments_ptr = count;
    rc = 0;

out:
    free(filename);
    return rc;
}
/* Dumps every program segment and section described by the ELF header that
 * is mapped at `base` in the traced process `pid`.
 *
 * The 64-bit ELF header is read straight out of the tracee's memory: the
 * program and section header table offsets, plus the four consecutive 16-bit
 * fields e_phentsize, e_phnum, e_shentsize and e_shnum, which are fetched
 * with a single word read. Each table entry with a non-zero address and a
 * non-zero size is handed to dump_segment(); other entries are skipped.
 *
 * Returns 0 on success. Returns non-zero as soon as a read fails, the ELF
 * magic is missing, or dump_segment() fails.
 */
static int dump_segments(const char *bn, reg_t base, segment_t **segments_ptr,
                         size_t *num_segments_ptr, pid_t pid) {
    // Parse the ELF header to get all the segments.
    reg_t magic_word = 0, ph_table = 0, sh_table = 0, packed_sizes = 0;
    uint16_t fields[4];
    int rc;

    rc = peek(pid, base, &magic_word);
    if (rc != 0) {
        info("couldn't peek at segment base");
        return rc;
    }
    if ((magic_word & 0xffffffffUL) != ELF_MAGIC) {
        info("segment was not an ELF binary");
        return -1;
    }
    rc = peek(pid, base + offsetof(Elf64_Ehdr, e_phoff), &ph_table);
    if (rc != 0) {
        info("couldn't peek at program header offset");
        return rc;
    }
    rc = peek(pid, base + offsetof(Elf64_Ehdr, e_shoff), &sh_table);
    if (rc != 0) {
        info("couldn't peek at section header offset");
        return rc;
    }
    rc = peek(pid, base + offsetof(Elf64_Ehdr, e_phentsize), &packed_sizes);
    if (rc != 0) {
        info("couldn't peek at sizes offset");
        return rc;
    }

    // Split the word into its four 16-bit fields, in memory order.
    memcpy(fields, &packed_sizes, sizeof(fields));
    const uint16_t phentsize = fields[0];
    const uint16_t phnum = fields[1];
    const uint16_t shentsize = fields[2];
    const uint16_t shnum = fields[3];

    info("got %" PRIu16 " program %s", phnum,
         phnum == 1 ? "segment" : "segments");
    info("got %" PRIu16 " standard %s", shnum,
         shnum == 1 ? "segment" : "segments");

    // Program header table.
    for (uint16_t idx = 0; idx < phnum; idx++) {
        const reg_t entry = base + ph_table + (reg_t)idx * phentsize;
        reg_t vaddr = 0, memsz = 0;

        rc = peek(pid, entry + offsetof(Elf64_Phdr, p_vaddr), &vaddr);
        if (rc != 0) {
            info("failed to peek at program segment p_vaddr");
            return rc;
        }
        rc = peek(pid, entry + offsetof(Elf64_Phdr, p_memsz), &memsz);
        if (rc != 0) {
            info("failed to peek at program segment p_memsz");
            return rc;
        }

        if (vaddr == 0 || memsz == 0) {
            info("program segment %" PRIu16 " had invalid address, skipping", idx);
            continue;
        }
        rc = dump_segment(bn, vaddr, memsz, segments_ptr, num_segments_ptr, pid);
        if (rc != 0) {
            info("failed to dump program segment %" PRIu16, idx);
            return rc;
        }
    }

    // Section header table.
    for (uint16_t idx = 0; idx < shnum; idx++) {
        const reg_t entry = base + sh_table + (reg_t)idx * shentsize;
        reg_t addr = 0, length = 0;

        rc = peek(pid, entry + offsetof(Elf64_Shdr, sh_addr), &addr);
        if (rc != 0) {
            info("failed to peek at section segment sh_addr");
            return rc;
        }
        rc = peek(pid, entry + offsetof(Elf64_Shdr, sh_size), &length);
        if (rc != 0) {
            info("failed to peek at section segment sh_size");
            return rc;
        }

        if (addr == 0 || length == 0) {
            info("section segment %" PRIu16 " had invalid address, skipping", idx);
            continue;
        }
        rc = dump_segment(bn, addr, length, segments_ptr, num_segments_ptr, pid);
        if (rc != 0) {
            info("failed to dump section segment %" PRIu16, idx);
            return rc;
        }
    }

    return 0;
}
/* Run flag for the dump loops; cleared by the SIGINT handler.
 * It is written from a signal handler, so it must be a volatile sig_atomic_t
 * for that write to be well-defined and visible to the interrupted code.
 */
static volatile sig_atomic_t go = true;
/* Main tracing loop: dumps every ELF image the child `pid` executes.
 *
 * After the child's initial stop, the loop repeatedly
 *   1. finds the ELF header of the image containing the program counter,
 *   2. dumps the segments that header describes (files are prefixed with
 *      `bn`), and
 *   3. single-steps the child until the program counter leaves every range
 *      dumped so far.
 * The loop ends when the run flag is cleared, when no ELF header can be
 * found, or when dumping or stepping fails. The child is then detached with
 * SIGKILL and reaped.
 *
 * wait_for() terminates this process if the child exits, so this function
 * may not return.
 */
static void dump(const char *bn, pid_t pid) {
    segment_t *segments = NULL;
    size_t num_segments = 0;
    bool stepping_failed = false;

    // Wait for the process to become ready
    wait_for(pid);

    while (go && !stepping_failed) {
        info("----- got a new ELF binary");

        // Get the text base for this segment
        reg_t base = get_text_base(pid);
        if (base == INVALID_PAGE) {
            info("couldn't find ELF header");
            break;
        }
        info("new text base is at %016" PRIx64, base);

        // Okay, we have the ELF text base. Figure out which segments are
        // currently loaded.
        if (dump_segments(bn, base, &segments, &num_segments, pid) != 0) {
            error("dumping segments failed");
            break;
        }

        // Wait for the PC to exit all known segments
        size_t step = 0;
        bool new_segment = false;
        while (!new_segment && go) {
            // Without a successful step the child never changes state and
            // the following wait would block forever.
            if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) == -1) {
                error("ptrace(PTRACE_SINGLESTEP)");
                stepping_failed = true;
                break;
            }
            wait_for(pid);

            reg_t rip = get_pc(pid);
            new_segment = true;
            for (size_t i = 0; i < num_segments; i++) {
                if (rip >= segments[i].start && rip <= segments[i].end) {
                    // Already seen it.
                    new_segment = false;
                    break;
                }
            }

            if (step % 10000 == 0) {
                info("step %zu: ip=0x%016" PRIx64, step, rip);
            }
            step++;
        }
    }

    free(segments);

    // ptrace is variadic and reads its data argument as a pointer, so the
    // signal number is passed as one.
    if (ptrace(PTRACE_DETACH, pid, NULL, (void *)(uintptr_t)SIGKILL) == -1) {
        error("ptrace(PTRACE_DETACH)");
        // Make sure the child still terminates so the wait below returns.
        kill(pid, SIGKILL);
    }
    wait_for(pid);
}
/* Signal handler: announces the signal on stderr and clears the run flag.
 *
 * Only async-signal-safe operations are used. The message is formatted by
 * hand and emitted with write(2) instead of stdio, and errno is preserved so
 * the interrupted code does not observe a changed value.
 */
static void sigint_handler(int sig) {
    static const char prefix[] = "[I] received signal ";
    const int saved_errno = errno;
    char digits[16];
    size_t pos = sizeof(digits);
    unsigned int value = sig > 0 ? (unsigned int)sig : 0u;
    ssize_t written;

    // Build "<decimal signal number>\n" from the end of the buffer.
    digits[--pos] = '\n';
    do {
        digits[--pos] = (char)('0' + value % 10u);
        value /= 10u;
    } while (value != 0u);

    // Logging is best effort; there is nothing useful to do if it fails.
    written = write(STDERR_FILENO, prefix, sizeof(prefix) - 1);
    (void)written;
    written = write(STDERR_FILENO, digits + pos, sizeof(digits) - pos);
    (void)written;

    go = false;
    errno = saved_errno;
}
int main(int argc, char * const argv[]) {
pid_t pid;
if ((pid = fork()) == 0) {
// Hello! Please trace me, regardless of what anyone else says.
ptrace(PTRACE_TRACEME, 0, NULL, NULL);
execvp(argv[1], &argv[1]);
error("execvp failed!");
return -1;
} else {
const char *bn = basename(argv[1]);
info("starting %s as pid %d", bn, pid);
// Install a SIGINT handler
signal(SIGINT, sigint_handler);
// Dump the child process
dump(bn, pid);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment