Skip to content

Instantly share code, notes, and snippets.

@seal9055
Last active December 6, 2022 19:15
Show Gist options
  • Save seal9055/d6dd41fab6407640c4682f975ce5e56c to your computer and use it in GitHub Desktop.
Save seal9055/d6dd41fab6407640c4682f975ce5e56c to your computer and use it in GitHub Desktop.
Minimal Compiling Dynamorio Sample

Install dynamorio release

# Install the Linux DynamoRIO release from here
https://dynamorio.org/page_releases.html

# Setup
tar xf DynamoRIO-Linux-9.0.1.tar.gz && mv DynamoRIO-Linux-9.0.1 dynamorio

Compiling and running initial sample plugin

./build.sh

./run.sh <target-app>

The build command executes successfully, but attempting to run the target produces undefined-symbol errors. Running `nm` on the produced library lists many of the symbols as undefined.

#!/bin/sh
# Builds libexec_trace.so in a scratch build/ directory, copies it into the
# current directory, and removes the scratch directory afterwards.
#
# DYNAMORIO_CMAKE_DIR may be set in the environment to point at the cmake/
# directory of a DynamoRIO release; it defaults to the path used originally.

# Stop at the first failing step so a failed cd/cmake/make never falls through
# to the copy and clean-up commands. On failure build/ is left for inspection.
set -e

: "${DYNAMORIO_CMAKE_DIR:=/home/seal/analysis_project/dynamorio/cmake}"

# -p: do not fail when a previous run left build/ behind.
mkdir -p build
cd build
cmake -DDynamoRIO_DIR="$DYNAMORIO_CMAKE_DIR" ..
make
cd ..
cp build/libexec_trace.so .
rm -r build
cmake_minimum_required(VERSION 3.22)
project(execution_trace)

# Disable libc for decreased binary size (and because leaving libc enabled
# results in floating point errors when attempting to run the library).
# NOTE(review): utils.c in this project calls fdopen() and fclose(), which are
# libc functions -- confirm they still resolve at load time with libc disabled.
set(DynamoRIO_USE_LIBC OFF)

# Locate the DynamoRIO installation; configuration stops if it is not found.
find_package(DynamoRIO)
if(NOT DynamoRIO_FOUND)
  message(FATAL_ERROR "DynamoRIO package required to build")
endif()
# add_client(<name> <source_file_list> <extension_list>)
#
# Helper that creates the shared library <name> from the ;-separated
# <source_file_list>, passes it to configure_DynamoRIO_client(), and then calls
# use_DynamoRIO_extension() once for every entry of the ;-separated
# <extension_list>.
function(add_client name source_file_list extension_list)
  add_library(${name} SHARED ${source_file_list})
  configure_DynamoRIO_client(${name})

  foreach(ext ${extension_list})
    use_DynamoRIO_extension(${name} ${ext})
  endforeach()
endfunction()
# Add the actual library: target exec_trace is built from exec_trace.c and
# utils.c and requests the drmgr, drreg and drx extensions.
add_client(exec_trace "exec_trace.c;utils.c" "drmgr;drreg;drx")
#include <stddef.h> /* for offsetof */
#include "dr_api.h"
#include "drmgr.h"
#include "drreg.h"
#include "utils.h"
/* Each ins_ref_t describes an executed instruction.
 * One record is appended to the per-thread buffer by the inline
 * instrumentation and later printed by instrace().
 */
typedef struct _ins_ref_t {
/* Application address of the instruction (from instr_get_app_pc()). */
app_pc pc;
/* Opcode of the instruction (from instr_get_opcode()); the inline code
 * stores only its low 2 bytes into this field.
 */
int opcode;
} ins_ref_t;
/* Max number of ins_ref a buffer can have. It should be big enough
 * to hold all entries between clean calls, because the inline code that
 * appends records does not check for a full buffer.
 */
#define MAX_NUM_INS_REFS 8192
/* The maximum size, in bytes, of the buffer for holding ins_refs. */
#define MEM_BUF_SIZE (sizeof(ins_ref_t) * MAX_NUM_INS_REFS)
/* Thread-private state: trace buffer, log file handles and record counter.
 * One instance is allocated in event_thread_init() and released in
 * event_thread_exit().
 */
typedef struct {
/* Segment base returned by dr_get_dr_segment_base(tls_seg); used to reach
 * the raw TLS slot from C code.
 */
byte *seg_base;
/* Start of the record buffer obtained from dr_raw_mem_alloc(). */
ins_ref_t *buf_base;
/* DR file handle returned by log_file_open(). */
file_t log;
/* libc stream wrapping `log`, created by log_stream_from_file(). */
FILE *logf;
/* Number of records this thread has written to its log so far. */
uint64 num_refs;
} per_thread_t;
/* Client id passed to dr_client_main(); used when opening log files. */
static client_id_t client_id;
static void *mutex; /* for multithread support: guards the global num_refs */
static uint64 num_refs; /* keep a global instruction reference count */
/* Allocated TLS slot offsets */
enum {
INSTRACE_TLS_OFFS_BUF_PTR,
INSTRACE_TLS_COUNT, /* total number of TLS slots allocated */
};
/* Segment register and offset of the raw TLS block, both filled in by
 * dr_raw_tls_calloc() in dr_client_main().
 */
static reg_id_t tls_seg;
static uint tls_offs;
/* Index of the drmgr TLS field that holds the per_thread_t pointer. */
static int tls_idx;
/* Address of raw TLS slot `enum_val` relative to the segment base `tls_base`.
 * NOTE(review): enum_val is added as a plain byte offset (not scaled by the
 * slot size); that is only correct while the single slot in use has index 0.
 */
#define TLS_SLOT(tls_base, enum_val) (void **)((byte *)(tls_base) + tls_offs + (enum_val))
/* Lvalue for the current buffer pointer stored in the raw TLS slot. */
#define BUF_PTR(tls_base) *(ins_ref_t **)TLS_SLOT(tls_base, INSTRACE_TLS_OFFS_BUF_PTR)
/* Shorthand for inserting a meta instruction before a given instruction. */
#define MINSERT instrlist_meta_preinsert
static void
instrace(void *drcontext)
{
per_thread_t *data;
ins_ref_t *ins_ref, *buf_ptr;
data = drmgr_get_tls_field(drcontext, tls_idx);
buf_ptr = BUF_PTR(data->seg_base);
/* Example of dumped file content:
* 0x7f59c2d002d3: call
* 0x7ffeacab0ec8: mov
*/
/* We use libc's fprintf as it is buffered and much faster than dr_fprintf
* for repeated printing that dominates performance, as the printing does here.
*/
for (ins_ref = (ins_ref_t *)data->buf_base; ins_ref < buf_ptr; ins_ref++) {
/* We use PIFX to avoid leading zeroes and shrink the resulting file. */
dr_fprintf(data->logf, PIFX ",%s\n", (ptr_uint_t)ins_ref->pc,
decode_opcode_name(ins_ref->opcode));
data->num_refs++;
}
BUF_PTR(data->seg_base) = data->buf_base;
}
/* Clean-call target: dumps the current thread's buffered instruction
 * records to its log file via instrace().
 */
static void
clean_call(void)
{
    instrace(dr_get_current_drcontext());
}
/* Asks DR to insert code at `where` in `ilist` that reads the raw TLS slot
 * holding the buffer pointer into register reg_ptr.
 */
static void
insert_load_buf_ptr(void *drcontext, instrlist_t *ilist, instr_t *where, reg_id_t reg_ptr)
{
    const uint slot_offs = tls_offs + INSTRACE_TLS_OFFS_BUF_PTR;

    dr_insert_read_raw_tls(drcontext, ilist, where, tls_seg, slot_offs, reg_ptr);
}
/* Inserts code at `where` that adds `adjust` to reg_ptr and then writes
 * reg_ptr back to the raw TLS slot holding the buffer pointer.
 */
static void
insert_update_buf_ptr(void *drcontext, instrlist_t *ilist, instr_t *where,
                      reg_id_t reg_ptr, int adjust)
{
    const uint slot_offs = tls_offs + INSTRACE_TLS_OFFS_BUF_PTR;
    instr_t *advance = XINST_CREATE_add(drcontext, opnd_create_reg(reg_ptr),
                                        OPND_CREATE_INT16(adjust));

    MINSERT(ilist, where, advance);
    dr_insert_write_raw_tls(drcontext, ilist, where, tls_seg, slot_offs, reg_ptr);
}
/* Inserts code at `where` that stores `opcode` as a 2-byte value into the
 * opcode field of the ins_ref_t addressed by register `base`, going through
 * the 2-byte view of the `scratch` register.
 */
static void
insert_save_opcode(void *drcontext, instrlist_t *ilist, instr_t *where, reg_id_t base,
                   reg_id_t scratch, int opcode)
{
    reg_id_t scratch16 = reg_resize_to_opsz(scratch, OPSZ_2);
    instr_t *load_opcode;
    instr_t *store_opcode;

    /* scratch16 <- opcode */
    load_opcode = XINST_CREATE_load_int(drcontext, opnd_create_reg(scratch16),
                                        OPND_CREATE_INT16(opcode));
    MINSERT(ilist, where, load_opcode);

    /* base->opcode <- scratch16 */
    store_opcode = XINST_CREATE_store_2bytes(
        drcontext, OPND_CREATE_MEM16(base, offsetof(ins_ref_t, opcode)),
        opnd_create_reg(scratch16));
    MINSERT(ilist, where, store_opcode);
}
/* Inserts code at `where` that materializes `pc` in the `scratch` register
 * and stores it into the pc field of the ins_ref_t addressed by `base`.
 */
static void
insert_save_pc(void *drcontext, instrlist_t *ilist, instr_t *where, reg_id_t base,
               reg_id_t scratch, app_pc pc)
{
    instr_t *store_pc;

    /* scratch <- pc (pointer-sized immediate) */
    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)pc, opnd_create_reg(scratch),
                                     ilist, where, NULL, NULL);

    /* base->pc <- scratch */
    store_pc = XINST_CREATE_store(drcontext,
                                  OPND_CREATE_MEMPTR(base, offsetof(ins_ref_t, pc)),
                                  opnd_create_reg(scratch));
    MINSERT(ilist, where, store_pc);
}
/* Inserts inline code at `where` that appends one ins_ref_t record (pc and
 * opcode of `where`) to the thread's buffer and advances the buffer pointer
 * by one record.
 */
static void
instrument_instr(void *drcontext, instrlist_t *ilist, instr_t *where)
{
    /* We need two scratch registers: one for the buffer pointer and one for
     * the values being stored.
     */
    reg_id_t reg_ptr, reg_tmp;
    bool released;

    if (drreg_reserve_register(drcontext, ilist, where, NULL, &reg_ptr) !=
        DRREG_SUCCESS) {
        DR_ASSERT(false); /* cannot recover */
        return;
    }
    if (drreg_reserve_register(drcontext, ilist, where, NULL, &reg_tmp) !=
        DRREG_SUCCESS) {
        DR_ASSERT(false); /* cannot recover */
        return;
    }

    insert_load_buf_ptr(drcontext, ilist, where, reg_ptr);
    insert_save_pc(drcontext, ilist, where, reg_ptr, reg_tmp, instr_get_app_pc(where));
    insert_save_opcode(drcontext, ilist, where, reg_ptr, reg_tmp,
                       instr_get_opcode(where));
    insert_update_buf_ptr(drcontext, ilist, where, reg_ptr, sizeof(ins_ref_t));

    /* Restore scratch registers; the second call is skipped if the first fails. */
    released =
        drreg_unreserve_register(drcontext, ilist, where, reg_ptr) == DRREG_SUCCESS &&
        drreg_unreserve_register(drcontext, ilist, where, reg_tmp) == DRREG_SUCCESS;
    if (!released)
        DR_ASSERT(false);
}
/* Per-instruction instrumentation callback: for each app instr, inserts
 * inline code to fill the buffer, and once per basic block inserts a clean
 * call that dumps the buffer. Always returns DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr,
                      bool for_trace, bool translating, void *user_data)
{
    bool dump_here;

    /* we don't want to auto-predicate any instrumentation */
    drmgr_disable_auto_predication(drcontext, bb);

    if (instr_is_app(instr)) {
        /* insert code to add an entry to the buffer */
        instrument_instr(drcontext, bb, instr);

        /* Insert code once per bb to call clean_call for processing the buffer.
         *
         * XXX i#1698: there are constraints for code between ldrex/strex pairs,
         * so we minimize the instrumentation in between by skipping the clean
         * call. We're relying a bit on the typical code sequence with either
         * ldrex..strex in the same bb, in which case our call at the start of
         * the bb is fine, or with a branch in between and the strex at the
         * start of the next bb. However, there is still a chance that the
         * instrumentation code may clear the exclusive monitor state.
         * Using a fault to handle a full buffer should be more robust, and the
         * forthcoming buffer filling API (i#513) will provide that.
         */
        dump_here = drmgr_is_first_instr(drcontext, instr)
            IF_AARCHXX(&&!instr_is_exclusive_store(instr));
        if (dump_here)
            dr_insert_clean_call(drcontext, bb, instr, (void *)clean_call, false, 0);
    }
    return DR_EMIT_DEFAULT;
}
/* Thread-init callback: allocates this thread's per_thread_t and record
 * buffer, points the raw TLS buffer-pointer slot at the start of the buffer,
 * opens the per-thread log file, and writes the header line to it.
 */
static void
event_thread_init(void *drcontext)
{
    per_thread_t *data = dr_thread_alloc(drcontext, sizeof(per_thread_t));
    DR_ASSERT(data != NULL);
    drmgr_set_tls_field(drcontext, tls_idx, data);

    /* Keep seg_base in a per-thread data structure so we can get the TLS
     * slot and find where the pointer points to in the buffer.
     */
    data->seg_base = dr_get_dr_segment_base(tls_seg);
    data->buf_base =
        dr_raw_mem_alloc(MEM_BUF_SIZE, DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL);
    DR_ASSERT(data->seg_base != NULL && data->buf_base != NULL);
    /* put buf_base to TLS as starting buf_ptr */
    BUF_PTR(data->seg_base) = data->buf_base;

    data->num_refs = 0;

    /* We're going to dump our data to a per-thread file.
     * On Windows we need an absolute path so we place it in
     * the same directory as our library. We could also pass
     * in a path as a client argument.
     */
    data->log =
        log_file_open(client_id, drcontext, NULL /* using client lib path */, "instrace",
#ifndef WINDOWS
                      DR_FILE_CLOSE_ON_FORK |
#endif
                          DR_FILE_ALLOW_LARGE);
    data->logf = log_stream_from_file(data->log);
    /* dr_fprintf() takes a DR file_t handle, so the header is written through
     * data->log rather than through the libc FILE * in data->logf.
     */
    dr_fprintf(data->log, "Format: <instr address>,<opcode>\n");
}
/* Thread-exit callback: dumps the records still in the buffer, adds this
 * thread's record count to the global total under the mutex, then closes
 * the log stream and frees the buffer and the per_thread_t.
 */
static void
event_thread_exit(void *drcontext)
{
    per_thread_t *data = drmgr_get_tls_field(drcontext, tls_idx);

    /* dump any remaining buffer entries */
    instrace(drcontext);

    dr_mutex_lock(mutex);
    num_refs += data->num_refs;
    dr_mutex_unlock(mutex);

    /* closes fd too */
    log_stream_close(data->logf);

    dr_raw_mem_free(data->buf_base, MEM_BUF_SIZE);
    dr_thread_free(drcontext, data, sizeof(per_thread_t));
}
static void
event_exit(void)
{
dr_log(NULL, DR_LOG_ALL, 1, "Client 'instrace' num refs seen: " SZFMT "\n", num_refs);
if (!dr_raw_tls_cfree(tls_offs, INSTRACE_TLS_COUNT))
DR_ASSERT(false);
if (!drmgr_unregister_tls_field(tls_idx) ||
!drmgr_unregister_thread_init_event(event_thread_init) ||
!drmgr_unregister_thread_exit_event(event_thread_exit) ||
!drmgr_unregister_bb_insertion_event(event_app_instruction) ||
drreg_exit() != DRREG_SUCCESS)
DR_ASSERT(false);
dr_mutex_destroy(mutex);
drmgr_exit();
}
/* Client entry point: initializes drmgr and drreg, registers the exit,
 * thread and instrumentation callbacks, and sets up the mutex, the drmgr
 * TLS field and the raw TLS slot used by the inline instrumentation.
 */
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
    /* We need 2 reg slots beyond drreg's eflags slots => 3 slots */
    drreg_options_t ops = { sizeof(ops), 3, false };
    bool ok;

    dr_set_client_name("DynamoRIO Sample Client 'instrace'",
                       "http://dynamorio.org/issues");

    ok = drmgr_init() && drreg_init(&ops) == DRREG_SUCCESS;
    if (!ok)
        DR_ASSERT(false);

    /* register events */
    dr_register_exit_event(event_exit);
    ok = drmgr_register_thread_init_event(event_thread_init) &&
        drmgr_register_thread_exit_event(event_thread_exit) &&
        drmgr_register_bb_instrumentation_event(NULL /*analysis_func*/,
                                                event_app_instruction, NULL);
    if (!ok)
        DR_ASSERT(false);

    client_id = id;
    mutex = dr_mutex_create();

    tls_idx = drmgr_register_tls_field();
    DR_ASSERT(tls_idx != -1);

    /* The TLS field provided by DR cannot be directly accessed from the code cache.
     * For better performance, we allocate raw TLS so that we can directly
     * access and update it with a single instruction.
     */
    if (!dr_raw_tls_calloc(&tls_seg, &tls_offs, INSTRACE_TLS_COUNT, 0))
        DR_ASSERT(false);

    dr_log(NULL, DR_LOG_ALL, 1, "Client 'instrace' initializing\n");
}
#!/bin/sh
# Runs a target application under DynamoRIO with the exec_trace client.
# Usage: ./run.sh <target-app> [target-args...]
#
# Expects the DynamoRIO release in ../dynamorio and libexec_trace.so in the
# current directory.

if [ "$#" -lt 1 ]; then
    echo "usage: $0 <target-app> [target-args...]" >&2
    exit 1
fi

# "$@" forwards the target and all of its arguments without word splitting,
# so paths containing spaces survive.
../dynamorio/bin64/drrun -c ./libexec_trace.so -- "$@"
#include "utils.h"
#include "drx.h"
#include <stdio.h>
#ifdef WINDOWS
# include <io.h>
#endif
/* Opens a uniquely named log file and returns its DR file handle.
 *
 * - id: client id, used to look up the client library path when path is NULL
 * - drcontext: context passed to dr_log() for the "created" message
 * - path: directory for the log file, or NULL to use the directory that
 *         contains the client library
 * - name: base name of the log file; must not be NULL
 * - flags: open flags forwarded to drx_open_unique_appid_file()
 *
 * Returns the handle from drx_open_unique_appid_file(), which is
 * INVALID_FILE when the file could not be created.
 */
file_t
log_file_open(client_id_t id, void *drcontext, const char *path, const char *name,
              uint flags)
{
    file_t log;
    char log_dir[MAXIMUM_PATH];
    char buf[MAXIMUM_PATH];
    int printed;
    size_t len;
    char *dirsep;

    DR_ASSERT(name != NULL);
    printed = dr_snprintf(log_dir, BUFFER_SIZE_ELEMENTS(log_dir), "%s",
                          path == NULL ? dr_get_client_path(id) : path);
    DR_ASSERT(printed != 0);
    NULL_TERMINATE_BUFFER(log_dir);
    /* dr_snprintf() returns a negative value when the output did not fit, and
     * it does not terminate a completely filled buffer. Storing that result
     * directly in a size_t would turn it into a huge length and push dirsep
     * outside log_dir, so clamp to the length of the terminated string.
     */
    if (printed < 0 || (size_t)printed >= BUFFER_SIZE_ELEMENTS(log_dir))
        len = BUFFER_SIZE_ELEMENTS(log_dir) - 1;
    else
        len = (size_t)printed;
    dirsep = log_dir + len - 1;
    if (path == NULL /* removing client lib */ ||
        /* path does not have a trailing / and is too large to add it */
        (*dirsep != '/' IF_WINDOWS(&&*dirsep != '\\') &&
         len == BUFFER_SIZE_ELEMENTS(log_dir) - 1)) {
        /* Walk back to the last directory separator. */
        for (dirsep = log_dir + len; *dirsep != '/' IF_WINDOWS(&&*dirsep != '\\');
             dirsep--)
            DR_ASSERT(dirsep > log_dir);
    }
    /* remove trailing / if necessary */
    if (*dirsep == '/' IF_WINDOWS(|| *dirsep == '\\'))
        *dirsep = 0;
    else if (sizeof(log_dir) > (dirsep + 1 - log_dir) / sizeof(log_dir[0]))
        *(dirsep + 1) = 0;
    NULL_TERMINATE_BUFFER(log_dir);
    /* we do not need call drx_init before using drx_open_unique_appid_file */
    log = drx_open_unique_appid_file(log_dir, dr_get_process_id(), name, "log", flags,
                                     buf, BUFFER_SIZE_ELEMENTS(buf));
    if (log != INVALID_FILE) {
        char msg[MAXIMUM_PATH];
        /* A truncated message is acceptable here; it is terminated below. */
        printed = dr_snprintf(msg, BUFFER_SIZE_ELEMENTS(msg), "Data file %s created", buf);
        DR_ASSERT(printed != 0);
        NULL_TERMINATE_BUFFER(msg);
        dr_log(drcontext, DR_LOG_ALL, 1, "%s", msg);
#ifdef SHOW_RESULTS
        DISPLAY_STRING(msg);
#    ifdef WINDOWS
        if (dr_is_notify_on()) {
            /* assuming dr_enable_console_printing() is called in the initialization */
            dr_fprintf(STDERR, "%s\n", msg);
        }
#    endif /* WINDOWS */
#endif     /* SHOW_RESULTS */
    }
    return log;
}
/* Closes a log file handle by passing it to dr_close_file(). */
void
log_file_close(file_t log)
{
dr_close_file(log);
}
/* Wraps the file handle `f` in a libc FILE stream opened for writing.
 * Returns NULL when the handle cannot be converted.
 */
FILE *
log_stream_from_file(file_t f)
{
    FILE *stream;
#ifdef WINDOWS
    /* Windows: turn the OS handle into a CRT descriptor first. */
    int fd = _open_osfhandle((intptr_t)f, 0);
    stream = (fd == -1) ? NULL : _fdopen(fd, "w");
#else
    stream = fdopen(f, "w");
#endif
    return stream;
}
/* Closes a stream obtained from log_stream_from_file().
 *
 * - f: stream to close; NULL is accepted and ignored, because
 *      log_stream_from_file() returns NULL on failure and fclose(NULL) is
 *      undefined behavior.
 */
void
log_stream_close(FILE *f)
{
    if (f == NULL)
        return;
    fclose(f); /* closes underlying fd too for all platforms */
}
/* Shared helpers for the exec_trace client: buffer macros, platform
 * selection macros, and the log-file API implemented in utils.c.
 */
#ifndef EXEC_TRACE_UTILS_H
#define EXEC_TRACE_UTILS_H

#include "dr_api.h" /* for file_t, client_id_t */
#include <stdio.h>

/* Buffer helpers. `buf` must be an actual array (not a pointer), since the
 * sizes are computed with sizeof.
 */
#define BUFFER_SIZE_BYTES(buf) sizeof(buf)
#define BUFFER_SIZE_ELEMENTS(buf) (BUFFER_SIZE_BYTES(buf) / sizeof((buf)[0]))
#define BUFFER_LAST_ELEMENT(buf) (buf)[BUFFER_SIZE_ELEMENTS(buf) - 1]
#define NULL_TERMINATE_BUFFER(buf) BUFFER_LAST_ELEMENT(buf) = 0

/* Platform selection:
 * - IF_WINDOWS(x) expands to x on Windows and to nothing elsewhere
 * - IF_UNIX_ELSE(x, y) expands to x on non-Windows builds and to y on Windows
 * - DISPLAY_STRING(msg) shows msg in a message box on Windows and prints it
 *   with dr_printf() elsewhere; the caller supplies the trailing semicolon
 */
#ifdef WINDOWS
#    define IF_WINDOWS(x) x
#    define IF_UNIX_ELSE(x, y) y
#    define DISPLAY_STRING(msg) dr_messagebox("%s", msg)
#else
#    define IF_WINDOWS(x)
#    define IF_UNIX_ELSE(x, y) x
#    define DISPLAY_STRING(msg) dr_printf("%s\n", msg)
#endif

/* open a log file
 * - id: client id for getting the client library path
 * - drcontext: DR's context for per-thread logging, pass NULL if global logging
 * - path: where the log file should be, pass NULL if using client library path
 * - name: name of the log file
 * - flags: file open mode, e.g., DR_FILE_WRITE_REQUIRE_NEW
 */
file_t
log_file_open(client_id_t id, void *drcontext, const char *path, const char *name,
              uint flags);

/* close a log file opened by log_file_open */
void
log_file_close(file_t log);

/* Converts a raw file descriptor into a FILE stream. */
FILE *
log_stream_from_file(file_t f);

/* log_file_close does *not* need to be called when calling this on a
 * stream converted from a file descriptor.
 */
void
log_stream_close(FILE *f);

#endif /* EXEC_TRACE_UTILS_H */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment