Create a gist now

Instantly share code, notes, and snippets.

@pgoodman /tsxicc Secret
Created May 4, 2015

What would you like to do?
#!/bin/bash -
''''echo -n
# Polyglot preamble: to bash the line above is just `echo -n` (the quote
# pairs are empty strings). The instrumentation pass re-runs this same file
# under python, where that line opens a string literal which hides the whole
# shell section from the python parser.

# All command-line arguments, flattened into one space-separated string.
# Later steps expand it unquoted and depend on word splitting.
args="$*"

# Figure out if we should be using clang or clang++, based on the name this
# wrapper was invoked as.
case "$0" in
  *tsxicc) compiler_cmd=clang-3.5 ;;
  *) compiler_cmd=clang++-3.5 ;;
esac

# Mode flags and the list of link inputs; filled in by the argument scan.
should_instrument=0
should_link=0
obj_files=
# Scan the arguments to decide between instrumenting and linking:
#   - an object file that does not exist yet means we must instrument, and
#     the scan stops there;
#   - an object file that already exists, or any archive, is a link input.
# NOTE(review): an output object left over from an earlier build also exists
# on disk, so recompiling it is classified as a link -- confirm intended.
for obj_file in $args ; do
  case "$obj_file" in
    *.o)
      if [[ -f $obj_file ]] ; then
        should_link=1
        obj_files="$obj_files $obj_file"
      else
        should_instrument=1
        break
      fi
      ;;
    *.a)
      should_link=1
      obj_files="$obj_files $obj_file"
      ;;
  esac
done
# Do some linking instead of instrumenting.
if [[ $should_link -eq 1 ]] ; then
  # Create a temporary file that will store the combination of the other
  # object files. We have this step so that, after linking, we can figure out
  # which remaining symbols are undefined, then handle them with another
  # assembly file.
  combined_obj_file=$(mktemp /tmp/combined.XXXXXXXXXX.o) || {
    echo "Failed to create temporary combined object file." >&2
    # Without the temporary file every path derived below would be empty.
    exit 1
  }
  symbols_file="$combined_obj_file.syms"

  # Combine all the object files into one relocatable object. $obj_files is
  # deliberately unquoted: it is a space-separated list of inputs.
  ld -g -r $obj_files -o "$combined_obj_file" || exit $?

  # Find all the missing symbols: undefined entries that mention
  # __tsxi_direct, one per line, written next to the combined object.
  nm "$combined_obj_file" \
    | grep -e ' U ' \
    | grep '__tsxi_direct' \
    | sed 's/ U //' \
    > "$symbols_file"
  # The status reported is that of the last pipeline stage (sed).
  exit $?
fi
# Nothing to instrument and nothing to link. This could be because we were
# asked for things like --help, or because the instrumentation has already
# been done. Hand the arguments to the real compiler and report its status.
if [[ $should_instrument -eq 0 && $should_link -eq 0 ]] ; then
  $compiler_cmd $args
  exit
fi
# Try to use clang/clang++ to compile to assembly files instead of object
# files: every argument that names an object file (ends in .o) is redirected
# to a matching .o.s file. Each word is rewritten on its own and only at its
# end, so several object names, or a ".o" in the middle of some other
# argument, do not confuse the rewrite.
new_args=
for arg in $args ; do
  if [[ $arg == *.o ]] ; then
    arg="$arg.s"
  fi
  new_args="$new_args $arg"
done

# $new_args is deliberately unquoted so that it splits back into words.
$compiler_cmd -S $new_args
compiler_ret_code=$?
if [[ $compiler_ret_code -ne 0 ]] ; then
  echo "Unable to instrument files with TSXi." >&2
  exit 1
fi
# Instrument the assembly files: for every object file named in the
# arguments, rewrite its generated assembly and assemble the result into the
# object file itself. The intermediate files are left next to the object.
for obj_file in $args ; do
  if [[ $obj_file != *.o ]] ; then
    continue
  fi

  # Derive every name from the .o suffix only, so that a ".o" elsewhere in
  # the path is left alone.
  obj_stem=${obj_file%.o}
  asm_file="$obj_stem.o.s"
  inst_asm_file="$obj_stem.o.inst.s"
  asm_funcs_file="$asm_file.funcs"
  asm_labels_file="$asm_file.labels"
  asm_globals_file="$asm_file.globals"
  asm_weak_symbols_file="$asm_file.weak"

  if [[ ! -f $asm_file ]] ; then
    echo "$asm_file does not exist; exiting." >&2
    exit 1
  fi

  # Create a list of all functions.
  grep -e '@function' "$asm_file" \
    | grep -e '\.type' \
    | sed "s/\\t.type\\t\([^,]\+\),@function/\1/" \
    > "$asm_funcs_file"

  # Create a list of all labels.
  grep -e '^[a-zA-Z0-9_$.]*:' "$asm_file" \
    | sed 's/\([^:]*\):.*/\1/' \
    > "$asm_labels_file"

  # Create a list of all globals.
  grep -e '\.globl' "$asm_file" \
    | sed "s/\t.globl\t//" \
    > "$asm_globals_file"

  # Create a list of all weak symbols.
  grep -e '\.weak' "$asm_file" \
    | sed "s/\t.weak\t//" \
    > "$asm_weak_symbols_file"

  # Run this script as a python program so that we can do the actual
  # instrumentation of the assembly. A failure here must not be assembled
  # into a truncated object file.
  python "$0" "$asm_file" "$asm_funcs_file" "$asm_labels_file" \
    "$asm_globals_file" "$asm_weak_symbols_file" > "$inst_asm_file" || {
    echo "Unable to instrument $asm_file." >&2
    exit 1
  }

  # Compile the instrumented assembly to an object file, and report a
  # failure to the caller instead of carrying on.
  $compiler_cmd -c "$inst_asm_file" -o "$obj_file" || exit $?
done

# Exit out of the bash script.
exit $compiler_ret_code
'''
import re
import sys
# Patterns used to classify one line of assembly (all case-insensitive).
# A label definition: a name followed by a colon at the start of the line,
# then the rest of the line.
LABEL_LINE = re.compile(r"^([a-zA-Z0-9_$.]+):(.*)$", re.IGNORECASE)
# A call or jump: "call" or "j", up to three more letters, the target
# operand, and whatever follows the target.
JUMP_LINE = re.compile(r"^\s*(call|j)([a-z]{,3})\s+([^\s]+)(.*)$",
                       re.IGNORECASE)
# A return: "ret" followed by any trailing text.
RET_LINE = re.compile(r"^\s*ret(.*)$", re.IGNORECASE)

# Symbol names and suffixes that appear in the instrumented output.
ERROR_FUNCTION = "__tsxi_error"
MAIN_FUNCTION = "__tsxi_main"
DIRECT_SUFFIX = "__tsxi_direct"
PLT_SUFFIX = "@PLT"

# Instruction text emitted around control transfers.
INDENT = "\t"
BEGIN_TRANSACTION = "xbegin\t" + ERROR_FUNCTION
END_TRANSACTION = "xend"

# Symbol tables, filled from the list files named on the command line.
# LABELS starts out containing the renamed main function.
FUNCTIONS = set()
LABELS = {MAIN_FUNCTION}
GLOBALS = set()
WEAKS = set()
# Return True if `symbol` names a function: either it is referenced through
# the PLT or it is listed in FUNCTIONS.
def is_function(symbol):
    if symbol.endswith(PLT_SUFFIX):
        return True
    return symbol in FUNCTIONS
# Return True if `symbol` is externally visible: either it is referenced
# through the PLT or it is listed in GLOBALS.
def is_external(symbol):
    if symbol.endswith(PLT_SUFFIX):
        return True
    return symbol in GLOBALS
# Return True only if `symbol` is both a function and externally visible.
def is_external_function(symbol):
    if not is_function(symbol):
        return False
    return is_external(symbol)
# Return True if `symbol` is listed in WEAKS.
def is_weak(symbol):
    listed_as_weak = symbol in WEAKS
    return listed_as_weak
# Print all arguments, converted to strings and concatenated with no
# separator, as one line on stdout. With no arguments an empty line is
# printed.
def P(*args):
    # A single parenthesised argument is valid both as a Python 2 print
    # statement and as a Python 3 print() call, so the instrumentation pass
    # works whichever interpreter `python` resolves to.
    print("".join(str(arg) for arg in args))
# Emit the DIRECT_SUFFIX twin of `label`. The twin is declared .globl when
# the label is external, .weak when it is weak, and is then defined.
def process_direct_label(label):
    direct_label = label + DIRECT_SUFFIX
    if is_external(label):
        P(".globl ", direct_label)
    elif is_weak(label):
        P(".weak ", direct_label)
    P(direct_label, ":")
# Emit the instruction placed right after the label of an external function:
# an indented END_TRANSACTION. `label` itself is not used.
def process_external_function(label):
    P(INDENT + END_TRANSACTION)  # TODO(pag): Eventually xtest?
# Emit the instruction placed right after the label of an internal function:
# an indented END_TRANSACTION. `label` itself is not used.
def process_internal_function(label):
    P(INDENT + END_TRANSACTION)
# Process a line that looks like the definition of a label.
#
# The label is printed first ("main" is renamed to MAIN_FUNCTION), then any
# function-entry instrumentation, then `rest` (the text that followed the
# colon) on a line of its own.
def process_label(label, rest):
    if label == "main":
        label = MAIN_FUNCTION
    P(label, ":")

    # Labels starting with "__cxx_" are ABI-specific things; don't
    # instrument them.
    if not label.startswith("__cxx_"):
        if is_function(label):
            if is_external(label):
                process_external_function(label)
            else:
                process_internal_function(label)
            process_direct_label(label)
        else:
            # Anything that is not a function must be a known label.
            assert label in LABELS

    P(rest)
# Process a jump, function call, or branch.
#
#   cfi_type: the instruction mnemonic as it appeared in the assembly.
#   target:   the operand the instruction transfers control to.
#   rest:     whatever followed the target on the line; it is printed on a
#             line of its own after the instruction.
#
# NOTE(review): the indentation of this copy of the file has been lost. It is
# not possible to tell from the text alone whether the `"call" in ...` check
# near the end belongs to the indirect branch only, or runs for every
# transfer -- confirm against the original before re-indenting.
def process_jump(cfi_type, target, rest):
goes_through_plt = target.endswith(PLT_SUFFIX)
# A target listed in LABELS, or one that goes through the PLT, is treated as
# a direct transfer; everything else is treated as indirect.
if target in LABELS or goes_through_plt: # Direct jump.
if is_function(target):
# Direct transfers to functions are redirected to the function's
# DIRECT_SUFFIX label; a PLT suffix is kept at the very end of the name.
if goes_through_plt:
target = target[:-len(PLT_SUFFIX)] + DIRECT_SUFFIX + PLT_SUFFIX
else:
target = target + DIRECT_SUFFIX
P(INDENT, cfi_type, " ", target)
else:
# Direct transfer to a plain label: emitted unchanged.
P(INDENT, cfi_type, " ", target)
else: # Indirect jump.
# BEGIN_TRANSACTION is emitted immediately before the transfer.
P(INDENT, BEGIN_TRANSACTION)
P(INDENT, cfi_type, " ", target)
# For calls, END_TRANSACTION is emitted after the instruction.
if "call" in cfi_type.lower():
P(INDENT, END_TRANSACTION)
P(rest)
# Process a function return: emit BEGIN_TRANSACTION, then the return
# instruction itself with `rest` (the text that followed "ret") re-attached.
def process_return(rest):
    for instruction in (BEGIN_TRANSACTION, "ret" + rest):
        P(INDENT, instruction)
# Emit the preamble injected once per assembly file: declare the tsxi error
# handler and MAIN_FUNCTION as global functions, then define a `main` that
# calls MAIN_FUNCTION between BEGIN_TRANSACTION and END_TRANSACTION and
# returns.
def process_first_line():
    preamble = [
        ".globl " + ERROR_FUNCTION,
        ".type " + ERROR_FUNCTION + ",@function",
        "",
        ".globl " + MAIN_FUNCTION,
        ".type " + MAIN_FUNCTION + ",@function",
        "main:",
        INDENT + BEGIN_TRANSACTION,
        INDENT + "call\t" + MAIN_FUNCTION,
        INDENT + END_TRANSACTION,
        INDENT + "ret",
    ]
    for line in preamble:
        P(line)
# Process a single assembly line. The line is tried as a label definition,
# then as a call/jump, then as a return; a line that is none of these is
# printed unchanged.
def process_line(asm_line):
    label_match = LABEL_LINE.match(asm_line)
    if label_match is not None:
        label, rest = label_match.groups()
        return process_label(label, rest)

    jump_match = JUMP_LINE.match(asm_line)
    if jump_match is not None:
        opcode, suffix, target, rest = jump_match.groups()
        return process_jump(opcode + suffix, target, rest)

    ret_match = RET_LINE.match(asm_line)
    if ret_match is not None:
        return process_return(ret_match.group(1))

    P(asm_line)
# Add every symbol listed in `file_name` (one per line) to the set `syms`.
#
# Line terminators are removed and blank lines are skipped, so the trailing
# newline of the file does not add an empty-string symbol to the set.
def read_symbols(syms, file_name):
    with open(file_name, "r") as syms_file:
        for line in syms_file:
            symbol = line.rstrip("\r\n")
            if symbol:
                syms.add(symbol)
# Entry point of the instrumentation pass. Command-line layout:
#   argv[1]  assembly file to instrument
#   argv[2]  list of function names
#   argv[3]  list of labels
#   argv[4]  list of global symbols
#   argv[5]  list of weak symbols
# The instrumented assembly is written to stdout.
#
# NOTE(review): the indentation of this copy of the file has been lost. It is
# not possible to tell from the text alone whether the final P() runs once
# after the loop or once per input line -- confirm against the original
# before re-indenting.
if "__main__" == __name__:
# Load the symbol tables consulted by the process_* functions.
read_symbols(FUNCTIONS, sys.argv[2])
read_symbols(LABELS, sys.argv[3])
read_symbols(GLOBALS, sys.argv[4])
read_symbols(WEAKS, sys.argv[5])
with open(sys.argv[1], "r") as asm_lines:
processed_first_line = False
for asm_line in asm_lines:
# Trailing newline characters, tabs and spaces are removed before the line is
# classified.
process_line(asm_line.rstrip("\r\n\t "))
# For the "first" line, we want to do some things like
# inject an error function, a new main function, etc.
# "First" here means the first line that contains ".text"; the injection
# happens after that line has itself been emitted, and only once.
if not processed_first_line and ".text" in asm_line:
process_first_line()
processed_first_line = True
P()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment