-
-
Save pgoodman/53802532a9759afcfde8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash -
''''echo -n
# Polyglot header: bash parses the line above as `echo -n` (the four quotes are
# two empty strings glued to the command name), while Python parses it as the
# start of a long string literal that contains this whole shell script.

args="$@"

# Figure out if we should be using clang or clang++, based on the name this
# script was invoked under.
case $0 in
  *"tsxicc") compiler_cmd=clang-3.5 ;;
  *)         compiler_cmd=clang++-3.5 ;;
esac

# Mode flags and the list of link inputs; the argument scan fills these in.
obj_files=
should_link=0
should_instrument=0
# Try to determine if we should instrument. An object file that does not exist
# yet means we are being asked to produce it (instrument); existing object files
# and archives are inputs to a link.
for obj_file in $args ; do
  case $obj_file in
    *".o")
      if [[ -f $obj_file ]] ; then
        should_link=1
        obj_files+=" $obj_file"
      else
        should_instrument=1
        break
      fi
      ;;
    *".a")
      should_link=1
      obj_files+=" $obj_file"
      ;;
  esac
done
# Do some linking instead of instrumenting.
if [[ $should_link -eq 1 ]] ; then

  # Create a temporary file that will store the combination of the other object
  # files. We have this step so that, after linking, we can figure out which
  # remaining symbols are undefined, then handle them with another assembly
  # file.
  combined_obj_file=$(mktemp /tmp/combined.XXXXXXXXXX.o) || {
    echo "Failed to create temporary combined object file." >&2
    # Without a temporary file there is nothing to link into; stop here rather
    # than running ld with an empty output path.
    exit 1
  }
  symbols_file="$combined_obj_file.syms"

  # Combine all the object files. $obj_files is intentionally unquoted: it is a
  # space-separated list that must be split back into individual arguments.
  ld -g -r $obj_files -o "$combined_obj_file" || exit $?

  # Find all the missing symbols: undefined (U) entries whose name mentions the
  # __tsxi_direct suffix, with the " U " marker stripped.
  nm "$combined_obj_file" \
    | grep -e ' U ' \
    | grep '__tsxi_direct' \
    | sed 's/ U //' \
    > "$symbols_file"

  # The pipeline reports the status of its last stage (sed), so an empty symbol
  # list is not treated as a failure.
  exit $?
fi
# Nothing to instrument and nothing to link: hand the arguments to the real
# compiler untouched. This could be because we're asking for things like --help
# or we've already done our instrumentation.
if (( should_instrument == 0 && should_link == 0 )) ; then
  $compiler_cmd $args
  exit $?
fi
# Try to use clang/clang++ to compile to assembly files instead
# of object files.
#
# Every argument that ends in ".o" is redirected to a sibling ".o.s" file. The
# rewrite is done per argument and anchored at the end of the word, so a ".o"
# that merely appears inside some other argument (for example a directory name)
# is left alone.
new_args=
for arg in $args ; do
  if [[ $arg == *".o" ]] ; then
    arg="${arg%.o}.o.s"
  fi
  new_args+=" $arg"
done

$compiler_cmd -S $new_args || {
  echo "Unable to instrument files with TSXi." >&2
  exit 1
}

# Reaching this point means the compile succeeded; the final `exit` of the
# script reports this value.
compiler_ret_code=0
# Instrument the assembly files. | |
for obj_file in $args ; do | |
if [[ $obj_file == *".o" ]] ; then | |
asm_file=${obj_file/.o/.o.s} | |
inst_asm_file=${obj_file/.o/.o.inst.s} | |
asm_funcs_file="$asm_file.funcs" | |
asm_labels_file="$asm_file.labels" | |
asm_globals_file="$asm_file.globals" | |
asm_weak_symbols_file="$asm_file.weak" | |
if [[ ! -f $asm_file ]] ; then | |
echo "$asm_file does not exist; exiting." | |
exit $ret_code | |
fi | |
# Create a list of all functions. | |
grep -e '@function' $asm_file \ | |
| grep -e '\.type' \ | |
| sed "s/\\t.type\\t\([^,]\+\),@function/\1/" \ | |
> $asm_funcs_file | |
# Create a list of all labels. | |
grep -e '^[a-zA-Z0-9_$.]*:' $asm_file \ | |
| sed 's/\([^:]*\):.*/\1/' \ | |
> $asm_labels_file | |
# Create a list of all globals. | |
grep -e '\.globl' $asm_file \ | |
| sed "s/\t.globl\t//" \ | |
> $asm_globals_file | |
# Create a list of all weak symbols. | |
grep -e '\.weak' $asm_file \ | |
| sed "s/\t.weak\t//" \ | |
> $asm_weak_symbols_file | |
# Run this script as a python program so that we can do the actual instrumentation of the | |
# assembly. | |
python $0 $asm_file $asm_funcs_file $asm_labels_file $asm_globals_file $asm_weak_symbols_file > $inst_asm_file | |
# Compile the instrumented assembly to an object file. | |
$compiler_cmd -c $inst_asm_file -o $obj_file | |
fi | |
done | |
# Exit out of the bash script. | |
exit $compiler_ret_code | |
''' | |
import re | |
import sys | |
# Line classifiers. Each pattern is matched against one right-stripped line of
# assembly text.
LABEL_LINE = re.compile(r"^([a-zA-Z0-9_$.]+):(.*)$", flags=re.IGNORECASE)
JUMP_LINE = re.compile(
    r"^\s*(call|j)([a-z]{,3})\s+([^\s]+)(.*)$", flags=re.IGNORECASE)
RET_LINE = re.compile(r"^\s*ret(.*)$", flags=re.IGNORECASE)

# Symbol names and suffixes used by the instrumentation.
MAIN_FUNCTION = "__tsxi_main"
ERROR_FUNCTION = "__tsxi_error"
DIRECT_SUFFIX = "__tsxi_direct"
PLT_SUFFIX = "@PLT"

# Text fragments emitted into the instrumented assembly.
INDENT = "\t"
BEGIN_TRANSACTION = "xbegin\t" + ERROR_FUNCTION
END_TRANSACTION = "xend"

# Symbol tables, filled in from the list files given on the command line.
FUNCTIONS = set()
GLOBALS = set()
WEAKS = set()
LABELS = {MAIN_FUNCTION}
def is_function(symbol):
    """Return True if `symbol` is a listed function or ends with the PLT suffix."""
    if symbol.endswith(PLT_SUFFIX):
        return True
    return symbol in FUNCTIONS
def is_external(symbol):
    """Return True if `symbol` is a listed global or ends with the PLT suffix."""
    if symbol.endswith(PLT_SUFFIX):
        return True
    return symbol in GLOBALS
def is_external_function(symbol):
    """Return True if `symbol` is both a function and externally visible."""
    if not is_function(symbol):
        return False
    return is_external(symbol)
def is_weak(symbol):
    """Return True if `symbol` appears in the set of weak symbols."""
    listed_as_weak = symbol in WEAKS
    return listed_as_weak
def P(*args):
    """Print one output line made of the string forms of `args`, concatenated.

    Called with no arguments it prints an empty line.
    """
    # Parenthesised single-argument form: valid as the Python 2 print statement
    # and as the Python 3 print function, with identical output.
    print("".join(str(arg) for arg in args))
def process_direct_label(label):
    """Emit the `<label>__tsxi_direct` entry label for `label`.

    The direct label is preceded by a `.globl` directive when `label` is
    external, or by a `.weak` directive when it is weak; otherwise only the
    label itself is emitted.
    """
    direct_name = label + DIRECT_SUFFIX
    if is_external(label):
        directive = ".globl "
    elif is_weak(label):
        directive = ".weak "
    else:
        directive = None
    if directive is not None:
        P(directive, direct_name)
    P(direct_name, ":")
def process_external_function(label):
    """Emit the transaction-ending instruction at an external function's entry.

    `label` is accepted for symmetry with the other handlers and is not used.
    """
    entry_line = (INDENT, END_TRANSACTION)
    P(*entry_line)  # TODO(pag): Eventually xtest?
def process_internal_function(label):
    """Emit the transaction-ending instruction at an internal function's entry.

    `label` is accepted for symmetry with the other handlers and is not used.
    """
    entry_line = (INDENT, END_TRANSACTION)
    P(*entry_line)
# Process a line that looks like the definition of a label.
def process_label(label, rest):
    """Re-emit a label definition, instrumenting it when it names a function.

    `label` is the label name and `rest` is whatever followed the colon on the
    same line. A label called "main" is renamed to MAIN_FUNCTION before
    anything else happens.
    """
    if label == "main":
        label = MAIN_FUNCTION
    P(label, ":")

    # Labels starting with "__cxx_" are ABI-specific things; don't instrument.
    if not label.startswith("__cxx_"):
        if is_function(label):
            if is_external(label):
                process_external_function(label)
            else:
                process_internal_function(label)
            process_direct_label(label)
        else:
            assert label in LABELS

    P(rest)
# Process a jump, function call, or branch.
def process_jump(cfi_type, target, rest):
    """Re-emit a control-flow instruction, instrumenting it as needed.

    `cfi_type` is the mnemonic, `target` its operand, and `rest` any text that
    followed the operand. A target that is a known label or goes through the
    PLT is treated as direct; anything else is treated as indirect and is
    preceded by a transaction begin.
    """
    via_plt = target.endswith(PLT_SUFFIX)
    if not (via_plt or target in LABELS):
        # Indirect jump: start a transaction before transferring control, and
        # close it again after a call.
        P(INDENT, BEGIN_TRANSACTION)
        P(INDENT, cfi_type, " ", target)
        if "call" in cfi_type.lower():
            P(INDENT, END_TRANSACTION)
    else:
        # Direct jump: functions are entered through their direct label; any
        # other label is used unchanged.
        if is_function(target):
            if via_plt:
                plain_name = target[:-len(PLT_SUFFIX)]
                target = plain_name + DIRECT_SUFFIX + PLT_SUFFIX
            else:
                target = target + DIRECT_SUFFIX
        P(INDENT, cfi_type, " ", target)

    # NOTE(review): the source's indentation was lost; this trailing line is
    # assumed to run for every jump, mirroring process_label -- confirm.
    P(rest)
# Process a function return.
def process_return(rest):
    """Emit a transaction begin followed by the return instruction.

    `rest` is the text that followed "ret" on the original line and is appended
    to the emitted "ret" unchanged.
    """
    for pieces in ((INDENT, BEGIN_TRANSACTION), (INDENT, "ret", rest)):
        P(*pieces)
# Declare an external tsxi error handler.
def process_first_line():
    """Emit the prologue: symbol declarations and a replacement `main`.

    Declares ERROR_FUNCTION and MAIN_FUNCTION as global functions, then defines
    a `main` label whose body calls MAIN_FUNCTION between a transaction begin
    and a transaction end, and returns.
    """
    prologue = (
        (".globl ", ERROR_FUNCTION),
        (".type ", ERROR_FUNCTION, ",@function"),
        (),
        (".globl ", MAIN_FUNCTION),
        (".type ", MAIN_FUNCTION, ",@function"),
        ("main:",),
        (INDENT, BEGIN_TRANSACTION),
        (INDENT, "call\t", MAIN_FUNCTION),
        (INDENT, END_TRANSACTION),
        (INDENT, "ret"),
    )
    for pieces in prologue:
        P(*pieces)
# Process a single assembly line.
def process_line(asm_line):
    """Dispatch one assembly line to the matching handler.

    Label definitions are tried first, then jumps/calls, then returns. A line
    matching none of the patterns is printed unchanged.
    """
    label_match = LABEL_LINE.match(asm_line)
    if label_match is not None:
        name, rest = label_match.groups()
        return process_label(name, rest)

    jump_match = JUMP_LINE.match(asm_line)
    if jump_match is not None:
        stem, suffix, target, rest = jump_match.groups()
        return process_jump(stem + suffix, target, rest)

    return_match = RET_LINE.match(asm_line)
    if return_match is not None:
        return process_return(return_match.group(1))

    P(asm_line)
def read_symbols(syms, file_name):
    """Add every symbol listed in `file_name` (one per line) to the set `syms`.

    Surrounding whitespace, including a stray carriage return, is stripped from
    each name, and blank lines are skipped so that the empty string is never
    recorded as a symbol.
    """
    with open(file_name, "r") as syms_file:
        stripped = (line.strip() for line in syms_file)
        syms.update(name for name in stripped if name)
if __name__ == "__main__":
    # Command line: argv[1] is the assembly file to instrument; argv[2..5] are
    # the function, label, global and weak-symbol list files, in that order.
    symbol_tables = (FUNCTIONS, LABELS, GLOBALS, WEAKS)
    for position, table in enumerate(symbol_tables, 2):
        read_symbols(table, sys.argv[position])

    prologue_pending = True
    with open(sys.argv[1], "r") as asm_lines:
        for raw_line in asm_lines:
            process_line(raw_line.rstrip("\r\n\t "))

            # For the "first" line, we want to do some things like
            # inject an error function, a new main function, etc. The first
            # line that mentions ".text" triggers this, once.
            if prologue_pending and ".text" in raw_line:
                process_first_line()
                prologue_pending = False

    # NOTE(review): the source's indentation was lost; this final blank line is
    # assumed to be emitted once, after the whole file -- confirm.
    P()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment