| #!/bin/bash - | |
| ''''echo -n | |
| args="$@" | |
| # Figure out if we should be using clang or clang++. | |
| if [[ $0 == *"tsxicc" ]] ; then | |
| compiler_cmd=clang-3.5 | |
| else | |
| compiler_cmd=clang++-3.5 | |
| fi | |
| should_instrument=0 | |
| should_link=0 | |
| obj_files= | |
| # Try to determine if we should instrument. | |
| for obj_file in $args ; do | |
| if [[ $obj_file == *".o" ]] ; then | |
| if [[ ! -f $obj_file ]] ; then | |
| should_instrument=1 | |
| break | |
| else | |
| should_link=1 | |
| obj_files+=" $obj_file" | |
| fi | |
| elif [[ $obj_file == *".a" ]] ; then | |
| should_link=1 | |
| obj_files+=" $obj_file" | |
| fi | |
| done | |
| # Do some linking instead of instrumenting. | |
| if [[ $should_link -eq 1 ]] ; then | |
| # Create a temporary file that will store the combination of the other object | |
| # files. We have this step so that, after linking, we can figure out which | |
| # remaining symbols are undefined, then handle them with another assembly | |
| # file. | |
| combined_obj_file=$(mktemp /tmp/combined.XXXXXXXXXX.o) || { | |
| echo "Failed to create temporary combined object file." | |
| } | |
| symbols_file=$combined_obj_file".syms" | |
| # Combine all the object files. | |
| ld -g -r $obj_files -o $combined_obj_file | |
| # Find all the missing symbols. | |
| nm $combined_obj_file \ | |
| | grep -e ' U ' \ | |
| | grep '__tsxi_direct' \ | |
| | sed 's/ U //' \ | |
| > $symbols_file | |
| exit $? | |
| fi | |
| # Don't do any instrumenting. This could be because we're asking for things like --help | |
| # or we've already done our instrumentation. | |
| if [[ $should_instrument -eq 0 ]] && [[ $should_link -eq 0 ]] ; then | |
| $compiler_cmd $args | |
| exit $? | |
| fi | |
| # Try to use clang/clang++ to compile to assembly files instead | |
| # of object files. | |
| new_args=${args/.o/.o.s} | |
| $compiler_cmd -S $new_args || { | |
| echo "Unable to instrument files with TSXi." | |
| exit 1 | |
| } | |
| compiler_ret_code=$? | |
| # Instrument the assembly files. | |
| for obj_file in $args ; do | |
| if [[ $obj_file == *".o" ]] ; then | |
| asm_file=${obj_file/.o/.o.s} | |
| inst_asm_file=${obj_file/.o/.o.inst.s} | |
| asm_funcs_file="$asm_file.funcs" | |
| asm_labels_file="$asm_file.labels" | |
| asm_globals_file="$asm_file.globals" | |
| asm_weak_symbols_file="$asm_file.weak" | |
| if [[ ! -f $asm_file ]] ; then | |
| echo "$asm_file does not exist; exiting." | |
| exit $ret_code | |
| fi | |
| # Create a list of all functions. | |
| grep -e '@function' $asm_file \ | |
| | grep -e '\.type' \ | |
| | sed "s/\\t.type\\t\([^,]\+\),@function/\1/" \ | |
| > $asm_funcs_file | |
| # Create a list of all labels. | |
| grep -e '^[a-zA-Z0-9_$.]*:' $asm_file \ | |
| | sed 's/\([^:]*\):.*/\1/' \ | |
| > $asm_labels_file | |
| # Create a list of all globals. | |
| grep -e '\.globl' $asm_file \ | |
| | sed "s/\t.globl\t//" \ | |
| > $asm_globals_file | |
| # Create a list of all weak symbols. | |
| grep -e '\.weak' $asm_file \ | |
| | sed "s/\t.weak\t//" \ | |
| > $asm_weak_symbols_file | |
| # Run this script as a python program so that we can do the actual instrumentation of the | |
| # assembly. | |
| python $0 $asm_file $asm_funcs_file $asm_labels_file $asm_globals_file $asm_weak_symbols_file > $inst_asm_file | |
| # Compile the instrumented assembly to an object file. | |
| $compiler_cmd -c $inst_asm_file -o $obj_file | |
| fi | |
| done | |
| # Exit out of the bash script. | |
| exit $compiler_ret_code | |
| ''' | |
| import re | |
| import sys | |
# A label definition: `name:` at the start of the line, followed by anything.
LABEL_LINE = re.compile(r"^([a-zA-Z0-9_$.]+):(.*)$", re.I)

# A control-flow instruction: `call` or `j` plus up to three more letters
# (e.g. callq, jmp, jne), its first operand, and the rest of the line.
JUMP_LINE = re.compile(r"^\s*(call|j)([a-z]{,3})\s+([^\s]+)(.*)$", re.I)

# A return instruction; the group captures whatever follows `ret`.
RET_LINE = re.compile(r"^\s*ret(.*)$", re.I)

# Symbol used as the `xbegin` fallback target.
ERROR_FUNCTION = "__tsxi_error"

# Name given to the program's original `main` label.
MAIN_FUNCTION = "__tsxi_main"

# Suffix of the secondary label emitted for each function; direct calls and
# jumps to functions are redirected to this label.
DIRECT_SUFFIX = "__tsxi_direct"

BEGIN_TRANSACTION = "xbegin\t{}".format(ERROR_FUNCTION)
END_TRANSACTION = "xend"
INDENT = "\t"
PLT_SUFFIX = "@PLT"

# Symbol tables, filled in from the files named on the command line.
FUNCTIONS = set()
LABELS = set([MAIN_FUNCTION])
GLOBALS = set()
WEAKS = set()
def is_function(symbol):
  """Return True if `symbol` is listed in FUNCTIONS or ends with the PLT suffix."""
  return symbol in FUNCTIONS or symbol.endswith(PLT_SUFFIX)
def is_external(symbol):
  """Return True if `symbol` is listed in GLOBALS or ends with the PLT suffix."""
  return symbol in GLOBALS or symbol.endswith(PLT_SUFFIX)
def is_external_function(symbol):
  """Return True if `symbol` is both a function and externally visible."""
  return is_function(symbol) and is_external(symbol)
def is_weak(symbol):
  """Return True if `symbol` is listed in WEAKS."""
  return symbol in WEAKS
def P(*args):
  """Write the string forms of `args`, concatenated, as one line on stdout.

  Uses `sys.stdout.write` rather than a `print` statement so the file parses
  under both Python 2 and Python 3. With no arguments an empty line is
  written.
  """
  sys.stdout.write("".join(str(arg) for arg in args) + "\n")
def process_direct_label(label):
  """Emit the `<label>__tsxi_direct` label for a function.

  The direct label is given the same visibility as the original: `.globl`
  when the symbol is external, otherwise `.weak` when it is weak, otherwise
  no directive at all.
  """
  if is_external(label):
    P(".globl ", label, DIRECT_SUFFIX)
  elif is_weak(label):
    P(".weak ", label, DIRECT_SUFFIX)
  P(label, DIRECT_SUFFIX, ":")
def process_external_function(label):
  """Emit the entry instrumentation for an externally visible function.

  Currently this is a single `xend`; `label` is accepted but not used.
  """
  P(INDENT, END_TRANSACTION)  # TODO(pag): Eventually xtest?
def process_internal_function(label):
  """Emit the entry instrumentation for a file-local function.

  Currently this is a single `xend`; `label` is accepted but not used.
  """
  P(INDENT, END_TRANSACTION)
# Process a line that looks like the definition of a label.
def process_label(label, rest):
  """Re-emit a label definition, adding function-entry instrumentation.

  Args:
    label: The label name (text before the colon).
    rest: Whatever followed the colon on the original line; it is emitted on
        its own line after the label and any instrumentation.

  The program's `main` label is renamed to MAIN_FUNCTION. Labels starting
  with `__cxx_` are emitted untouched. Function labels get the entry
  instrumentation followed by their direct label. Any other label must be a
  known label.
  """
  if "main" == label:
    label = MAIN_FUNCTION

  P(label, ":")

  if label.startswith("__cxx_"):
    pass  # ABI-specific things, don't instrument.
  elif is_external_function(label):
    process_external_function(label)
    process_direct_label(label)
  elif is_function(label):
    process_internal_function(label)
    process_direct_label(label)
  else:
    assert label in LABELS

  P(rest)
# Process a jump, function call, or branch.
def process_jump(cfi_type, target, rest):
  """Re-emit a call/jump instruction, instrumenting it as needed.

  Args:
    cfi_type: The instruction mnemonic (e.g. `callq`, `jmp`, `jne`).
    target: The first operand of the instruction.
    rest: The remainder of the original line; emitted on its own line last.

  A target that is a known label or goes through the PLT is treated as
  direct: if it is a function it is redirected to the function's direct
  label, otherwise it is emitted unchanged. Any other target is treated as
  indirect: the instruction is preceded by `xbegin`, and calls are followed
  by `xend`.
  """
  goes_through_plt = target.endswith(PLT_SUFFIX)

  if target in LABELS or goes_through_plt:  # Direct jump.
    if is_function(target):
      if goes_through_plt:
        # Keep the @PLT suffix last: foo@PLT -> foo__tsxi_direct@PLT.
        target = target[:-len(PLT_SUFFIX)] + DIRECT_SUFFIX + PLT_SUFFIX
      else:
        target = target + DIRECT_SUFFIX
    P(INDENT, cfi_type, " ", target)

  else:  # Indirect jump.
    P(INDENT, BEGIN_TRANSACTION)
    P(INDENT, cfi_type, " ", target)
    if "call" in cfi_type.lower():
      P(INDENT, END_TRANSACTION)

  P(rest)
# Process a function return.
def process_return(rest):
  """Emit an `xbegin` followed by the return instruction.

  Args:
    rest: The text that followed `ret` on the original line (for example a
        size suffix); it is appended directly after `ret`.
  """
  P(INDENT, BEGIN_TRANSACTION)
  P(INDENT, "ret", rest)
# Declare an external tsxi error handler.
def process_first_line():
  """Emit the prologue injected once near the top of the output.

  Declares ERROR_FUNCTION and MAIN_FUNCTION as global function symbols, then
  defines a replacement `main` that opens a transaction, calls
  MAIN_FUNCTION (the renamed original `main`), closes the transaction and
  returns.
  """
  P(".globl ", ERROR_FUNCTION)
  P(".type ", ERROR_FUNCTION, ",@function")
  P()
  P(".globl ", MAIN_FUNCTION)
  P(".type ", MAIN_FUNCTION, ",@function")
  P("main:")
  P(INDENT, BEGIN_TRANSACTION)
  P(INDENT, "call\t", MAIN_FUNCTION)
  P(INDENT, END_TRANSACTION)
  P(INDENT, "ret")
# Process a single assembly line.
def process_line(asm_line):
  """Dispatch one line of assembly to the matching handler.

  The line is tried, in order, as a label definition, a call/jump and a
  return; the first pattern that matches decides the handler. Lines that
  match none of them are echoed unchanged.
  """
  m = LABEL_LINE.match(asm_line)
  if m:
    return process_label(m.group(1), m.group(2))

  m = JUMP_LINE.match(asm_line)
  if m:
    # Re-join the mnemonic: the regex splits `call`/`j` from its suffix.
    return process_jump(m.group(1) + m.group(2), m.group(3), m.group(4))

  m = RET_LINE.match(asm_line)
  if m:
    return process_return(m.group(1))

  P(asm_line)
def read_symbols(syms, file_name):
  """Add every symbol listed in `file_name` (one per line) to the set `syms`.

  Args:
    syms: A set, updated in place.
    file_name: Path of a text file holding one symbol name per line.

  Surrounding whitespace (including a stray carriage return) is stripped and
  blank lines are skipped, so no empty-string symbol is ever recorded.
  """
  with open(file_name, "r") as syms_file:
    for line in syms_file:
      name = line.strip()
      if name:
        syms.add(name)
# Entry point when this file is run with a Python interpreter.
# Arguments: <asm> <funcs> <labels> <globals> <weak symbols>; the
# instrumented assembly is written to stdout.
if "__main__" == __name__:
  if len(sys.argv) < 6:
    sys.stderr.write(
        "usage: {} ASM FUNCS LABELS GLOBALS WEAKS\n".format(sys.argv[0]))
    sys.exit(2)

  read_symbols(FUNCTIONS, sys.argv[2])
  read_symbols(LABELS, sys.argv[3])
  read_symbols(GLOBALS, sys.argv[4])
  read_symbols(WEAKS, sys.argv[5])

  with open(sys.argv[1], "r") as asm_lines:
    processed_first_line = False
    for asm_line in asm_lines:
      process_line(asm_line.rstrip("\r\n\t "))

      # For the "first" line, we want to do some things like
      # inject an error function, a new main function, etc. This is done
      # once, right after the first line that mentions ".text".
      if not processed_first_line and ".text" in asm_line:
        process_first_line()
        processed_first_line = True

    # Finish the output with an empty line.
    P()