Created
October 31, 2016 18:10
-
-
Save hackcasual/474718a0763f2213bb86a43949a0ac7a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/emcc.py b/emcc.py | |
index 18edda4..a598c5c 100755 | |
--- a/emcc.py | |
+++ b/emcc.py | |
@@ -1923,6 +1923,10 @@ There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR P | |
# Move final output to the js target | |
shutil.move(final, js_target) | |
+ # Post-process the output with tools/funccov.py so each function gets a unique coverage tracer | |
+ if shared.Settings.FUNCTION_COVERAGE: | |
+ execute([shared.PYTHON, shared.path_from_root('tools', 'funccov.py'), target]) | |
+ | |
# Separate out the asm.js code, if asked. Or, if necessary for another option | |
if (separate_asm or shared.Settings.BINARYEN) and not shared.Settings.WASM_BACKEND: | |
logging.debug('separating asm') | |
diff --git a/emscripten.py b/emscripten.py | |
index d6cd5b9..31569d4 100755 | |
--- a/emscripten.py | |
+++ b/emscripten.py | |
@@ -135,6 +135,8 @@ def get_and_parse_backend(infile, settings, temp_files, DEBUG): | |
if settings['CYBERDWARF']: | |
backend_args += ['-enable-cyberdwarf'] | |
backend_args += ['-enable-debug-intrinsics'] | |
+ if settings['FUNCTION_COVERAGE']: | |
+ backend_args += ['-function-coverage'] | |
if DEBUG: | |
logging.debug('emscript: llvm backend: ' + ' '.join(backend_args)) | |
diff --git a/src/settings.js b/src/settings.js | |
index b6c6cba..ec56b34 100644 | |
--- a/src/settings.js | |
+++ b/src/settings.js | |
@@ -754,4 +754,6 @@ var CYBERDWARF = 0; // see http://kripken.github.io/emscripten-site/docs/debuggi | |
var BUNDLED_CD_DEBUG_FILE = ""; // Path to the CyberDWARF debug file passed to the compiler | |
+var FUNCTION_COVERAGE = 0; // Tableau internal for now | |
+ | |
// Reserved: variables containing POINTER_MASKING. | |
diff --git a/tools/funccov.py b/tools/funccov.py | |
new file mode 100644 | |
index 0000000..1e0a45a | |
--- /dev/null | |
+++ b/tools/funccov.py | |
@@ -0,0 +1,97 @@ | |
+""" | |
+This tool updates function coverage injected tracers to be unique per function. | |
+ | |
+Phase 1: Identifies the minified symbol name that points to the HEAP8 object | |
+Phase 2: Identifies the common expression each asm function contains that updates the coverage global | |
+Phase 3: Modifies the address to be unique for each function and stores function name in a metadata file | |
+ | |
+Coverage ID is generated from a hash of the JSON-encoded list of instrumented function names, in the order they are found (the list is not sorted). | |
+This facilitates a design where the client doesn't need the data but can report it to a server that has the coverage information | |
+""" | |
+ | |
+ASM_START = "// EMSCRIPTEN_START_ASM" | |
+ASM_END = "// EMSCRIPTEN_END_ASM" | |
+ | |
+import sys | |
+import re | |
+import json | |
+import hashlib | |
+import base64 | |
+ | |
+fc = "" | |
+ | |
+with open(sys.argv[1]) as inf: | |
+ fc = inf.read() | |
+ | |
+symbols = "" | |
+ | |
+# Load the symbol map (one "key:value" entry per line) from the .symbols file next to the output | |
+with open(sys.argv[1] + ".symbols") as inf: | |
+ symbols = inf.read() | |
+ | |
+symbols = {y[0]:y[2] for y in [x.strip().partition(":") for x in symbols.split("\n")]} | |
+ | |
+start_i = fc.index(ASM_START) + len(ASM_START) | |
+end_i = fc.index(ASM_END) | |
+ | |
+asm_body = fc[start_i:end_i] | |
+ | |
+# Look for the name of our 8-bit heap view variable | |
+try: | |
+ heap8_var = re.search('var\s*(.)\s*=\s*new global.Int8Array', asm_body, re.M).group(1) | |
+except: | |
+ print("Couldn't find HEAP8 value for {}. Not processing for function coverage".format(sys.argv[1])) | |
+ exit(0) | |
+ | |
+first_func = re.search('function ([^(]+)[^{]+{[^{}]*?(' + heap8_var + '\\[(\d+)\\]=1;)', asm_body, re.M) | |
+ | |
+track_assignment = first_func.group(2) | |
+heap_offset = int(first_func.group(3)) | |
+ | |
+cur_asm_body = asm_body; | |
+ | |
+# Compile the expression we'll be using a lot | |
+tight_re = re.compile('function ([^(]+)[^{]+{[^{}]*?(' + heap8_var + '\\[' + str(heap_offset) + '\\]=1;)', re.M) | |
+ | |
+func_count = 0 | |
+func_id_to_name = [] | |
+asm_chunks = [] | |
+ | |
+cur_match = tight_re.search(cur_asm_body) | |
+ | |
+while cur_match: | |
+ func_name = cur_match.group(1) | |
+ | |
+ # Find the string offsets that bracket the heap address | |
+ offset_start = cur_asm_body[:cur_match.end()].rfind('[') + 1 | |
+ offset_end = cur_asm_body[:cur_match.end()].rfind(']') | |
+ | |
+ asm_chunks.append(cur_asm_body[:offset_start]) | |
+ asm_chunks.append(str(heap_offset + func_count)) | |
+ | |
+ func_id_to_name.append(symbols[func_name]) | |
+ | |
+ func_count += 1 | |
+ cur_asm_body = cur_asm_body[offset_end:] | |
+ cur_match = tight_re.search(cur_asm_body) | |
+ | |
+asm_chunks.append(cur_asm_body) | |
+ | |
+print("Processed {} functions".format(func_count)) | |
+ | |
+str_repr = json.dumps(func_id_to_name) | |
+ | |
+# Create a unique ID to separate coverage data files and instrumented JavaScript | |
+cov_id = base64.b64encode(hashlib.sha256(str_repr).digest())[:10] | |
+ | |
+print("Function coverage ID = {}".format(cov_id)) | |
+ | |
+with open(sys.argv[1] + ".coverage", "w") as outf: | |
+ json.dump({"cov-id": cov_id, "data": func_id_to_name}, outf) | |
+ | |
+with open(sys.argv[1], "w") as outf: | |
+ outf.write(fc[:start_i]) | |
+ outf.write("".join(asm_chunks)) | |
+ outf.write(fc[end_i:]) | |
+ outf.write("\nModule['tab-funccov-id']='" + cov_id + "';\nModule['tab-funccov-offset']=" | |
+ + str(heap_offset) + ";\nModule['tab-funccov-count']=" + str(func_count) + ";\n") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment