Skip to content

Instantly share code, notes, and snippets.

@hackcasual
Created October 31, 2016 18:10
Show Gist options
  • Save hackcasual/474718a0763f2213bb86a43949a0ac7a to your computer and use it in GitHub Desktop.
Save hackcasual/474718a0763f2213bb86a43949a0ac7a to your computer and use it in GitHub Desktop.
diff --git a/emcc.py b/emcc.py
index 18edda4..a598c5c 100755
--- a/emcc.py
+++ b/emcc.py
@@ -1923,6 +1923,10 @@ There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR P
# Move final output to the js target
shutil.move(final, js_target)
+ # Post-process the JS output so each function's coverage tracer writes to a unique address
+ if shared.Settings.FUNCTION_COVERAGE:
+ execute([shared.PYTHON, shared.path_from_root('tools', 'funccov.py'), target])
+
# Separate out the asm.js code, if asked. Or, if necessary for another option
if (separate_asm or shared.Settings.BINARYEN) and not shared.Settings.WASM_BACKEND:
logging.debug('separating asm')
diff --git a/emscripten.py b/emscripten.py
index d6cd5b9..31569d4 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -135,6 +135,8 @@ def get_and_parse_backend(infile, settings, temp_files, DEBUG):
if settings['CYBERDWARF']:
backend_args += ['-enable-cyberdwarf']
backend_args += ['-enable-debug-intrinsics']
+ if settings['FUNCTION_COVERAGE']:
+ backend_args += ['-function-coverage']
if DEBUG:
logging.debug('emscript: llvm backend: ' + ' '.join(backend_args))
diff --git a/src/settings.js b/src/settings.js
index b6c6cba..ec56b34 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -754,4 +754,6 @@ var CYBERDWARF = 0; // see http://kripken.github.io/emscripten-site/docs/debuggi
var BUNDLED_CD_DEBUG_FILE = ""; // Path to the CyberDWARF debug file passed to the compiler
+var FUNCTION_COVERAGE = 0; // Tableau internal for now
+
// Reserved: variables containing POINTER_MASKING.
diff --git a/tools/funccov.py b/tools/funccov.py
new file mode 100644
index 0000000..1e0a45a
--- /dev/null
+++ b/tools/funccov.py
@@ -0,0 +1,107 @@
+"""
+This tool updates function coverage injected tracers to be unique per function.
+
+Phase 1: Identifies the minified symbol name that points to the HEAP8 object
+Phase 2: Identifies the common expression each asm function contains that updates the coverage global
+Phase 3: Modifies the address to be unique for each function and stores function name in a metadata file
+
+Coverage ID is generated from a hash of the array of covered function names, in the
+order their tracers appear in the asm module (entry i is the function writing to offset + i).
+This facilitates a design where the client doesn't need the data but can report it to a server that has the coverage information
+
+Usage: funccov.py <file.js>
+  Reads <file.js> and <file.js>.symbols, rewrites <file.js> in place and writes <file.js>.coverage
+"""
+
+import sys
+import re
+import json
+import hashlib
+import base64
+
+ASM_START = "// EMSCRIPTEN_START_ASM"
+ASM_END = "// EMSCRIPTEN_END_ASM"
+
+# Every tracer ends with this text; the heap address sits right before it
+TRACER_TAIL = "]=1;"
+
+js_path = sys.argv[1]
+
+with open(js_path) as inf:
+  fc = inf.read()
+
+# Read the symbol map: one "name in asm:name to report" pair per line
+symbols = {}
+with open(js_path + ".symbols") as inf:
+  for line in inf:
+    asm_name, sep, reported_name = line.strip().partition(":")
+    if sep:
+      symbols[asm_name] = reported_name
+
+start_i = fc.index(ASM_START) + len(ASM_START)
+end_i = fc.index(ASM_END)
+
+asm_body = fc[start_i:end_i]
+
+# Look for the name of our 8bit heap view variable
+heap8_match = re.search(r'var\s*(.)\s*=\s*new global.Int8Array', asm_body, re.M)
+if heap8_match is None:
+  print("Couldn't find HEAP8 value for {}. Not processing for function coverage".format(js_path))
+  sys.exit(0)
+
+# Escape the name: '$' is a valid JS identifier but a regex metacharacter
+heap8_var = re.escape(heap8_match.group(1))
+
+FUNC_PREFIX_RE = r'function ([^(]+)[^{]+{[^{}]*?'
+
+first_func = re.search(FUNC_PREFIX_RE + '(' + heap8_var + r'\[(\d+)\]=1;)', asm_body, re.M)
+if first_func is None:
+  sys.exit("Couldn't find a coverage tracer in {}. Was it built with FUNCTION_COVERAGE?".format(js_path))
+
+heap_offset = int(first_func.group(3))
+
+# Compile the expression we'll be using a lot
+tight_re = re.compile(FUNC_PREFIX_RE + '(' + heap8_var + r'\[' + str(heap_offset) + r'\]=1;)', re.M)
+
+func_id_to_name = []
+asm_chunks = []
+copied_to = 0
+
+# Walk the matches in one pass instead of re-slicing the (large) asm body per function
+for cur_match in tight_re.finditer(asm_body):
+  func_name = cur_match.group(1)
+
+  # Find the string offsets that bracket the heap address
+  offset_end = cur_match.end() - len(TRACER_TAIL)
+  offset_start = offset_end - len(str(heap_offset))
+
+  asm_chunks.append(asm_body[copied_to:offset_start])
+  asm_chunks.append(str(heap_offset + len(func_id_to_name)))
+
+  # Fall back to the asm name if the symbol map has no entry for it
+  func_id_to_name.append(symbols.get(func_name, func_name))
+
+  copied_to = offset_end
+
+asm_chunks.append(asm_body[copied_to:])
+func_count = len(func_id_to_name)
+
+print("Processed {} functions".format(func_count))
+
+str_repr = json.dumps(func_id_to_name)
+
+# Create a unique ID to separate coverage data files and instrumented Javascript
+# sha256 needs bytes and b64encode returns bytes on Python 3, so convert explicitly
+cov_id = base64.b64encode(hashlib.sha256(str_repr.encode("utf-8")).digest())[:10].decode("ascii")
+
+print("Function coverage ID = {}".format(cov_id))
+
+with open(js_path + ".coverage", "w") as outf:
+  json.dump({"cov-id": cov_id, "data": func_id_to_name}, outf)
+
+with open(js_path, "w") as outf:
+  outf.write(fc[:start_i])
+  outf.write("".join(asm_chunks))
+  outf.write(fc[end_i:])
+  outf.write("\nModule['tab-funccov-id']='" + cov_id + "';\nModule['tab-funccov-offset']="
+    + str(heap_offset) + ";\nModule['tab-funccov-count']=" + str(func_count) + ";\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment