Created
May 13, 2019 17:25
-
-
Save spiermar/195286aa29d2ebbe6ecde27c1fd7e8b7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
import logging | |
import nflxprofile_pb2 | |
# Event header line of a sample: captures the timestamp that precedes
# ": <event name>:" (group 1 is later parsed with float()).
event_regexp = re.compile(r" +([0-9.]+): .+?:")

# Stack frame line: "<hex address> <symbol> (<library>)".
# Group 1 is the symbol (possibly with a "+0x..." offset), group 2 the library.
frame_regexp = re.compile(r"^[\t ]*[0-9a-fA-F]+ (.+) \((.*?)\)$")

# Leading run of non-digit characters on an event line, used as the command
# name (callers strip the trailing whitespace).
comm_regexp = re.compile(r"^ *([^0-9]+)")

# A sample is treated as idle when the idle process name is followed, anywhere
# later in its ";"-joined stack, by one of the idle functions below.
idle_process = re.compile(r"swapper")
idle_stack = re.compile(r"(cpuidle|cpu_idle|cpu_bringup_and_idle|native_safe_halt|xen_hypercall_sched_op|xen_hypercall_vcpu_op)")
idle_regexp = re.compile("%s.*%s" % (idle_process.pattern, idle_stack.pattern))

# Log level comes from the environment; the raw value is kept in LOGLEVEL.
LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO')
logger = logging.getLogger()

# logging.getLevelName() maps a known level *name* to its number and returns
# a "Level ..." string for anything else. Normalizing the case and falling
# back to INFO keeps a typo or lowercase value in the environment from
# crashing the import.
_log_level = logging.getLevelName(LOGLEVEL.strip().upper())
if not isinstance(_log_level, int):
    logger.warning("Unknown LOGLEVEL %r, falling back to INFO", LOGLEVEL)
    _log_level = logging.INFO
logger.setLevel(_log_level)

# inverted cache for function names:
# function name -> {'p': [parent node ids], 'c': [child node ids]},
# where the two lists are parallel (same index = same parent/child edge).
inverted_child_id_cache = {}
def library2type(library):
    """Classify the library/DSO string of a stack frame.

    Args:
        library: the text captured between parentheses on a frame line;
            may be the empty string.

    Returns:
        "" for an empty library, "jit" for "/tmp/perf-*" map files,
        "kernel" for bracketed names or anything mentioning "vmlinux",
        and "user" for everything else.
    """
    if library == "":
        return ""
    if library.startswith("/tmp/perf-"):
        return "jit"
    if library.startswith("["):
        return "kernel"
    # Substring test rather than `find(...) > 0`, which skipped a library
    # string that *begins* with "vmlinux" (match at index 0).
    if "vmlinux" in library:
        return "kernel"
    return "user"
def add_to_inverted_child_id_cache(name, parent_id, child_id):
    """Record in the module-level cache that `child_id` is the node named
    `name` directly under `parent_id`.

    Each cache entry holds two parallel lists: 'p' (parent ids) and
    'c' (child ids); the same index in both describes one edge.
    """
    entry = inverted_child_id_cache.get(name)
    if entry is None:
        # first time this function name is seen
        inverted_child_id_cache[name] = {
            'p': [parent_id],
            'c': [child_id],
        }
        return
    entry['p'].append(parent_id)
    entry['c'].append(child_id)
def find_child_node_id(nodes, node_id, child_name):
    """Return the id of the child of `node_id` named `child_name`, or None.

    The lookup goes through the module-level inverted cache only; `nodes`
    is accepted for interface compatibility and is not read.
    """
    try:
        entry = inverted_child_id_cache[child_name]
        # 'p' and 'c' are parallel: the position of the parent id in 'p'
        # is the position of the matching child id in 'c'.
        return entry['c'][entry['p'].index(node_id)]
    except (KeyError, ValueError):
        # unknown name, or known name but not under this parent
        return None
def _insert_stack(profile, stack, comm, next_id):
    """Walk one sample's stack down the call tree, creating missing nodes.

    Args:
        profile: the profile being built; node 0 is the root.
        stack: list of [name, library] pairs, root-most first.
        comm: command name of the sample (or None if unknown).
        next_id: first unused node id.

    Returns:
        (leaf_id, next_id): id of the node the sample ends on, and the
        updated first unused node id.
    """
    node_id = 0
    for frame_name, library in stack:
        # Split inlined frames. "->" is used by software such as java
        # perf-map-agent. For example, "a->b->c" means c() is inlined in b(),
        # and b() is inlined in a(). b() and c() get the "inlined" library
        # type, and a() whatever the library says it is.
        for depth, name in enumerate(frame_name.split('->')):
            # strip leading "L" from java symbols (only reason we need comm).
            # This must happen BEFORE the cache lookup: nodes are cached under
            # the stripped name, so looking up the raw name would never hit.
            if comm == "java" and name.startswith("L"):
                name = name[1:]
            child_id = find_child_node_id(profile.nodes, node_id, name)
            if child_id is None:
                child_id = next_id
                next_id += 1
                node = profile.nodes[child_id]
                node.function_name = name
                node.hit_count = 0
                # position inside the "->" chain decides the type, regardless
                # of whether the outer names already existed in the tree
                node.libtype = library2type(library) if depth == 0 else "inlined"
                node.parent = node_id
                profile.nodes[node_id].children.append(child_id)
                add_to_inverted_child_id_cache(name, node_id, child_id)
            node_id = child_id
    # exactly one hit per sample, on the node the stack ends on
    profile.nodes[node_id].hit_count += 1
    return node_id, next_id


def _commit_sample(profile, stack, comm, ts, previous_ts, next_id):
    """Fold one finished sample into `profile`, unless it is empty or idle.

    Returns:
        (previous_ts, next_id): timestamp of the last recorded sample and
        the first unused node id, both updated if the sample was recorded.
    """
    if not stack:
        return previous_ts, next_id
    if idle_regexp.search(";".join(frame[0] for frame in stack)):
        return previous_ts, next_id

    leaf_id, next_id = _insert_stack(profile, stack, comm, next_id)
    profile.samples.append(leaf_id)
    # A non-empty stack always starts with an event header, so `ts` is set.
    # Compare against None so a 0.0 timestamp is not mistaken for "missing".
    if previous_ts is None:
        profile.time_deltas.append(0)
        profile.start_time = ts
    else:
        profile.time_deltas.append(ts - previous_ts)
    profile.end_time = ts
    return ts, next_id


def parse_from_perf(profile_iterator):
    """Build an nflxprofile Profile from perf-script style text output.

    Args:
        profile_iterator: iterable of lines, either UTF-8 encoded bytes or
            str. Lines starting with '#' are skipped, lines matching
            `event_regexp` start a new sample, and lines matching
            `frame_regexp` add a frame to the current sample.

    Returns:
        nflxprofile_pb2.Profile with node 0 as 'root', one entry in
        `samples` / `time_deltas` per non-idle sample, and
        `start_time` / `end_time` set from the first / last recorded sample.
    """
    # The child lookup cache is module-global; entries left over from a
    # previous call would resolve to node ids of a different profile.
    inverted_child_id_cache.clear()

    # creating the new protobuf profile and initializing with root
    profile = nflxprofile_pb2.Profile()
    profile.nodes[0].function_name = 'root'
    profile.nodes[0].hit_count = 0
    profile.params['has_parent'] = 'true'

    id_count = 1        # next free node id (0 is the root)
    previous_ts = None  # timestamp of the last recorded sample
    stack = []          # [name, library] pairs of the sample being read
    comm = None         # command name of the sample being read
    ts = None           # timestamp of the sample being read

    for line in profile_iterator:
        if isinstance(line, bytes):
            line = line.decode('utf-8')

        # skip comments and empty lines
        if not line or line[0] == '#':
            continue

        # As a performance optimization, skip an event regexp search if the
        # line looks like a stack trace based on starting with '\t'. This
        # makes a big difference.
        event = None
        if line[0] != '\t':
            event = event_regexp.search(line)

        if event:
            # a new event header closes the sample collected so far
            previous_ts, id_count = _commit_sample(
                profile, stack, comm, ts, previous_ts, id_count)
            ts = float(event.group(1))
            comm_match = comm_regexp.search(line)
            if comm_match:
                comm = comm_match.group(1).rstrip()
                stack = [[comm, ""]]
            else:
                # do not carry the previous sample's comm over
                comm = None
                stack = [["<unknown>", ""]]
            continue

        frame = frame_regexp.search(line)
        # Frames seen before any event header have no sample to belong to.
        if frame and stack:
            name = frame.group(1)
            # strip instruction offset (+0xfe200...)
            offset = name.find("+")
            if offset > 0:
                name = name[:offset]
            # Insert right after the comm entry, so frames read later end up
            # closer to the root (presumably because the input lists the
            # innermost frame first - verify against the perf output used).
            stack.insert(1, [name, frame.group(2)])

    # the last sample has no following event header to trigger its commit
    previous_ts, id_count = _commit_sample(
        profile, stack, comm, ts, previous_ts, id_count)

    print("Processed {} ids.".format(str(id_count)))
    return profile
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment