Skip to content

Instantly share code, notes, and snippets.

@spiermar
Created May 13, 2019 17:25
Show Gist options
  • Save spiermar/195286aa29d2ebbe6ecde27c1fd7e8b7 to your computer and use it in GitHub Desktop.
Save spiermar/195286aa29d2ebbe6ecde27c1fd7e8b7 to your computer and use it in GitHub Desktop.
import re
import os
import logging
import nflxprofile_pb2
# Matches an event header line and captures the timestamp that precedes the
# event name, e.g. "java 1234 [000] 12345.678901: cpu-clock:" -> "12345.678901".
event_regexp = re.compile(r" +([0-9.]+): .+?:")
# Matches a stack frame line "<hex address> <symbol> (<library>)".
# Group 1 is the symbol (may still carry a "+0x..." offset), group 2 the library.
frame_regexp = re.compile(r"^[\t ]*[0-9a-fA-F]+ (.+) \((.*?)\)$")
# Captures the leading run of non-digit characters of a header line; the
# parser uses it (right-stripped) as the command name.
comm_regexp = re.compile(r"^ *([^0-9]+)")
# A sample is treated as idle when its joined stack string contains the idle
# process name followed, anywhere later, by one of the idle function names.
idle_process = re.compile("swapper")
idle_stack = re.compile("(cpuidle|cpu_idle|cpu_bringup_and_idle|native_safe_halt|xen_hypercall_sched_op|xen_hypercall_vcpu_op)")
idle_regexp = re.compile("%s.*%s" % (idle_process.pattern, idle_stack.pattern))

# Log level name taken from the environment (defaults to INFO).
LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO')
logger = logging.getLogger()
# Upper-case the name before resolving it: the level constants on the logging
# module are upper-case, while lower-case names such as "debug" resolve to the
# module-level logging *functions*, which setLevel() rejects with a TypeError.
logger.setLevel(getattr(logging, LOGLEVEL.upper()))

# Inverted cache for function names: maps a function name to two parallel
# lists, 'p' (parent node ids) and 'c' (the child node id created under the
# parent at the same position).
inverted_child_id_cache = {}
def library2type(library):
    """Classify the library string of a stack frame.

    Args:
        library: The text captured between parentheses on a frame line
            (may be empty).

    Returns:
        "" for an empty library, "jit" for paths under "/tmp/perf-",
        "kernel" for names starting with "[" or containing "vmlinux",
        and "user" for everything else.
    """
    if library == "":
        return ""
    if library.startswith("/tmp/perf-"):
        return "jit"
    # Containment test rather than `find(...) > 0`, which skipped a match at
    # index 0 (e.g. a bare "vmlinux" or "vmlinux-<version>").
    if library.startswith("[") or "vmlinux" in library:
        return "kernel"
    return "user"
def add_to_inverted_child_id_cache(name, parent_id, child_id):
    """Register `child_id` as the child named `name` of `parent_id`.

    The module-level ``inverted_child_id_cache`` keeps, per function name,
    two parallel lists: 'p' holds parent node ids and 'c' holds the child
    node id recorded for the parent at the same index. The entry for `name`
    is created on first use.

    Args:
        name: Function name of the child node.
        parent_id: Node id of the parent.
        child_id: Node id of the child.
    """
    entry = inverted_child_id_cache.setdefault(name, {'p': [], 'c': []})
    entry['p'].append(parent_id)
    entry['c'].append(child_id)
def find_child_node_id(nodes, node_id, child_name):
    """Look up the id of the child called `child_name` under node `node_id`.

    The lookup goes through the module-level ``inverted_child_id_cache``;
    the first position at which `node_id` appears in the name's parent list
    selects the child id.

    Args:
        nodes: Not used by the lookup; kept so the signature stays the same.
        node_id: Id of the parent node.
        child_name: Function name of the child being searched for.

    Returns:
        The child node id, or None when the name is not cached or has no
        entry for this parent.
    """
    entry = inverted_child_id_cache.get(child_name)
    if entry is None:
        return None
    try:
        position = entry['p'].index(node_id)
    except ValueError:
        # name is known, but not below this parent
        return None
    return entry['c'][position]
def _stack_is_idle(stack):
    """Return True when the ';'-joined frame names match the idle pattern."""
    joined = "".join(frame[0] + ";" for frame in stack)
    return idle_regexp.search(joined) is not None


def _add_stack_to_profile(profile, stack, comm, next_id):
    """Walk `stack` from the root node, creating missing nodes on the way.

    Returns a tuple ``(leaf_node_id, next_free_node_id)``.
    """
    node_id = 0
    last_index = len(stack) - 1
    for i, (frame_name, library) in enumerate(stack):
        # Split inlined frames. "->" is used by software such as java
        # perf-map-agent. For example, "a->b->c" means c() is inlined in b(),
        # and b() is inlined in a(). b() and c() get the "inlined" library
        # type, a() gets whatever the library says it is.
        for j, name in enumerate(frame_name.split('->')):
            # strip leading "L" from java symbols (only reason we need comm).
            # This has to happen before the cache lookup: the cache is keyed
            # by the stripped name, so looking up the raw name never matches
            # and a duplicate node would be created for every sample.
            if comm == "java" and name.startswith("L"):
                name = name[1:]
            child_id = find_child_node_id(profile.nodes, node_id, name)
            if child_id is not None:
                node_id = child_id
                if i == last_index:
                    # last item
                    # NOTE(review): a node created by the branch below starts
                    # at hit_count 0 even when it is the leaf of this sample,
                    # so only repeat visits are counted here - confirm that
                    # this is what consumers of hit_count expect.
                    profile.nodes[child_id].hit_count += 1
                continue
            node = profile.nodes[next_id]
            node.function_name = name
            node.hit_count = 0
            # The position inside the "->" chain decides the type; a counter
            # that only advances on node creation would mislabel an inlined
            # frame whose outer frame already existed.
            node.libtype = library2type(library) if j == 0 else "inlined"
            node.parent = node_id
            profile.nodes[node_id].children.append(next_id)  # adding children id
            add_to_inverted_child_id_cache(name, node_id, next_id)  # adding new node to inverted node cache
            node_id = next_id  # moving current node id to it
            next_id += 1  # incrementing next id
    return node_id, next_id


def _record_sample(profile, stack, comm, ts, previous_ts, next_id):
    """Add one finished sample to `profile` unless its stack is idle.

    Returns a tuple ``(next_free_node_id, previous_ts)`` with the updated
    parser state.
    """
    if _stack_is_idle(stack):
        return next_id, previous_ts
    leaf_id, next_id = _add_stack_to_profile(profile, stack, comm, next_id)
    profile.samples.append(leaf_id)
    # Explicit None checks so that a timestamp of 0.0 is not mistaken for
    # "no timestamp".
    if ts is not None:
        if previous_ts is None:
            profile.time_deltas.append(0)
            profile.start_time = ts
        else:
            profile.time_deltas.append(ts - previous_ts)
        previous_ts = ts
        profile.end_time = ts
    return next_id, previous_ts


def parse_from_perf(profile_iterator):
    """Build an nflxprofile ``Profile`` from perf-script style text lines.

    A line matching ``event_regexp`` starts a new sample; the lines matching
    ``frame_regexp`` that follow are its stack frames. Samples whose stack
    matches ``idle_regexp`` are skipped. Every other sample extends the call
    tree in ``profile.nodes`` (node 0 is the root) and appends its leaf node
    id to ``profile.samples`` together with a timestamp delta in
    ``profile.time_deltas``.

    Args:
        profile_iterator: Iterable of lines. Items that are not ``str`` are
            decoded as UTF-8; ``str`` items are used as they are.

    Returns:
        The populated ``nflxprofile_pb2.Profile``.
    """
    # The cache is module-global while node ids restart at 1 for every
    # profile, so entries left over from an earlier call must not be reused.
    inverted_child_id_cache.clear()

    # creating the new protobuf profile and initializing with root
    profile = nflxprofile_pb2.Profile()
    profile.nodes[0].function_name = 'root'
    profile.nodes[0].hit_count = 0
    profile.params['has_parent'] = 'true'

    # global count for node ids
    id_count = 1
    # sample timestamp store for delta calculation
    previous_ts = None
    # temporary stack array for current sample
    stack = []
    # comm and timestamp of the sample currently being collected
    comm = None
    ts = None

    for line in profile_iterator:
        if not isinstance(line, str):
            line = line.decode('utf-8')

        # skip comments and empty lines
        if not line or line[0] == '#':
            continue

        # As a performance optimization, skip an event regexp search if the
        # line looks like a stack trace based on starting with '\t'. This
        # makes a big difference.
        header = None
        if line[0] != '\t':
            header = event_regexp.search(line)

        if header:
            # A new header closes the sample collected so far. comm and ts
            # still belong to that sample until they are overwritten below.
            if stack:
                id_count, previous_ts = _record_sample(
                    profile, stack, comm, ts, previous_ts, id_count)
            stack = []
            ts = float(header.group(1))
            comm_match = comm_regexp.search(line)
            if comm_match:
                comm = comm_match.group(1).rstrip()
                stack.append([comm, ""])
            else:
                stack.append(["<unknown>", ""])
        else:
            frame = frame_regexp.search(line)
            if frame:
                name = frame.group(1)
                # strip instruction offset (+0xfe200...)
                offset = name.find("+")
                if offset > 0:
                    name = name[:offset]
                # Insert right behind the comm entry, so the frames end up in
                # reverse order of appearance (this assumes the input lists
                # the innermost frame first).
                stack.insert(1, [name, frame.group(2)])

    # The last sample has no following header to trigger its processing.
    if stack:
        id_count, previous_ts = _record_sample(
            profile, stack, comm, ts, previous_ts, id_count)

    print("Processed {} ids.".format(str(id_count)))
    return profile
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment