# spiermar/nflxprofile_convert.py

## nflxprofile_convert.py
import re
import os
import logging
import nflxprofile_pb2

# Header line of a sample: captures the numeric timestamp that precedes
# the "<event>:" field.
event_regexp = re.compile(r" +([0-9.]+): .+?:")
# Stack frame line: "<hex address> <symbol> (<library>)"; captures the
# symbol and the library text.
frame_regexp = re.compile(r"^[\t ]*[0-9a-fA-F]+ (.+) \((.*?)\)$")
# Leading run of non-digit characters on a header line (the command name).
comm_regexp = re.compile(r"^ *([^0-9]+)")

# A stack is treated as idle when "swapper" is followed, anywhere later in
# the joined stack string, by one of the idle symbols below.
idle_process = re.compile(r"swapper")
idle_stack = re.compile(
    "(cpuidle|cpu_idle|cpu_bringup_and_idle"
    "|native_safe_halt|xen_hypercall_sched_op|xen_hypercall_vcpu_op)"
)
idle_regexp = re.compile(
    "%s.*%s" % (idle_process.pattern, idle_stack.pattern)
)

# Level name for the root logger, overridable through the environment.
LOGLEVEL = os.getenv('LOGLEVEL', 'INFO')

logger = logging.getLogger()
logger.setLevel(getattr(logging, LOGLEVEL))

# Inverted cache keyed by function name. Each value holds two parallel
# lists: 'p' (parent node ids) and 'c' (the matching child node ids).
inverted_child_id_cache = {}


def library2type(library):
    """Classify the library text of a stack frame.

    Args:
        library: Library string captured from a frame line; may be empty.

    Returns:
        "" for an empty library, "jit" for paths starting with
        "/tmp/perf-", "kernel" for names starting with "[" or containing
        "vmlinux", and "user" for everything else.
    """
    if not library:
        return ""
    if library.startswith("/tmp/perf-"):
        return "jit"
    # A membership test also covers a library whose name *starts* with
    # "vmlinux"; the previous `find(...) > 0` check skipped index 0 and
    # classified such a library as "user".
    if library.startswith("[") or "vmlinux" in library:
        return "kernel"
    return "user"


def add_to_inverted_child_id_cache(name, parent_id, child_id):
    """Register a (parent, child) node id pair under a function name.

    The module-level cache maps each function name to two parallel lists,
    'p' (parent ids) and 'c' (child ids); this appends one entry to both,
    creating the lists on first use of the name.
    """
    entry = inverted_child_id_cache.setdefault(name, {'p': [], 'c': []})
    entry['p'].append(parent_id)
    entry['c'].append(child_id)


def find_child_node_id(nodes, node_id, child_name):
    """Look up the id of the child named `child_name` under `node_id`.

    Args:
        nodes: Unused; kept for interface compatibility.
        node_id: Id of the parent node.
        child_name: Function name of the wanted child.

    Returns:
        The child id recorded for the first matching parent entry in the
        inverted cache, or None when the name or the parent is unknown.
    """
    entry = inverted_child_id_cache.get(child_name)
    if entry is None:
        return None
    # 'p' and 'c' are parallel lists: position k pairs a parent with its child.
    for parent, child in zip(entry['p'], entry['c']):
        if parent == node_id:
            return child
    return None


def _record_sample(profile, stack, comm, ts, previous_ts, id_count):
    """Merge one completed stack into the profile's node tree.

    Args:
        profile: The nflxprofile_pb2.Profile being built.
        stack: List of [name, library] pairs. Entry 0 is the command name;
            the frames follow in reverse of the order they were read.
        comm: Command name of the sample (only used for the java "L" strip).
        ts: Timestamp of the sample, or None when none was parsed.
        previous_ts: Timestamp of the last recorded sample, or None.
        id_count: Next unused node id.

    Returns:
        The updated (previous_ts, id_count) pair.
    """
    # Idle detection runs against the ";"-terminated, joined frame names.
    stackstr = "".join(pair[0] + ";" for pair in stack)
    if idle_regexp.search(stackstr):
        return previous_ts, id_count

    node_id = 0
    last_index = len(stack) - 1
    for i, pair in enumerate(stack):
        # Split inlined frames. "->" is used by software such as java
        # perf-map-agent. For example, "a->b->c" means c() is inlined in b(),
        # and b() is inlined in a(). b() and c() get the "inlined" library
        # type, and a() gets whatever the library says it is.
        for j, name in enumerate(pair[0].split('->')):
            # Strip the leading "L" from java symbols *before* the lookup:
            # the cache is keyed by the stripped name, so looking up the raw
            # name would never hit and would duplicate the node every sample.
            if comm == "java" and name.startswith("L"):
                name = name[1:]
            child_id = find_child_node_id(profile.nodes, node_id, name)
            if child_id is not None:
                node_id = child_id
                if i == last_index:
                    # last item
                    # NOTE(review): a node created for the last item starts
                    # at hit_count 0 (see below), so its first sample is not
                    # counted here. Kept as in the original; confirm what
                    # consumers of hit_count expect.
                    node = profile.nodes[child_id]
                    node.hit_count = node.hit_count + 1
            else:
                node = profile.nodes[id_count]
                node.function_name = name
                node.hit_count = 0
                # Position within the "->" chain decides "inlined", not the
                # number of nodes created so far: an inlined frame under an
                # already known outer frame is still inlined.
                node.libtype = library2type(pair[1]) if j == 0 else "inlined"
                node.parent = node_id
                profile.nodes[node_id].children.append(id_count)
                add_to_inverted_child_id_cache(name, node_id, id_count)
                node_id = id_count
                id_count += 1

    profile.samples.append(node_id)
    # Explicit None checks so that a timestamp of 0.0 still gets a delta.
    if ts is not None:
        if previous_ts is None:
            profile.time_deltas.append(0)
            profile.start_time = ts
        else:
            profile.time_deltas.append(ts - previous_ts)
        previous_ts = ts
        profile.end_time = ts
    return previous_ts, id_count


def parse_from_perf(profile_iterator):
    """Build an nflxprofile Profile from lines of perf text output.

    Header lines (matched by `event_regexp`) start a new sample; the frame
    lines that follow (matched by `frame_regexp`) are collected into a stack,
    which is merged into the node tree when the next header arrives or the
    input ends. Stacks matching `idle_regexp` are dropped.

    Args:
        profile_iterator: Iterable of lines, as bytes (decoded as UTF-8) or
            str. Presumably the output of `perf script` -- confirm with the
            caller.

    Returns:
        The populated nflxprofile_pb2.Profile. Node 0 is the 'root' node.
    """
    # Node ids restart at 1 on every call, so entries left in the
    # module-level cache by a previous call would point at the wrong nodes.
    inverted_child_id_cache.clear()

    # creating the new protobuf profile and initializing with root
    profile = nflxprofile_pb2.Profile()
    profile.nodes[0].function_name = 'root'
    profile.nodes[0].hit_count = 0
    profile.params['has_parent'] = 'true'

    id_count = 1        # next unused node id
    previous_ts = None  # timestamp of the last recorded sample
    stack = []          # [name, library] pairs of the sample being read
    comm = None         # command name of the sample being read
    ts = None           # timestamp of the sample being read

    for line in profile_iterator:
        if isinstance(line, bytes):
            line = line.decode('utf-8')

        # skip comments and empty lines
        if not line or line[0] == '#':
            continue

        # As a performance optimization, skip the event regexp search if the
        # line looks like a stack trace based on starting with '\t'. This
        # makes a big difference.
        header = event_regexp.search(line) if line[0] != '\t' else None

        if header:
            if stack:
                # A new header closes the prior sample.
                previous_ts, id_count = _record_sample(
                    profile, stack, comm, ts, previous_ts, id_count)
                stack = []
            ts = float(header.group(1))
            comm_match = comm_regexp.search(line)
            if comm_match:
                comm = comm_match.group(1).rstrip()
                stack.append([comm, ""])
            else:
                stack.append(["<unknown>", ""])
        else:
            frame = frame_regexp.search(line)
            if frame:
                name = frame.group(1)
                # strip instruction offset (+0xfe200...)
                offset = name.find("+")
                if offset > 0:
                    name = name[:offset]
                # Insert right after the command entry, so frames end up in
                # reverse of the order they were read.
                stack.insert(1, [name, frame.group(2)])

    # The last sample has no following header to trigger its processing.
    if stack:
        previous_ts, id_count = _record_sample(
            profile, stack, comm, ts, previous_ts, id_count)

    print("Processed {} ids.".format(str(id_count)))
    return profile
	import re
	import os
	import logging
	import nflxprofile_pb2

	# Header line of a sample: captures the numeric timestamp that precedes
	# the "<event>:" field.
	event_regexp = re.compile(r" +([0-9.]+): .+?:")
	# Stack frame line: "<hex address> <symbol> (<library>)". The "*"
	# quantifiers are required: without them only a single leading whitespace
	# character and a library of at most one character would match.
	frame_regexp = re.compile(r"^[\t ]*[0-9a-fA-F]+ (.+) \((.*?)\)$")
	# Leading run of non-digit characters on a header line (the command name).
	comm_regexp = re.compile(r"^ *([^0-9]+)")
	idle_process = re.compile(r"swapper")
	# The pipes must stay unescaped: "\|" matches a literal "|" and would
	# turn the alternation into one long literal string.
	idle_stack = re.compile(r"(cpuidle|cpu_idle|cpu_bringup_and_idle|native_safe_halt|xen_hypercall_sched_op|xen_hypercall_vcpu_op)")
	idle_regexp = re.compile("%s.*%s" % (idle_process.pattern, idle_stack.pattern))

	# Level name for the root logger, overridable through the environment.
	LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO')

	logger = logging.getLogger()
	logger.setLevel(getattr(logging, LOGLEVEL))

	# Inverted cache keyed by function name. Each value holds two parallel
	# lists: 'p' (parent node ids) and 'c' (the matching child node ids).
	inverted_child_id_cache = {}


	def library2type(library):
	    """Classify the library text of a stack frame.

	    Args:
	        library: Library string captured from a frame line; may be empty.

	    Returns:
	        "" for an empty library, "jit" for paths starting with
	        "/tmp/perf-", "kernel" for names starting with "[" or containing
	        "vmlinux", and "user" for everything else.
	    """
	    if not library:
	        return ""
	    if library.startswith("/tmp/perf-"):
	        return "jit"
	    # A membership test also covers a library whose name *starts* with
	    # "vmlinux"; a `find(...) > 0` check skips index 0.
	    if library.startswith("[") or "vmlinux" in library:
	        return "kernel"
	    return "user"


	def add_to_inverted_child_id_cache(name, parent_id, child_id):
	    """Register a (parent, child) node id pair under a function name.

	    The module-level cache maps each function name to two parallel lists,
	    'p' (parent ids) and 'c' (child ids); this appends one entry to both,
	    creating the lists on first use of the name.
	    """
	    entry = inverted_child_id_cache.setdefault(name, {'p': [], 'c': []})
	    entry['p'].append(parent_id)
	    entry['c'].append(child_id)


	def find_child_node_id(nodes, node_id, child_name):
	    """Look up the id of the child named `child_name` under `node_id`.

	    Args:
	        nodes: Unused; kept for interface compatibility.
	        node_id: Id of the parent node.
	        child_name: Function name of the wanted child.

	    Returns:
	        The child id recorded for the first matching parent entry in the
	        inverted cache, or None when the name or the parent is unknown.
	    """
	    entry = inverted_child_id_cache.get(child_name)
	    if entry is None:
	        return None
	    # 'p' and 'c' are parallel lists: position k pairs a parent with its child.
	    for parent, child in zip(entry['p'], entry['c']):
	        if parent == node_id:
	            return child
	    return None


	def _record_sample(profile, stack, comm, ts, previous_ts, id_count):
	    """Merge one completed stack into the profile's node tree.

	    Args:
	        profile: The nflxprofile_pb2.Profile being built.
	        stack: List of [name, library] pairs. Entry 0 is the command name;
	            the frames follow in reverse of the order they were read.
	        comm: Command name of the sample (only used for the java "L" strip).
	        ts: Timestamp of the sample, or None when none was parsed.
	        previous_ts: Timestamp of the last recorded sample, or None.
	        id_count: Next unused node id.

	    Returns:
	        The updated (previous_ts, id_count) pair.
	    """
	    # Idle detection runs against the ";"-terminated, joined frame names.
	    stackstr = "".join(pair[0] + ";" for pair in stack)
	    if idle_regexp.search(stackstr):
	        return previous_ts, id_count

	    node_id = 0
	    last_index = len(stack) - 1
	    for i, pair in enumerate(stack):
	        # Split inlined frames. "->" is used by software such as java
	        # perf-map-agent. For example, "a->b->c" means c() is inlined in b(),
	        # and b() is inlined in a(). b() and c() get the "inlined" library
	        # type, and a() gets whatever the library says it is.
	        for j, name in enumerate(pair[0].split('->')):
	            # Strip the leading "L" from java symbols *before* the lookup:
	            # the cache is keyed by the stripped name, so looking up the raw
	            # name would never hit and would duplicate the node every sample.
	            if comm == "java" and name.startswith("L"):
	                name = name[1:]
	            child_id = find_child_node_id(profile.nodes, node_id, name)
	            if child_id is not None:
	                node_id = child_id
	                if i == last_index:
	                    # last item
	                    # NOTE(review): a node created for the last item starts
	                    # at hit_count 0 (see below), so its first sample is not
	                    # counted here. Behaviour left unchanged; confirm what
	                    # consumers of hit_count expect.
	                    node = profile.nodes[child_id]
	                    node.hit_count = node.hit_count + 1
	            else:
	                node = profile.nodes[id_count]
	                node.function_name = name
	                node.hit_count = 0
	                # Position within the "->" chain decides "inlined", not the
	                # number of nodes created so far: an inlined frame under an
	                # already known outer frame is still inlined.
	                node.libtype = library2type(pair[1]) if j == 0 else "inlined"
	                node.parent = node_id
	                profile.nodes[node_id].children.append(id_count)
	                add_to_inverted_child_id_cache(name, node_id, id_count)
	                node_id = id_count
	                id_count += 1

	    profile.samples.append(node_id)
	    # Explicit None checks so that a timestamp of 0.0 still gets a delta.
	    if ts is not None:
	        if previous_ts is None:
	            profile.time_deltas.append(0)
	            profile.start_time = ts
	        else:
	            profile.time_deltas.append(ts - previous_ts)
	        previous_ts = ts
	        profile.end_time = ts
	    return previous_ts, id_count


	def parse_from_perf(profile_iterator):
	    """Build an nflxprofile Profile from lines of perf text output.

	    Header lines (matched by `event_regexp`) start a new sample; the frame
	    lines that follow (matched by `frame_regexp`) are collected into a stack,
	    which is merged into the node tree when the next header arrives or the
	    input ends. Stacks matching `idle_regexp` are dropped.

	    Args:
	        profile_iterator: Iterable of lines, as bytes (decoded as UTF-8) or
	            str. Presumably the output of `perf script` -- confirm with the
	            caller.

	    Returns:
	        The populated nflxprofile_pb2.Profile. Node 0 is the 'root' node.
	    """
	    # Node ids restart at 1 on every call, so entries left in the
	    # module-level cache by a previous call would point at the wrong nodes.
	    inverted_child_id_cache.clear()

	    # creating the new protobuf profile and initializing with root
	    profile = nflxprofile_pb2.Profile()
	    profile.nodes[0].function_name = 'root'
	    profile.nodes[0].hit_count = 0
	    profile.params['has_parent'] = 'true'

	    id_count = 1        # next unused node id
	    previous_ts = None  # timestamp of the last recorded sample
	    stack = []          # [name, library] pairs of the sample being read
	    comm = None         # command name of the sample being read
	    ts = None           # timestamp of the sample being read

	    for line in profile_iterator:
	        if isinstance(line, bytes):
	            line = line.decode('utf-8')

	        # skip comments and empty lines
	        if not line or line[0] == '#':
	            continue

	        # As a performance optimization, skip the event regexp search if the
	        # line looks like a stack trace based on starting with '\t'. This
	        # makes a big difference.
	        header = event_regexp.search(line) if line[0] != '\t' else None

	        if header:
	            if stack:
	                # A new header closes the prior sample.
	                previous_ts, id_count = _record_sample(
	                    profile, stack, comm, ts, previous_ts, id_count)
	                stack = []
	            ts = float(header.group(1))
	            comm_match = comm_regexp.search(line)
	            if comm_match:
	                comm = comm_match.group(1).rstrip()
	                stack.append([comm, ""])
	            else:
	                stack.append(["<unknown>", ""])
	        else:
	            frame = frame_regexp.search(line)
	            if frame:
	                name = frame.group(1)
	                # strip instruction offset (+0xfe200...)
	                offset = name.find("+")
	                if offset > 0:
	                    name = name[:offset]
	                # Insert right after the command entry, so frames end up in
	                # reverse of the order they were read.
	                stack.insert(1, [name, frame.group(2)])

	    # The last sample has no following header to trigger its processing.
	    if stack:
	        previous_ts, id_count = _record_sample(
	            profile, stack, comm, ts, previous_ts, id_count)

	    print("Processed {} ids.".format(str(id_count)))
	    return profile