@tomrittervg
Last active April 6, 2021 13:57
Message Manager Logging

#!/usr/bin/env python3
# First MOZ_LOG="MessageManager:5" ./mach run > mmlog.txt 2>&1
# Then ./mmseparate.py mmlog.txt
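# Output (as written by process() below): one <process_type>.<pid>.log file per
# process and one topic.<topic>.log file per message topic, all placed in the
# directory given by --out-dir (default: mm-sorted-<timestamp>).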
import os
import re
import sys
import time
import pathlib
import argparse
import jsbeautifier
opts = jsbeautifier.default_options()
opts.indent_size = 2
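
# The two regexes below expect MOZ_LOG lines shaped roughly like this
# (example values are illustrative, not copied from a real log):
#   [Parent 1234: Main Thread]: D/MessageManager 17 SomeCallsite Message: Some:Topic in process type: default
#   [Parent 1234: Main Thread]: V/MessageManager 17 <message payload>
# i.e. a Debug line naming the callsite and topic, followed by a Verbose line
# with the same message id carrying the payload.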
LINE_PARSE = re.compile(r"\[([a-zA-Z]+) ([0-9]+): ([a-zA-Z ]+)\]: ([DV])/MessageManager ([-0-9]+) (.+)$")
TOPIC_PARSE = re.compile(r"(.+) Message: (.+) in process type: .+")

class MMLine():
    def _parse(self, l):
        m = LINE_PARSE.match(l)
        if not m:
            # Break into the debugger so the unparsable line can be inspected.
            import pdb
            pdb.set_trace()
            print("Could not parse line:", l)
            sys.exit(1)
        if len(m.groups()) != 6:
            print("Did not get correct number of line groups from:", l)
            sys.exit(1)
        assert self.process_type == m.groups()[0] or not self.process_type, "Assertion failed on (%s, %s): %s" % (self.process_type, m.groups()[0], l)
        assert self.process_pid == m.groups()[1] or not self.process_pid, "Assertion failed on (%s, %s): %s" % (self.process_pid, m.groups()[1], l)
        assert self.thread == m.groups()[2] or not self.thread, "Assertion failed on (%s, %s): %s" % (self.thread, m.groups()[2], l)
        assert self.msg_id == m.groups()[4] or not self.msg_id, "Assertion failed on (%s, %s): %s" % (self.msg_id, m.groups()[4], l)
        assert (m.groups()[3] == "D" and not self.process_type) or m.groups()[3] == "V", "Assertion failed on " + l
        self.process_type = m.groups()[0]
        self.process_pid = m.groups()[1]
        self.thread = m.groups()[2]
        self.msg_id = m.groups()[4]
        return m.groups()[3], m.groups()[5]

    def __init__(self, l):
        self.process_type = ""
        self.process_pid = ""
        self.thread = ""
        self.msg_id = ""
        self.verbosity = ""
        self.content = ""
        self.callsite = ""
        self.topic = ""
        verbosity, payload = self._parse(l)
        if verbosity == "D":
            self.type = "debug"
            m = TOPIC_PARSE.search(payload)
            if not m or len(m.groups()) != 2:
                print("Did not get correct number of topic groups from:", payload)
                sys.exit(1)
            self.callsite = m.groups()[0]
            self.topic = m.groups()[1]
        else:
            self.type = "verbose"

    def add_content(self, l):
        self.message = self._parse(l)[1]

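# MessageManager appears to log each message as a Debug/Verbose pair that shares
# a message id: the Debug line names the callsite and topic, and the Verbose line
# carries the payload. process() groups the raw lines by pid (different processes
# interleave in the log), re-joins each pair via tmp_correlation_bucket, and then
# writes the beautified payloads out per process and per topic.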
def process(filename, output_dir):
    # First separate the lines by pid because we have interleaving of log lines
    lines_by_pid = {}
    # Used to map pids to a process type (e.g. parent)
    pid_to_type = {}
    # Used to associate the Verbose messages to Debug temporarily
    tmp_correlation_bucket = {}
    # Used to store the data
    data_by_pid = {}
    data_by_topic = {}
    with open(filename, "r") as f:
        for l in f:
            if "MessageManager" not in l:
                continue
            if not l.strip():
                continue
            line = MMLine(l)
            if line.process_pid not in lines_by_pid:
                lines_by_pid[line.process_pid] = []
                data_by_pid[line.process_pid] = []
                pid_to_type[line.process_pid] = line.process_type
            if line.topic and line.topic not in data_by_topic:
                data_by_topic[line.topic] = []
            lines_by_pid[line.process_pid].append(l)
    for pid in lines_by_pid:
        print("Processing %s (pid %s)" % (pid_to_type[pid], pid))
        lines = iter(lines_by_pid[pid])
        for l in lines:
            line = MMLine(l)
            if line.type == "debug":
                tmp_correlation_bucket[line.msg_id] = line
            else:
                tmp_correlation_bucket[line.msg_id].add_content(l)
                line = tmp_correlation_bucket[line.msg_id]
                data_by_pid[pid].append(line)
                data_by_topic[line.topic].append(line)
    for pid in data_by_pid:
        process_type = pid_to_type[pid]
        output_file = os.path.join(output_dir, "%s.%s.log" % (process_type, pid))
        print("Outputting all data for %s (pid %s) to %s" % (process_type, pid, output_file))
        data = data_by_pid[pid]
        with open(output_file, "w") as f:
            for d in data:
                f.write("------------------------------\n")
                f.write("%s received in %s\n" % (d.topic, d.callsite))
                message = jsbeautifier.beautify(d.message, opts)
                f.write(" " + message.replace("\n", "\n ") + "\n\n")
    for topic in data_by_topic:
        data = data_by_topic[topic]
        process_types = {}
        callsites = {}
        for d in data:
            if d.process_type not in process_types:
                process_types[d.process_type] = 0
            if d.callsite not in callsites:
                callsites[d.callsite] = 0
            process_types[d.process_type] += 1
            callsites[d.callsite] += 1
        output_file = os.path.join(output_dir, "topic.%s.log" % (topic))
        print("Outputting all data for topic '%s' to %s" % (topic, output_file))
        if len(callsites) > 1:
            print(" Unusually, this topic was received in %s different callsites." % len(callsites))
        if len(process_types) > 1:
            print(" Unusually, this topic was received in %s different process types." % len(process_types))
        with open(output_file, "w") as f:
            f.write("Process Breakdown:\n")
            for t in process_types:
                f.write(" %s: %s\n" % (t, process_types[t]))
            f.write("\n------------------------------\n")
            f.write("Callsite Breakdown:\n")
            for c in callsites:
                f.write(" %s: %s\n" % (c, callsites[c]))
            f.write("\n------------------------------\n")
            for d in data:
                f.write("------------------------------\n")
                f.write("Received in %s in process %s (pid %s)\n" % (d.callsite, d.process_type, d.process_pid))
                message = jsbeautifier.beautify(d.message, opts)
                f.write(" " + message.replace("\n", "\n ") + "\n\n")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--out-dir", default="mm-sorted-%s" % int(time.time()), help="Directory to write output logs in.")
    parser.add_argument("log_files", nargs="*", help="Log files to parse.")
    args = parser.parse_args()
    if len(args.log_files) <= 0:
        print("Supply log files to separate on the command line.")
        sys.exit(1)
    pathlib.Path(args.out_dir).mkdir(parents=True, exist_ok=True)
    for f in args.log_files:
        print("Processing", f)
        process(f, args.out_dir)