Skip to content

Instantly share code, notes, and snippets.

@bbengfort
Created March 20, 2017 15:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bbengfort/574e0b5acf0068527f74bba897538dcb to your computer and use it in GitHub Desktop.
Save bbengfort/574e0b5acf0068527f74bba897538dcb to your computer and use it in GitHub Desktop.
Fork of memory_profiler that performs per-child memory profiling for multiprocessing applications.
#!/usr/bin/env python3
"""
Multiprocessing version of memory profiling of Python programs.
"""
import os
import re
import time
import glob
import argparse
import subprocess
import memory_profiler as mp
from collections import defaultdict
try:
import numpy as np
import matplotlib.pyplot as plt
except ImportError:
plt = None
np = None
# Command Descriptions and Constants
DESCRIPTION = "Multiprocessing memory profiling over time."
EPILOG = "If there are any bugs or concerns, submit an issue on Github"
VERSION = "mpmprof v{}".format(mp.__version__)
FILETIME = "%Y%m%d%H%M%S"
BLANKS = set(' \t')
def run_action(args):
"""
Run the given program and profile its memory usage.
"""
# Determine where to write the output to
if args.output is None:
args.output = "mprofile_{}.dat".format(
time.strftime(FILETIME, time.localtime())
)
# Determine if the command is a Python command
if args.command[0].endswith('.py') and not args.nopython:
args.python = True
# Run the executable with the extra features
if args.python:
print("running as a Python program ...")
if not args.command[0].startswith('python'):
args.command.insert(0, 'python')
# Inform the user we're sampling
print("mpmprof: Sampling memory every {} seconds".format(args.interval))
# Put the command back together from the argument parsing
command = " ".join([
c if BLANKS.isdisjoint(c) else "'{}'".format(c) for c in args.command
])
# Open a subprocess to the given command
proc = subprocess.Popen(args.command)
# This is where a call to mp.memory_usage should go.
# Instead we're adding the custom code for sampling spawned memory
with open(args.output, "a") as f:
# Write the command to the data file
f.write("CMDLINE {}\n".format(command))
# Continue sampling until the subprocess is over, counting lines
lines = 0
while True:
# Determine if the subprocess is still running
if proc.poll() is not None: break
# Collect memory usage of master program and write to profile
mem = mp._get_memory(proc.pid)
f.write("MEM {0:.6f} {1:.4f}\n".format(mem, time.time()))
lines += 1
# Collect memory usage of spawned children and write to profile
for idx, mem in enumerate(mp._get_child_memory(proc.pid)):
f.write("CHLD{0} {1:.6f} {2:.4f}\n".format(idx, mem, time.time()))
lines += 1
# Flush every 50 lines
if lines > 50:
lines = 0
f.flush()
# Sleep for the given interval
time.sleep(args.interval)
# Return the results of the run action
return "memory profile written to {}".format(args.output)
def plot_action(args):
"""
Use matplotlib to draw the memory usage of a mprofile .dat file.
"""
if plt is None:
raise ImportError(
"matplotlib is needed for plotting."
)
def read_mprofile_file(path):
"""
Reads the specialized version of the mprofile for multiprocessing
"""
# Regular expression line parsers for parsing data
cmdre = re.compile(r"^CMDLINE\s+(.+)$")
memre = re.compile(r"^MEM\s+([\d\.e]+)\s+([\d\.e]+)$")
cldre = re.compile(r"^CHLD(\d+)\s+([\d\.e]+)\s+([\d\.e]+)$")
# Data structure returned is a series of names (mem, ts) tuples.
series = defaultdict(list)
command = None
with open(path, 'r') as f:
for line in f:
# Match children memory usage lines
match = cldre.match(line)
if match:
idx, mem, ts = match.groups()
series["child " + idx].append((float(mem), float(ts)))
continue
# Match main process memory usage lines
match = memre.match(line)
if match:
series['main'].append(tuple(map(float, match.groups())))
continue
# Match command line(s)
# NOTE: mprofile files are openeded for appending, could be multiple
match = cmdre.match(line)
if match:
command = match.groups()[0]
return command, series
def plot_mprofile_file(path, title=None):
"""
Plots an mprofile file that contains specialized child process data.
"""
# Parse the mprofile file to get the data
command, series = read_mprofile_file(path)
title = title or command
# Create and configure the figure
fig = plt.figure(figsize=(14, 6), dpi=90)
axe = fig.add_axes([0.1, 0.1, 0.6, 0.75])
axe.set_xlabel("time (in seconds)")
axe.set_ylabel("memory used (in MiB)")
axe.set_title(title)
# Find the start timestamp for the process and track the maximal memory point
# This currently assumes that the series were written in order
start = series['main'][0][1]
mpoint = (0, 0)
# Plot all of the series, the main process and the child.
for proc, data in series.items():
# Create the numpy arrays from the series data
ts = np.asarray([item[1] for item in data]) - start
mem = np.asarray([item[0] for item in data])
# Plot the line to the figure
plt.plot(ts, mem, "+-", label=proc)
# Detect the maximal memory point
max_mem = mem.max()
if max_mem > mpoint[1]:
mpoint = (mem.argmax(), max_mem)
# Add the marker lines for the maximal memory usage
plt.hlines(mpoint[1], plt.xlim()[0]+0.001, plt.xlim()[1] - 0.001, 'r', '--')
plt.vlines(ts[mpoint[0]], plt.ylim()[0]+0.001, plt.ylim()[1] - 0.001, 'r', '--')
# Add the legend
legend = axe.legend(loc='center left', bbox_to_anchor=(1, 0.5))
legend.get_frame().set_alpha(0.5)
axe.grid()
# Get the latest profile if no profile files were passed in.
if not args.profile:
# Glob profiles of our format and sort them.
profiles = glob.glob("mprofile_??????????????.dat")
profiles.sort()
if not profiles:
raise ValueError((
"No input file found.\nThis program looks for mprofile_*.dat "
"files generated by the `mpmprof run` command."
))
# Assign the latest profile to visualize
args.profile = profiles[0:1]
# Filter out any files that do not exist
args.profile = list(filter(os.path.exists, args.profile))
if not args.profile:
raise ValueError("No input files found!")
# For each passed in file, create a figure from the mprofile.
for path in args.profile:
axe = plot_mprofile_file(path, args.title)
if args.output:
plt.savefig(args.output)
else:
plt.show()
return "{} memory profiles plotted.".format(len(args.profile))
if __name__ == '__main__':
# Create the argument parser and subparsers for each command
parser = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG)
subparsers = parser.add_subparsers(title='commands')
# Add the version command
parser.add_argument('-v', '--version', action='version', version=VERSION)
# Commands defined in an dictionary for easy adding
commands = (
# Run command definition
{
'name': 'run',
'action': run_action,
'help': 'monitor the memory usage of a command',
'args': {
'--python': {
'default': False,
'action': 'store_true',
'help': 'activates extra features for Python programs',
},
'--nopython': {
'default': False,
'action': 'store_true',
'help': 'disables extra features for Python programs',
},
('-T', '--interval'): {
'type': float,
'default': 0.1,
'metavar': 'S',
'help': 'sampling period (in seconds), defaults to 0.1',
},
('-o', '--output'): {
'type': str,
'default': None,
'metavar': 'PATH',
'help': 'location to write the memory profiler output to',
},
'command': {
'nargs': argparse.REMAINDER,
'help': 'command to run and profile memory usage',
}
}
},
# Plot command definition
{
'name': 'plot',
'action': plot_action,
'help': 'plot the memory usage of a mprofile data file',
'args': {
('-t', '--title'): {
'type': str,
'default': None,
'metavar': 'S',
'help': 'set the title of the figure',
},
('-o', '--output'): {
'type': str,
'default': None,
'metavar': 'PATH',
'help': 'write the figure as a png to disk'
},
'profile': {
'nargs': '*',
'help': 'profile to plot, omit to use the latest',
}
}
}
)
# Add the commands and their arguments.
for cmd in commands:
# Create the command subparser and add the action
cmd_parser = subparsers.add_parser(cmd['name'], help=cmd['help'])
cmd_parser.set_defaults(func=cmd['action'])
# Add the arguments
for args, kwargs in cmd['args'].items():
if isinstance(args, str):
args = (args,)
cmd_parser.add_argument(*args, **kwargs)
# Handle input from the command line
args = parser.parse_args() # Parse the arguments
try:
msg = args.func(args) # Call the default function
parser.exit(0, msg+"\n") # Exit cleanly with message
except Exception as e:
parser.error(str(e)) # Exit with error
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment