Skip to content

Instantly share code, notes, and snippets.

@mvdbeek
Created March 6, 2023 11:59
Show Gist options
  • Save mvdbeek/741dc2293dc1c6e301423dbe890eb030 to your computer and use it in GitHub Desktop.
Save mvdbeek/741dc2293dc1c6e301423dbe890eb030 to your computer and use it in GitHub Desktop.
import os
import subprocess
import time
from datetime import (
date,
datetime,
)
import click
import psutil
# Default for the --sleep option: seconds to wait between polling passes.
DEFAULT_SLEEP = 10
# Default for the --increase option: RSS change in MB, over one sleep period.
DEFAULT_INCREASE = 50
# Default for the --min-rss-cutoff option, in MB.
DEFAULT_MIN_RSS_CUTOFF = 1400
# Module-level state: maps each tracked pid to the RSS (in MB) seen on the
# previous polling pass.
prev_rsss = {}
def memory_spike_events(sleep, increase, min_rss_cutoff):
    """Poll gunicorn processes forever and yield an event for each RSS jump.

    On every pass the set of tracked pids is refreshed, the current RSS of
    each tracked pid is compared with the value stored on the previous pass,
    and the stored value is updated.

    Args:
        sleep: Seconds to wait between polling passes.
        increase: Minimum absolute RSS change (MB) between two passes that
            counts as an event. Note that drops are reported as well as
            rises because the absolute difference is compared.
        min_rss_cutoff: Events are only reported when the process's current
            RSS (MB) is at least this value, so that workers that are still
            ramping up after a restart do not trigger dumps.

    Yields:
        ``(pid, rss)`` tuples, where ``rss`` is the current RSS in MB.
    """
    while True:
        update_pids(get_gunicorn_pids())
        # Iterate over a snapshot so that updating prev_rsss inside the loop
        # can never interfere with the iteration.
        for pid, prev_rss in list(prev_rsss.items()):
            try:
                rss = proc_rss(pid)
            except psutil.NoSuchProcess:
                # The process exited after it was discovered; the next
                # update_pids() call drops it as stale.
                continue
            prev_rsss[pid] = rss
            if abs(rss - prev_rss) <= increase:
                continue
            if rss < min_rss_cutoff:
                # Below the cutoff: treated as a freshly (re)started worker.
                continue
            print(f"EVENT: {pid}: {prev_rss} MB -> {rss} MB ({rss - prev_rss:+} MB in {sleep}s)")
            yield (pid, rss)
        time.sleep(sleep)
def get_gunicorn_pids():
    """Return the set of pids whose command line looks like gunicorn.

    A process matches when its command line has more than one element and
    either of the first two elements ends with ``"gunicorn"`` (the second
    element presumably covers gunicorn being launched through an
    interpreter).

    Processes that disappear or cannot be inspected while the process table
    is being scanned are skipped instead of aborting the scan.

    Returns:
        A ``set`` of integer pids.
    """
    pids = set()
    for proc in psutil.process_iter():
        try:
            cmdline = proc.cmdline()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # NoSuchProcess also covers ZombieProcess (its subclass).
            continue
        if len(cmdline) > 1 and any(arg.endswith("gunicorn") for arg in cmdline[:2]):
            pids.add(proc.pid)
    return pids
def proc_rss(pid):
    """Return the resident set size of process *pid* in whole MB.

    The byte count reported by psutil is divided by 1024 * 1024 and
    truncated to an integer.
    """
    rss_bytes = psutil.Process(pid).memory_info().rss
    bytes_per_mb = 1024 * 1024
    return int(rss_bytes / bytes_per_mb)
def update_pids(cur_pids):
    """Synchronise the tracked pids in ``prev_rsss`` with *cur_pids*.

    Pids that are in *cur_pids* but not yet tracked are added with their
    current RSS as the baseline; tracked pids that are no longer in
    *cur_pids* are removed. Both changes are reported on stdout.

    Args:
        cur_pids: Set of pids that are currently of interest.
    """
    known_pids = set(prev_rsss)
    for pid in cur_pids - known_pids:
        try:
            rss = proc_rss(pid)
        except psutil.NoSuchProcess:
            # The process exited before its first sample could be taken;
            # do not start tracking it.
            continue
        prev_rsss[pid] = rss
        print(f"New pid: {pid} RSS {rss}")
    for pid in known_pids - cur_pids:
        print(f"Stale pid: {pid}, dropped")
        del prev_rsss[pid]
def run_pyspy_on_pid(pid):
    """Run ``py-spy dump`` against *pid* and return its captured stdout.

    Args:
        pid: Process id to dump.

    Returns:
        The raw ``bytes`` that py-spy wrote to stdout. This may be empty when
        py-spy fails; in that case the exit status and stderr are printed so
        that an empty dump is not silent.
    """
    result = subprocess.run(
        ["py-spy", "dump", "--pid", str(pid)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if result.returncode != 0:
        # stderr was previously captured and thrown away; surface it.
        details = result.stderr.decode(errors="replace").strip()
        print(f"py-spy dump failed for pid {pid} (exit {result.returncode}): {details}")
    return result.stdout
def write_dump(pid, rss, dump, directory="."):
    """Write *dump* to a timestamped file inside *directory*.

    The directory is created if it does not exist. The file is named
    ``YYYY-MM-DD-HH:MM:SS-<rss>M-<pid>-dump.txt``.

    Args:
        pid: Process id the dump belongs to (used in the file name).
        rss: RSS value to record in the file name.
        dump: Bytes to write.
        directory: Target directory; defaults to the current directory.
    """
    os.makedirs(directory, exist_ok=True)
    # Take a single timestamp so the date and time parts of the name cannot
    # disagree when the call straddles midnight.
    stamp = datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
    filename = f"{stamp}-{rss}M-{pid}-dump.txt"
    with open(os.path.join(directory, filename), "wb") as out:
        out.write(dump)
@click.command()
@click.option("--sleep", type=click.INT, default=DEFAULT_SLEEP, help="in sec")
@click.option("--increase", type=click.INT, default=DEFAULT_INCREASE, help="in MB, over sleep period")
@click.option("--min-rss-cutoff", type=click.INT, default=DEFAULT_MIN_RSS_CUTOFF, help="in MB, to filter out restarts")
@click.argument("directory", type=click.Path(file_okay=False))
def pyspy_on_memory_spike(sleep, increase, min_rss_cutoff, directory):
    # Command-line entry point: for every memory event reported by the
    # monitor, take a py-spy dump of the process and store it in `directory`.
    # (Plain comments are used here instead of a docstring because click
    # would expose a docstring as the command's --help text.)
    events = memory_spike_events(sleep, increase, min_rss_cutoff)
    for pid, rss in events:
        stack_dump = run_pyspy_on_pid(pid)
        write_dump(pid, rss, stack_dump, directory)
# Run the click command only when the file is executed as a script.
if __name__ == "__main__":
    pyspy_on_memory_spike()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment