Skip to content

Instantly share code, notes, and snippets.

@janjagusch
Last active March 13, 2021 13:20
Show Gist options
  • Save janjagusch/c95a21c44fe8d8a1f3384c0cb080046d to your computer and use it in GitHub Desktop.
Save janjagusch/c95a21c44fe8d8a1f3384c0cb080046d to your computer and use it in GitHub Desktop.
Takes a [folded stack samples file](https://github.com/brendangregg/FlameGraph#2-fold-stacks) and a regex pattern to filter for specific stack frames, then folds them again. This is useful when you want to profile a program that runs on multiple processes and want to aggregate stack frames regardless of which process they ran on.
import os
import re
from collections import namedtuple

import click
# Matches the count: the run of digits that ends a folded-stack line.
OCCURENCES_PATTERN = re.compile(r"\d+$")

# One folded-stack record: the stack text and how often it occurred.
# The typename matches the bound name so repr() shows the right name and
# pickle can look the class up by that name.
FoldedStack = namedtuple("FoldedStack", ("stack", "occurences"))
def _create_folded_stack(line):
    """Parse one line of a folded stack file into a ``FoldedStack``.

    Args:
        line: Text consisting of a stack followed by a count, e.g.
            ``"main;compute 42"``.

    Returns:
        A ``FoldedStack`` with the whitespace-stripped stack text and the
        count as an ``int``, or ``None`` when the line does not end in digits.
    """
    match = OCCURENCES_PATTERN.search(line)
    if match is None:
        # Blank or malformed lines carry no count; report them as None
        # rather than failing with AttributeError on a missing match.
        return None
    # Everything in front of the trailing digits is the stack itself.
    stack = line[: match.start()].strip()
    return FoldedStack(stack, int(match.group(0)))
def _filter_folded_stack(folded_stack, pattern):
    """Cut a folded stack down to the part matched by ``pattern``.

    Args:
        folded_stack: Record exposing ``stack`` and ``occurences``.
        pattern: Compiled regular expression searched within the stack text.

    Returns:
        A new ``FoldedStack`` whose stack is the matched text and whose count
        is unchanged, or ``None`` when ``pattern`` is not found.
    """
    match = pattern.search(folded_stack.stack)
    if match:
        trimmed = match.group(0)
        return FoldedStack(trimmed, folded_stack.occurences)
    return None
def _filter_folded_stacks(folded_stacks, pattern):
    """Run ``_filter_folded_stack`` over every stack and keep the hits.

    Args:
        folded_stacks: Iterable of folded stack records.
        pattern: Compiled regular expression handed to the per-stack filter.

    Returns:
        A tuple of the truthy filter results, in input order.
    """
    kept = []
    for candidate in folded_stacks:
        trimmed = _filter_folded_stack(candidate, pattern)
        if trimmed:
            kept.append(trimmed)
    return tuple(kept)
def _fold_folded_stacks(folded_stacks):
    """Sum the counts of folded stacks that share the same stack text.

    Args:
        folded_stacks: Iterable of records exposing ``stack`` and
            ``occurences``.

    Returns:
        A tuple with one ``FoldedStack`` per distinct stack text, in the
        dict's iteration order, each carrying the summed count.
    """
    totals = {}
    for entry in folded_stacks:
        # Start every new stack at zero, then add this entry's count.
        totals.setdefault(entry.stack, 0)
        totals[entry.stack] += entry.occurences
    return tuple(map(FoldedStack._make, totals.items()))
def _read_folded_stacks(filepath):
    """Read a folded stack file and return an iterator over its non-empty lines.

    Prints a progress message first. The file is read in full and closed
    before the iterator is returned.

    Args:
        filepath: Path of the file to read.

    Returns:
        An iterator yielding each non-empty line without its newline.
    """
    print(f"Reading folded stacks from {filepath}.")
    with open(filepath, mode="r") as handle:
        contents = handle.read()
    # filter(None, ...) drops the empty strings left by blank lines and by
    # the trailing newline.
    return filter(None, contents.split("\n"))
def _write_folded_stacks(folded_stacks, filepath):
    """Write folded stacks to ``filepath``, one ``"<stack> <count>"`` line each.

    Prints a progress message first. The file is opened in write mode.

    Args:
        folded_stacks: Iterable of records exposing ``stack`` and
            ``occurences``.
        filepath: Path of the file to write.
    """
    print(f"Writing folded stacks to {filepath}.")
    with open(filepath, mode="w") as handle:
        handle.writelines(
            f"{entry.stack} {entry.occurences}\n" for entry in folded_stacks
        )
def _default_outpath(inpath):
    """Derive the default output path by inserting ``-folded`` before the extension.

    Args:
        inpath: Path of the input file.

    Returns:
        ``inpath`` with ``-folded`` placed between its root and its
        extension, e.g. ``"stacks.txt"`` -> ``"stacks-folded.txt"``. A path
        without an extension simply gets ``-folded`` appended.
    """
    # splitext only looks for the extension in the final path component, so
    # extension-less names and dots in directory names are handled correctly
    # (a plain split on "." mangled both).
    root, extension = os.path.splitext(inpath)
    return f"{root}-folded{extension}"
@click.command()
@click.option(
    "--inpath",
    required=True,
    help="Path where to read the folded stack file from.",
)
@click.option(
    "--outpath", help="Path where to write the folded stack file to.", default=None
)
@click.option(
    "--pattern",
    required=True,
    help=r"Pattern where your call stack should start, e.g: '<module> \(compute_script.py:.*$'",
)
def main(
    inpath: str, outpath: str, pattern: str,
):
    """Fold folded stack calls.

    Reads in the folded stack file, filters out irrelevant stack calls,
    aggregates equivalent stack calls and writes it into a file.
    """
    # --inpath and --pattern are required: without them the code below would
    # fail on None instead of showing a usage error.
    # When no output path is given, derive one from the input path.
    outpath = outpath or _default_outpath(inpath)
    folded_stacks = (_create_folded_stack(line) for line in _read_folded_stacks(inpath))
    # Keep only truthy parse results, so a parser that reports an unparseable
    # line as None does not break the statistics below.
    folded_stacks = tuple(
        folded_stack for folded_stack in folded_stacks if folded_stack
    )
    print(f"Initial distinct stacks: {len(folded_stacks)}.")
    print(
        f"Initial occurences: {sum(folded_stack.occurences for folded_stack in folded_stacks)}"
    )
    # Trim every stack to the part matched by the pattern; drop non-matching ones.
    folded_stacks = _filter_folded_stacks(folded_stacks, re.compile(pattern))
    print(f"Filtered distinct stacks: {len(folded_stacks)}.")
    print(
        f"Filtered occurences: {sum(folded_stack.occurences for folded_stack in folded_stacks)}"
    )
    # Merge stacks that became identical after trimming, summing their counts.
    folded_stacks = _fold_folded_stacks(folded_stacks)
    print(f"Folded distinct stacks: {len(folded_stacks)}.")
    _write_folded_stacks(folded_stacks, outpath)
# Invoke the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment