Last active
March 13, 2021 13:20
-
-
Save janjagusch/c95a21c44fe8d8a1f3384c0cb080046d to your computer and use it in GitHub Desktop.
Takes a [folded stack samples file](https://github.com/brendangregg/FlameGraph#2-fold-stacks) and a regex pattern to filter for specific stack frames, then folds them again. This is useful when you want to profile a program that runs on multiple processes and want to aggregate stack frames regardless of which process they ran on.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from collections import namedtuple | |
import click | |
# Matches the run of digits that ends a line (the trailing count).
OCCURENCES_PATTERN = re.compile(r"\d+$")

# One parsed record: the stack text and the number of times it occurred.
# The typename now matches the bound name, so repr() reads "FoldedStack(...)".
FoldedStack = namedtuple("FoldedStack", ("stack", "occurences"))
def _create_folded_stack(line):
    """Parse one line of the form ``<stack> <count>`` into a ``FoldedStack``.

    Args:
        line: A single line of text; surrounding whitespace is ignored.

    Returns:
        A ``FoldedStack`` holding the stack text (whitespace-stripped) and the
        trailing count as an ``int``, or ``None`` when the line does not end
        in digits and therefore cannot be parsed.
    """
    # Strip first so trailing blanks do not prevent the "$"-anchored pattern
    # from finding the count.
    line = line.strip()
    match = OCCURENCES_PATTERN.search(line)
    if match is None:
        # No trailing count: signal "unparseable" instead of raising
        # AttributeError on None.
        return None
    stack = line[: match.start()].strip()
    return FoldedStack(stack, int(match.group(0)))
def _filter_folded_stack(folded_stack, pattern):
    """Reduce a folded stack to the part of its stack text matched by *pattern*.

    Args:
        folded_stack: Record exposing ``stack`` and ``occurences``.
        pattern: Compiled regular expression searched within the stack text.

    Returns:
        A new ``FoldedStack`` whose stack is the matched text and whose
        occurence count is unchanged, or ``None`` if the pattern does not match.
    """
    hit = pattern.search(folded_stack.stack)
    return FoldedStack(hit.group(0), folded_stack.occurences) if hit else None
def _filter_folded_stacks(folded_stacks, pattern):
    """Apply ``_filter_folded_stack`` to every record and keep the matches.

    Args:
        folded_stacks: Iterable of folded stack records.
        pattern: Compiled regular expression handed to ``_filter_folded_stack``.

    Returns:
        A tuple of the truthy filter results, in input order.
    """
    kept = []
    for candidate in folded_stacks:
        trimmed = _filter_folded_stack(candidate, pattern)
        if trimmed:
            kept.append(trimmed)
    return tuple(kept)
def _fold_folded_stacks(folded_stacks):
    """Merge records that share the same stack text by summing their counts.

    Args:
        folded_stacks: Iterable of records exposing ``stack`` and ``occurences``.

    Returns:
        A tuple with one ``FoldedStack`` per distinct stack text, ordered by
        first appearance, each carrying the summed occurence count.
    """
    totals = {}
    for entry in folded_stacks:
        key = entry.stack
        # Seed unseen stacks with 0 so the addition below is uniform.
        totals.setdefault(key, 0)
        totals[key] = totals[key] + entry.occurences
    return tuple(map(FoldedStack._make, totals.items()))
def _read_folded_stacks(filepath): | |
print(f"Reading folded stacks from {filepath}.") | |
with open(filepath, mode="r") as fp: | |
return (line for line in fp.read().split("\n") if line) | |
def _write_folded_stacks(folded_stacks, filepath): | |
print(f"Writing folded stacks to {filepath}.") | |
with open(filepath, mode="w") as fp: | |
for folded_stack in folded_stacks: | |
fp.write(f"{folded_stack.stack} {folded_stack.occurences}\n") | |
def _default_outpath(inpath): | |
path = ".".join(inpath.split(".")[:-1]) | |
extension = inpath.split(".")[-1] | |
return f"{path}-folded.{extension}" | |
@click.command()
@click.option(
    "--inpath",
    required=True,
    help="Path where to read the folded stack file from.",
)
@click.option(
    "--outpath", help="Path where to write the folded stack file to.", default=None
)
@click.option(
    "--pattern",
    required=True,
    help=r"Pattern where your call stack should start, e.g: '<module> \(compute_script.py:.*$'",
)
def main(
    inpath: str, outpath: str, pattern: str,
):
    """Fold folded stack calls.

    Reads in the folded stack file, filters out irrelevant stack calls,
    aggregates equivalent stack calls and writes it into a file.
    """
    # --inpath and --pattern are required: with either left as None the code
    # below would die with an AttributeError/TypeError traceback rather than
    # a usage message. --outpath stays optional and is derived from inpath.
    outpath = outpath or _default_outpath(inpath)

    folded_stacks = (_create_folded_stack(line) for line in _read_folded_stacks(inpath))
    # Discard any falsy parse result (e.g. None) before counting.
    folded_stacks = tuple(
        folded_stack for folded_stack in folded_stacks if folded_stack
    )
    print(f"Initial distinct stacks: {len(folded_stacks)}.")
    print(
        f"Initial occurences: {sum(folded_stack.occurences for folded_stack in folded_stacks)}"
    )

    # Keep only stacks matching the pattern, trimmed to the matched text.
    folded_stacks = _filter_folded_stacks(folded_stacks, re.compile(pattern))
    print(f"Filtered distinct stacks: {len(folded_stacks)}.")
    print(
        f"Filtered occurences: {sum(folded_stack.occurences for folded_stack in folded_stacks)}"
    )

    # Merge stacks that became identical after trimming.
    folded_stacks = _fold_folded_stacks(folded_stacks)
    print(f"Folded distinct stacks: {len(folded_stacks)}.")
    _write_folded_stacks(folded_stacks, outpath)
# Invoke the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment