Skip to content

Instantly share code, notes, and snippets.

@nascheme
Created January 13, 2023 04:54
Show Gist options
  • Save nascheme/8652a77af79837a2cd6e24e88b457fc6 to your computer and use it in GitHub Desktop.
Save nascheme/8652a77af79837a2cd6e24e88b457fc6 to your computer and use it in GitHub Desktop.
# script to generate GC summary from statistics gathered with
# https://github.com/python/cpython/pull/100958
import sys
import pandas as pd
import numpy as np
import argparse
def get_gc_summary(df):
    """Summarize garbage-collection statistics per generation.

    Groups *df* by its ``generation_number`` column and computes one row
    of aggregate statistics for each distinct generation.

    Args:
        df: DataFrame containing at least the columns
            ``generation_number``, ``collected_cycles``,
            ``collection_time`` and ``total_objects``.

    Returns:
        A DataFrame indexed by ``generation_number`` with the columns
        ``num_collections`` (row count), ``mean_collected_cycles``,
        ``mean_time``, ``mean_total_objects`` and ``total_time`` (sum of
        ``collection_time``).
    """
    return df.groupby('generation_number').aggregate(
        num_collections=('generation_number', 'count'),
        mean_collected_cycles=('collected_cycles', 'mean'),
        mean_time=('collection_time', 'mean'),
        mean_total_objects=('total_objects', 'mean'),
        # Use the string alias instead of the ``np.sum`` callable: recent
        # pandas versions warn that passing the callable will change
        # meaning, while 'sum' always selects the groupby sum.
        total_time=('collection_time', 'sum'),
    )
def main(argv=None):
    """Print a combined per-generation GC summary for one or more CSV files.

    Each input file is read with ``pd.read_csv``, summarized with
    ``get_gc_summary`` and tagged with a ``filename`` column. The
    per-file summaries are stacked, three derived ratio columns are
    added, and the result is written to standard output as CSV.

    Args:
        argv: Optional list of command-line arguments (file names).
            When ``None`` (the default), ``sys.argv[1:]`` is parsed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('files', nargs='+')
    args = parser.parse_args(argv)

    # Gather the per-file summaries and concatenate once. Growing a frame
    # with pd.concat inside the loop re-copies all earlier rows on every
    # iteration, and seeding it with an empty DataFrame is deprecated in
    # recent pandas. Concatenating only the real summaries should also keep
    # the 'generation_number' index name, so the first CSV column is
    # labelled. ``nargs='+'`` guarantees at least one file, so the list is
    # never empty.
    per_file = []
    for fn in args.files:
        sdf = get_gc_summary(pd.read_csv(fn))
        sdf['filename'] = fn
        per_file.append(sdf)
    summary = pd.concat(per_file)

    # Rescale once, after all files are combined (doing this inside the
    # loop would rescale earlier files repeatedly). Only 'mean_time' is
    # converted; 'total_time' stays in the input's units.
    summary['mean_time'] *= 1e9  # convert to ns
    summary['time_per_total'] = (
        summary['mean_time'] / summary['mean_total_objects']
    )
    summary['time_per_collected'] = (
        summary['mean_time'] / summary['mean_collected_cycles']
    )
    summary['ratio_collected'] = (
        summary['mean_collected_cycles'] / summary['mean_total_objects']
    )
    summary.to_csv(sys.stdout)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment