Skip to content

Instantly share code, notes, and snippets.

@greed2411
Last active November 7, 2023 06:26
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save greed2411/5d157c0fed77a1994b5bfd9217a9ddb4 to your computer and use it in GitHub Desktop.
Save greed2411/5d157c0fed77a1994b5bfd9217a9ddb4 to your computer and use it in GitHub Desktop.
finding memory allocations (and memory leak) in python source code, how many times, size in bytes with tracemalloc
"""
Python script to find the memory allocations
i. over a module/function that is running
ii. between the loops
It is a script so that it can be torn apart for your own use-cases.
Based on this nice article on trying to find memory leaks in python:
https://www.fugue.co/blog/diagnosing-and-fixing-memory-leaks-in-python.html
But the code is non-existent and isn't reproducible as of 2023.
Experiment was done using: Python 3.8 & 3.12
tracemalloc module is present from 3.4 itself
run this with,
PYTHONTRACEMALLOC=1 python profile_malloc.py
"""
import os
import linecache
import tracemalloc
import typing
from typing import List
# --- tracemalloc configuration constants ---------------------------------
# From docs:
# https://docs.python.org/3/library/tracemalloc.html#tracemalloc.start
# """
# Storing more than 1 frame is only useful to compute statistics grouped by 'traceback' or to compute cumulative statistics
# """
# if grouping statistics by:
# * lineno, then use it as 1 (recommended)
# * filename, then use it as 1 only (recommended)
# * traceback, then use it as 10 (recommended), 25, 500
# Number of stack frames kept per allocation trace; read from the same env
# var that also enables tracing at interpreter startup (PYTHONTRACEMALLOC).
TRACEMALLOC_NFRAMES_COUNT = int(os.getenv("PYTHONTRACEMALLOC", "1"))
# From docs:
# https://docs.python.org/3/library/tracemalloc.html#tracemalloc.Snapshot.statistics
# groupby on this key-type for allocation count, diff, size etc
# for best-results in per-module / per-function memory monitoring, set STATS_GROUPBY_KEY_TYPE = "lineno" (recommended if fn is small) or "filename" (recommended if fn is large)
# for best-results in per-iteration memory monitoring, set STATS_GROUPBY_KEY_TYPE = "lineno"
STATS_GROUPBY_KEY_TYPE = "lineno"
# STATS_GROUPBY_KEY_TYPE = "filename"
# From docs:
# https://docs.python.org/3/library/tracemalloc.html#tracemalloc.Snapshot.statistics
# """
# If cumulative is True, cumulate size and count of memory blocks of all frames of the traceback of a trace, not only the most recent frame.
# The cumulative mode can only be used with key_type equals to 'filename' and 'lineno'
# """
# if this value is set to True, it'll aggregate the allocations across all frames
# if this value is set to False, it'll show each allocation's value for the recent frame
# for best-results in per-module / per-function memory monitoring, set STATS_CUMULATIVE = True
# for best-results in per-iteration memory monitoring, set STATS_CUMULATIVE = False
STATS_CUMULATIVE = True
# saving the tracemalloc's snapshot's statistics in a readable format
# NOTE(review): constant name has a typo ("OUPTUT" should be "OUTPUT");
# kept as-is because write_stats_to_md references it by this name.
STATS_OUPTUT_MD_FILE = "./out.md"
# show only top n biggest stats on allocations
STATS_TOP_N = 10
def write_stats_to_md(stats):
    """
    Write tracemalloc's statistics to a markdown file for readability.

    stats: List[tracemalloc.Statistic | tracemalloc.StatisticDiff]
        The output of Snapshot.statistics() (single-snapshot case) or
        Snapshot.compare_to() (snapshot-diff case).

    Raises:
        ValueError: if `stats` is empty (usually over-aggressive filters or
            a monitored function whose allocations were already freed).
        TypeError: if `stats` contains neither Statistic nor StatisticDiff.
    """
    # label describing whether allocations are aggregated across frames,
    # driven by the STATS_CUMULATIVE setting at the top of the file
    cumulative_string = "{}cumulative".format(["non-", ""][STATS_CUMULATIVE])
    if not stats:
        raise ValueError("Empty stats, please recheck your filters & monitored-function's return values are appropriate.")
    if isinstance(stats[0], tracemalloc.Statistic):
        # single-snapshot statistics: per-module / per-function monitoring
        with open(STATS_OUPTUT_MD_FILE, "w") as fp:
            fp.write(f"Top {STATS_TOP_N} highest {cumulative_string} allocations, grouped by {STATS_GROUPBY_KEY_TYPE!r} ...\n")
            # fix: dropped the stray trailing apostrophe after "trace" in the output
            fp.write(f"Number of Frames stored in each Traceback of a trace = {TRACEMALLOC_NFRAMES_COUNT}\n\n")
            fp.write("```python")
            for index, stat in enumerate(stats, 1):
                # most recent frame of this allocation's traceback
                stat_recent_traceback = stat.traceback[0]
                name_of_the_py_file = stat_recent_traceback.filename
                lineno_of_the_py_file = stat_recent_traceback.lineno
                # fetch the actual source line for readability
                src_code_line_in_py_file = linecache.getline(filename=name_of_the_py_file, lineno=lineno_of_the_py_file).strip()
                stat_output = f'''
#{index} {name_of_the_py_file}:{lineno_of_the_py_file}
#        allocated {pretty_bytes(stat.size)}, {stat.count} times cumulatively,
#        on average {pretty_bytes(stat.size / stat.count)}/allocation
{src_code_line_in_py_file}
'''
                fp.write(stat_output)
            fp.write("\n```")
    elif isinstance(stats[0], tracemalloc.StatisticDiff):
        # snapshot-diff statistics: per-iteration monitoring
        with open(STATS_OUPTUT_MD_FILE, "w") as fp:
            fp.write(f"Top {STATS_TOP_N} highest {cumulative_string} allocations, grouped by {STATS_GROUPBY_KEY_TYPE!r}, compared to previous iteration's snapshot...\n")
            fp.write(f"Number of Frames stored in each Traceback of a trace = {TRACEMALLOC_NFRAMES_COUNT}\n")
            # fix: honor STATS_TOP_N instead of a hard-coded 10, so the header
            # and the body agree on how many entries are shown
            for index, stat in enumerate(stats[:STATS_TOP_N], start=1):
                stat_output = "\n"
                stat_output += f"#{index}\n"
                stat_output += f'Diff: {pretty_bytes(stat.size_diff)} newly allocated for {stat.count_diff} memory blocks.\n'
                stat_output += f'Total: {pretty_bytes(stat.size)} allocated for a total of {stat.count} memory blocks.\n'
                stat_output += f'Allocation Stack Traceback: \n'
                stat_output += "```python\n"
                stat_output += "\n".join(stat.traceback.format())
                stat_output += "\n```"
                fp.write(stat_output)
    else:
        # fix: previously an unsupported element type was silently ignored
        raise TypeError(f"Unsupported stats element type: {type(stats[0])!r}")
def pretty_bytes(byte_size) -> str:
    """
    Convert an int or float number of bytes into a human-readable
    "B" / "KB" / "MB" / "GB" string.

    byte_size (int | float): value representing the allocated bytes.
    return: str
    """
    # floats (e.g. an average of size/count) are truncated to whole bytes first
    size = byte_size if isinstance(byte_size, int) else int(byte_size)
    # walk the units from largest to smallest and stop at the first match
    for threshold, suffix in ((1e9, "GB"), (1e6, "MB"), (1e3, "KB")):
        if size > threshold:
            return f"{round(size / threshold, 1)} {suffix}"
    return f"{size} B"
def allocate_alot_of_memory(_: int) -> str:
    """Return a deliberately huge string (13,000,000 chars) to generate allocations."""
    # the argument (an iteration index) is intentionally ignored;
    # 26 characters * 500_000 repetitions = 13 million characters
    huge_string = "abcdefghijklmnopqrstuvwxyz" * 500_000
    return huge_string
def function_that_needs_memory_monitoring() -> List[str]:
    """Example workload to monitor: 200 large string allocations."""
    # the function or module code you want to monitor goes here; the
    # returned list keeps every allocation alive until snapshot time
    return [allocate_alot_of_memory(call_idx) for call_idx in range(200)]
# end of application code
def per_function_memory_monitoring_usecase():
    """
    Monitor memory usage while executing a function and return its values
    together with a tracemalloc snapshot (taken at the end).

    Returns:
        (values, snapshot): the monitored function's return value
        (None if the function raised) and the final tracemalloc.Snapshot.
    """
    # fix: initialize `values` up front — previously, if the monitored
    # function raised, `return values, snapshot` crashed with NameError,
    # masking the exception that was just handled
    values = None
    snapshot = None
    tracemalloc.start(TRACEMALLOC_NFRAMES_COUNT)
    try:
        # the monitored function should return something (at least one live
        # reference holding the allocated objects' references) OR modify
        # global state; otherwise its allocations may already be freed
        # by the time the snapshot is taken
        values = function_that_needs_memory_monitoring()
    except Exception as e:
        # best-effort: report the failure but still return the snapshot
        print(e)
    finally:
        snapshot = tracemalloc.take_snapshot()
        tracemalloc.stop()
    return values, snapshot
def per_iteration_memory_monitoring_usecase():
    """
    Monitor memory usage per iteration while executing a function and
    return values with tracemalloc's snapshots (taken at each iteration).

    Returns:
        (values, snapshots): the accumulated return values and one
        tracemalloc.Snapshot per iteration.
    """
    tracemalloc.start(TRACEMALLOC_NFRAMES_COUNT)
    snapshots = []
    values = []
    try:
        for i in range(200):
            returned_value = allocate_alot_of_memory(i)
            values.append(returned_value)
            # one snapshot per iteration; consecutive pairs are diffed later
            snapshots.append(tracemalloc.take_snapshot())
    finally:
        # fix: stop tracing before returning (mirrors
        # per_function_memory_monitoring_usecase); previously tracing was
        # left running, keeping its overhead active for the rest of the run
        tracemalloc.stop()
    return values, snapshots
def stats_on_diff_snapshots(snapshots: List[tracemalloc.Snapshot], snapshot_idx: int, filters: List[tracemalloc.Filter]) -> List[tracemalloc.StatisticDiff]:
    """Compare two consecutive snapshots and return the top N allocation diffs."""
    # apply the same filters to both snapshots so the comparison is apples-to-apples
    older = snapshots[snapshot_idx].filter_traces(filters)
    newer = snapshots[snapshot_idx + 1].filter_traces(filters)
    # see the comments at the top of the file for STATS_GROUPBY_KEY_TYPE / STATS_CUMULATIVE
    diff_stats = newer.compare_to(older, key_type=STATS_GROUPBY_KEY_TYPE, cumulative=STATS_CUMULATIVE)
    return diff_stats[:STATS_TOP_N]
def stats_on_snapshot(snapshot: tracemalloc.Snapshot, filters: List[tracemalloc.Filter]) -> List[tracemalloc.Statistic]:
    """Compute memory statistics for a single snapshot and return the top N entries."""
    # drop filtered-out traces, then group per STATS_GROUPBY_KEY_TYPE / STATS_CUMULATIVE
    filtered_snapshot = snapshot.filter_traces(filters=filters)
    top_stats = filtered_snapshot.statistics(key_type=STATS_GROUPBY_KEY_TYPE, cumulative=STATS_CUMULATIVE)
    return top_stats[:STATS_TOP_N]
def main():
    """Run the per-function monitoring example and dump its stats to markdown."""
    # filters that strip interpreter noise and the tracing machinery's own
    # overhead out of the reported allocations
    default_filters = [
        tracemalloc.Filter(inclusive=False, filename_pattern="<frozen*"),
        tracemalloc.Filter(inclusive=False, filename_pattern="<unknown>"),
        # exclude the typing module's import-time allocations
        tracemalloc.Filter(inclusive=False, filename_pattern=typing.__file__),
        # exclude the tracemalloc module's own overhead
        tracemalloc.Filter(inclusive=False, filename_pattern=tracemalloc.__file__),
        # exclude linecache's overhead from fetching source lines for the report
        tracemalloc.Filter(inclusive=False, filename_pattern=linecache.__file__),
    ]
    # add per-module include/exclude filters here if needed; see
    # https://docs.python.org/3/library/tracemalloc.html#filter
    # start with no extra filters: `stats` comes back empty even on a
    # wrong inclusive=True filename_pattern
    filters = [
        *default_filters,
        # tracemalloc.Filter(inclusive=True, filename_pattern="*/profile_malloc.py"), # show only this python file's allocations
        # tracemalloc.Filter(inclusive=True, filename_pattern="*requests*"), # show only requests module's allocations
    ]
    # example: per-module / per-function memory monitoring — run the workload,
    # collect a snapshot, summarize, and write the report
    _, snapshot = per_function_memory_monitoring_usecase()
    stats = stats_on_snapshot(snapshot, filters=filters)
    # convenience: swap fp.write for print in write_stats_to_md to dump to terminal
    write_stats_to_md(stats)
    # example: per-iteration memory monitoring — uncomment to try it out
    # _, snapshots = per_iteration_memory_monitoring_usecase()
    # # penultimate_snapshot_pos = len(snapshots) - 2  # for going from reverse if you want
    # # compare the 0th and 1st iterations' snapshots to see the net new
    # # allocations made between loop iterations
    # stats = stats_on_diff_snapshots(snapshots, snapshot_idx=0, filters=filters)
    # write_stats_to_md(stats)
# entry point; run with: PYTHONTRACEMALLOC=1 python profile_malloc.py
if __name__ == "__main__":
    main()
# PYTHONTRACEMALLOC=1 python profile_malloc.py
# TODO:
# [ ] recurrent functions
# [ ] asyncio coroutine iterations, gathering
# [ ] async recurring coroutines (webscrapers)
# [ ] Nvidia GPU malloc tracing using torch.cuda # https://pytorch.org/docs/stable/cuda.html#memory-management
# [ ] monitor memory usage % (y-axis) for a running python process over time (x-axis) using psutils, https://github.com/pythonprofilers/memory_profiler (now dead)
# [ ] monitor runtime metrics of memory allocations, uncollected objects from GC pov
# https://docs.python.org/3/library/gc.html#gc.garbage
# stackimpact-python (now dead) https://stackoverflow.com/a/44858750/6905674
# https://github.com/stackimpact/stackimpact-python/blob/master/stackimpact/profilers/allocation_profiler.py
# [ ] tracemalloc per-function monitoring as a context manager
@greed2411
Copy link
Author

greed2411 commented Nov 1, 2023

For seeing a particular python function's memory allocations line by line, cumulatively

this is how the output looks when tracemalloc monitors a particular function's memory allocations,
by lineno and aggregated.

Top 10 highest cumulative allocations, grouped by 'lineno' ...
Number of Frames stored in each Traceback of a trace' = 1

                #1 profile_malloc.py:159
                #        allocated 2.6 GB, 200 times cumulatively, 
                #        on average 13.0 MB/allocation
                        z = "abcdefghijklmnopqrstuvwxyz" * 500_000
                
                #2 /home/dazai/.pyenv/versions/3.8.11/lib/python3.8/abc.py:85
                #        allocated 112.4 KB, 520 times cumulatively, 
                #        on average 216 B/allocation
                        cls = super().__new__(mcls, name, bases, namespace, **kwargs)

                  ....

@greed2411
Copy link
Author

greed2411 commented Nov 1, 2023

For seeing a particular python code's memory allocations in-between a loop, non-cumulatively

this is how the output looks when tracemalloc monitors a particular iteration's memory-allocation diff (net new allocations),
by filename (each python file) and non-aggregated.

Top 10 highest non-cumulative allocations, grouped by 'filename', compared to previous iteration's snapshot...
Number of Frames stored in each Traceback of a trace' = 1

#1
Diff: 13.0 MB newly allocated for -1 memory blocks.
Total: 26.0 MB allocated for a total of 44 memory blocks.
Allocation Stack Traceback:

  File "profile_malloc.py", line 0

#2
Diff: 0 B newly allocated for 0 memory blocks.
Total: 181.6 KB allocated for a total of 1027 memory blocks.
Allocation Stack Traceback:

  File "/home/dazai/.pyenv/versions/3.8.11/lib/python3.8/abc.py", line 0

...

@greed2411
Copy link
Author

greed2411 commented Nov 1, 2023

actual memory leak example, code from OpenAI's ChatGPT-3.5

replacing per_iteration_memory_monitoring_usecase content with this,

def per_iteration_memory_monitoring_usecase():

    tracemalloc.start(TRACEMALLOC_NFRAMES_COUNT)
    snapshots = []

    # Memory leak example
    leaked_list = []

    def append_to_list():
        for i in range(10_000):
            huge_string = "This is a long string that will leak memory." * 1000
            leaked_list.append(huge_string)
            del huge_string

    j = 0
    while j <= 2:

        append_to_list()
        j += 1

        snapshots.append(tracemalloc.take_snapshot())

    return leaked_list, snapshots

and lineno based non-aggregated memory allocations profiling gives us this:

we get this as the highest allocating candidate line, just in between the while loop (of j going from 0 to 1):


Top 10 highest non-cumulative allocations, grouped by 'lineno', compared to previous iteration's snapshot...
Number of Frames stored in each Traceback of a trace' = 1

#1
Diff: 440.5 MB newly allocated for 10000 memory blocks.
Total: 881.0 MB allocated for a total of 20000 memory blocks.
Allocation Stack Traceback:

  File "profile_malloc.py", line 211
    huge_string = "This is a long string that will leak memory." * 1000

#2
Diff: 90.4 KB newly allocated for 0 memory blocks.
Total: 178.0 KB allocated for a total of 1 memory blocks.
Allocation Stack Traceback:

  File "profile_malloc.py", line 212
    leaked_list.append(huge_string)

the above should be read keeping in mind that the GC is not able to collect it (there is no negative de-allocation diff line for del huge_string) because leaked_list holds a reference to each allocated huge_string.
It is still valid Python code, but it can quickly go OOM.

the following line only deletes the name's reference to the object; it does not guarantee the object is freed from memory

del huge_string

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment