Last active
January 10, 2024 18:54
-
-
Save regispires/c3c5ad9e5101df06b17e79bf4c993084 to your computer and use it in GitHub Desktop.
Python Memory Utility Functions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys

import numpy as np
import pandas as pd
def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string with binary prefixes.

    The value is divided by 1024 until its magnitude drops below 1024 and is
    then rendered with one decimal place, e.g. ``1536`` -> ``"1.5 KiB"``.

    Parameters
    ----------
    num : int or float
        The quantity to format. Negative values keep their sign.
    suffix : str, optional
        Unit appended after the binary prefix (default ``'B'``).

    Returns
    -------
    str
        ``"<value> <prefix><suffix>"``; values of 1024**8 and above are
        expressed with the ``Yi`` prefix.
    """
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    # Anything that survived all eight divisions is reported in Yi.
    return "%.1f %s%s" % (num, 'Yi', suffix)
def print_top_mem_vars(variables, top=10):
    """Print the largest entries of a name -> object mapping, biggest first.

    Usage:
        mem.print_top_mem_vars(locals())
        mem.print_top_mem_vars(globals())

    Parameters
    ----------
    variables : mapping
        Name -> object mapping, typically ``locals()`` or ``globals()``.
    top : int, optional
        Maximum number of entries to print (default 10).

    Notes
    -----
    Sizes come from ``sys.getsizeof``, which is shallow: it does not follow
    references into the objects a container holds.
    """
    # Snapshot the items first so that a live namespace dict cannot change
    # size while the sizes are being collected.
    sizes = [(name, sys.getsizeof(value)) for name, value in list(variables.items())]
    # Negated key sorts descending while keeping ties in their original order.
    for name, size in sorted(sizes, key=lambda item: -item[1])[:top]:
        print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))
def _smallest_int_dtype(lo, hi):
    """Return the narrowest NumPy integer dtype whose range contains [lo, hi].

    Candidates are tried from 8 to 64 bits, signed before unsigned at each
    width. Returns ``None`` when no candidate can hold both values.
    """
    for candidate in (np.int8, np.uint8, np.int16, np.uint16,
                      np.int32, np.uint32, np.int64, np.uint64):
        limits = np.iinfo(candidate)
        # Inclusive comparison: the extreme values of a type are representable.
        if limits.min <= lo and hi <= limits.max:
            return np.dtype(candidate)
    return None


def reduce_mem_usage_automatic(df):
    """Reduce the memory usage of the given dataframe by downcasting columns.

    Integer columns are cast to the narrowest integer dtype that holds their
    observed minimum and maximum. Float columns are offered to pandas'
    numeric downcast helper, trying float32 first. Columns of any other
    dtype, including pandas extension dtypes, are left untouched. Memory
    usage before and after, and the percentage saved, are printed.

    Parameter
    ---------
    df : dataframe
        The input data to which the operation of memory reduction will be
        performed. Columns are reassigned on this object; nothing is
        returned. Objects whose type name contains ``dask`` have their lazy
        results materialised with ``.compute()``.
    """
    is_dask = 'dask' in str(type(df))

    start_mem = df.memory_usage().sum() / 1024**2
    if is_dask:
        start_mem = start_mem.compute()
    print("Memory usage of dataframe is {:.2f} MB".format(start_mem))

    for col in df.columns:
        col_type = df[col].dtype
        # Extension dtypes (categorical, nullable, ...) are not NumPy dtypes
        # and lack the attributes used below, so leave those columns alone.
        if not isinstance(col_type, np.dtype):
            continue

        if col_type.kind in 'iu':
            c_min = df[col].min()
            c_max = df[col].max()
            if is_dask:
                # Each .compute() call evaluates its own lazy result.
                c_min, c_max = c_min.compute(), c_max.compute()
            # An empty column has no min/max (NaN); nothing to decide on.
            if pd.isna(c_min) or pd.isna(c_max):
                continue
            # Plain Python ints avoid mixed signed/unsigned comparisons.
            target = _smallest_int_dtype(int(c_min), int(c_max))
            # Only cast when it actually saves space.
            if target is not None and target.itemsize < col_type.itemsize:
                df[col] = df[col].astype(target)
        elif col_type.char in np.typecodes["Float"]:
            typecodes = np.typecodes["Float"]
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            # Start at float32: half precision is never attempted.
            typecodes = typecodes[float_32_ind:]
            # from smallest to largest
            for typecode in typecodes:
                dtype = np.dtype(typecode)
                if dtype.itemsize < df[col].dtype.itemsize:
                    # NOTE(review): private pandas helper; for pandas objects
                    # pd.to_numeric(..., downcast="float") looks like the
                    # public equivalent - confirm before switching.
                    df[col] = pd.core.dtypes.cast.maybe_downcast_numeric(df[col], dtype)
                # successful conversion
                if df[col].dtype == dtype:
                    break

    end_mem = df.memory_usage().sum() / 1024**2
    if is_dask:
        end_mem = end_mem.compute()
    print("Memory usage after optimization is: {:.2f} MB".format(end_mem))
    # Guard the ratio against a zero-sized starting frame.
    saved_pct = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print("Decreased by {:.1f}%".format(saved_pct))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Source: https://stackoverflow.com/questions/24455615/python-how-to-display-size-of-all-variables