Skip to content

Instantly share code, notes, and snippets.

@regispires
Last active January 10, 2024 18:54
Show Gist options
  • Save regispires/c3c5ad9e5101df06b17e79bf4c993084 to your computer and use it in GitHub Desktop.
Save regispires/c3c5ad9e5101df06b17e79bf4c993084 to your computer and use it in GitHub Desktop.
Python Memory Utility Functions
import sys
import numpy as np
import pandas as pd
def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string with binary prefixes.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    prefixes up to 'Zi' are exhausted, in which case 'Yi' is used.

    Parameters
    ----------
    num : number
        The quantity to format (may be negative).
    suffix : str
        Unit text appended after the binary prefix; defaults to 'B'.

    Returns
    -------
    str
        The scaled value with one decimal place, e.g. ``"1.5 KiB"``.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    for prefix in prefixes:
        if not abs(value) < 1024.0:
            # Still too large for this prefix: scale down and try the next.
            value /= 1024.0
            continue
        return "%3.1f %s%s" % (value, prefix, suffix)
    # Every listed prefix was exhausted; report in the largest unit.
    return "%.1f %s%s" % (value, 'Yi', suffix)
def print_top_mem_vars(variables):
    """Print the ten largest entries of a name-to-object mapping.

    Sizes are those reported by ``sys.getsizeof`` and are printed largest
    first, one ``name: size`` line per entry.

    Usage:
        mem.print_top_mem_vars(locals())
        mem.print_top_mem_vars(globals())
    """
    measured = [(name, sys.getsizeof(obj)) for name, obj in variables.items()]
    # Stable descending sort: entries of equal size keep their mapping order.
    measured.sort(key=lambda pair: pair[1], reverse=True)
    for name, size in measured[:10]:
        print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))
def reduce_mem_usage_automatic(df):
    """Reduce the memory usage of a dataframe by downcasting numeric columns.

    Integer columns are converted to the first dtype in
    ``_INT_DOWNCAST_ORDER`` whose range contains the column's minimum and
    maximum (bounds inclusive). Float columns are offered float32 and wider
    float types smaller than their current one. Columns are replaced on
    ``df`` itself; memory usage before and after is printed.

    Parameter
    ---------
    df : dataframe
        The input data to which the operation of memory reduction will be
        performed. An object whose type name contains 'dask' is treated as
        lazy and its intermediate results are evaluated with ``.compute()``.

    Returns
    -------
    None
    """
    is_dask = 'dask' in str(type(df))

    start_mem = _evaluate(df.memory_usage().sum() / 1024**2, is_dask)
    print("Memory usage of dataframe is {:.2f} MB".format(start_mem))

    # Float dtypes tried from float32 upwards, so float16 is never a target.
    float_codes = np.typecodes["Float"]
    float_codes = float_codes[float_codes.index(np.dtype(np.float32).char):]

    for col in df.columns:
        col_type = df[col].dtype
        # Extension dtypes (category, nullable, ...) are not numpy dtypes and
        # are left untouched.
        if not isinstance(col_type, np.dtype):
            continue

        if col_type.kind in 'iu':
            # Each bound is evaluated on its own for lazy dataframes.
            c_min = _evaluate(df[col].min(), is_dask)
            c_max = _evaluate(df[col].max(), is_dask)
            # An empty column yields NaN bounds; nothing to downcast.
            if pd.isna(c_min) or pd.isna(c_max):
                continue
            # Plain Python ints make the range comparisons exact.
            target = _smallest_int_dtype(int(c_min), int(c_max))
            if target is not None and target != col_type:
                df[col] = df[col].astype(target)
        elif col_type.kind == 'f':
            # NOTE(review): maybe_downcast_numeric is a pandas-private helper;
            # pd.to_numeric(..., downcast='float') looks like the public
            # counterpart for pandas Series - confirm (incl. dask) before
            # switching.
            for code in float_codes:
                dtype = np.dtype(code)
                if dtype.itemsize < df[col].dtype.itemsize:
                    df[col] = pd.core.dtypes.cast.maybe_downcast_numeric(df[col], dtype)
                    # successful conversion
                    if df[col].dtype == dtype:
                        break

    end_mem = _evaluate(df.memory_usage().sum() / 1024**2, is_dask)
    print("Memory usage after optimization is: {:.2f} MB".format(end_mem))
    # Guard the ratio so a zero starting size does not divide by zero.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print("Decreased by {:.1f}%".format(decrease))


# Integer dtypes in the order they are tried; the first that fits is used.
_INT_DOWNCAST_ORDER = (
    np.int8, np.uint8,
    np.int16, np.uint16,
    np.int32, np.uint32,
    np.int64, np.uint64,
)


def _smallest_int_dtype(lo, hi):
    """Return the first dtype in ``_INT_DOWNCAST_ORDER`` holding [lo, hi], else None."""
    for candidate in _INT_DOWNCAST_ORDER:
        limits = np.iinfo(candidate)
        # Inclusive bounds: a value equal to the dtype's min or max still fits.
        if limits.min <= lo and hi <= limits.max:
            return np.dtype(candidate)
    return None


def _evaluate(value, is_dask):
    """Return ``value`` unchanged, or the result of its ``.compute()`` when lazy."""
    return value.compute() if is_dask else value
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment