Skip to content

Instantly share code, notes, and snippets.

@fny
Created December 13, 2019 00:12
Show Gist options
  • Save fny/279e7cb3ae06acafe3178a12741ba4c0 to your computer and use it in GitHub Desktop.
Save fny/279e7cb3ae06acafe3178a12741ba4c0 to your computer and use it in GitHub Desktop.
Reduce Memory Usage of a Pandas Dataframe
import numpy as np
def _smallest_fitting_type(current, lo, hi, candidates, limits):
    """Pick the first candidate type, narrower than ``current``, that holds [lo, hi].

    Args:
        current: The column's present dtype.
        lo: Smallest value in the column.
        hi: Largest value in the column.
        candidates: Types to try, ordered from narrowest to widest.
        limits: Callable returning an object with ``min``/``max`` attributes
            for a candidate (``np.iinfo`` for ints, ``np.finfo`` for floats).

    Returns:
        The chosen type, or None when the column is already as narrow as its
        values allow (or when nothing fits, e.g. ``lo``/``hi`` are NaN).
    """
    for candidate in candidates:
        # Reaching the current type means every narrower type was rejected.
        if current == candidate:
            return None
        bounds = limits(candidate)
        if lo >= bounds.min and hi <= bounds.max:
            return candidate
    return None


def shrink_df(df, categorize=False, verbose=False):
    """Reduces the memory use of a data frame by using more compact types.

    Signed integer and float columns are cast to the narrowest type of the
    same family whose representable range covers the column's min and max.
    The decision is based on value range only, so narrowing a float column
    may lose precision. Columns are replaced on the passed-in frame itself.

    Args:
        df (pandas.DataFrame): The dataframe
        categorize (bool): Whether strings should be converted to categorical values.
            Note this may cause memory use to increase slightly.
        verbose (bool): Whether to print memory savings to stdout.

    Returns:
        df (pandas.DataFrame) A shrunken data frame (the same object as the input).
    """
    if verbose:
        start_mem = df.memory_usage().sum() / (1024**2)
        print('Memory usage of dataframe: {:.2f} MB'.format(start_mem))

    # Both lists are ordered narrowest-first; the search relies on that order.
    int_types = [np.int8, np.int16, np.int32, np.int64]
    float_types = [np.float16, np.float32, np.float64]

    for col in df.columns:
        col_type = df[col].dtype

        if col_type in int_types:
            candidates, limits = int_types, np.iinfo
        elif col_type in float_types:
            # Float ranges come from finfo; iinfo only describes integer types.
            candidates, limits = float_types, np.finfo
        else:
            # Optionally treat strings as categories
            if categorize and col_type == object:
                df[col] = df[col].astype('category')
            # min()/max() are only needed for numeric columns and can raise
            # on others (e.g. unordered categoricals), so skip them here.
            continue

        target = _smallest_fitting_type(
            col_type, df[col].min(), df[col].max(), candidates, limits
        )
        if target is not None:
            df[col] = df[col].astype(target)

    if verbose:
        end_mem = df.memory_usage().sum() / 1024**2
        print('Memory usage after optimization: {:.2f} MB'.format(end_mem))
        # Guard the ratio: a frame with no measurable memory has nothing to save.
        savings = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Savings of {:.1f}%!'.format(savings))
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment