Skip to content

Instantly share code, notes, and snippets.

@deedy5
Created August 28, 2022 11:34
Show Gist options
  • Save deedy5/093e6b9b73cda98c019eacf3e64591f4 to your computer and use it in GitHub Desktop.
Save deedy5/093e6b9b73cda98c019eacf3e64591f4 to your computer and use it in GitHub Desktop.
Reduce pandas DataFrame memory size
def df_reduce_memory(df, *, allow_precision_loss=False):
    """Reduce pandas dataframe memory size by narrowing column dtypes.

    The dataframe is modified in place and also returned. Memory usage
    before and after the conversion is printed to stdout.

    Conversion rules:
        * Signed / unsigned NumPy integer columns are cast to the smallest
          integer type of the same signedness whose range contains the
          column's min and max (bounds inclusive).
        * NumPy float columns are cast to the smallest float type whose range
          contains the column's min and max and, unless
          ``allow_precision_loss`` is true, that reproduces every value
          exactly when cast back to the original dtype.
        * Object columns are converted to ``category`` when the number of
          unique values is at most 20% of the number of rows.
        * Every other dtype (bool, datetime, timedelta, category, extension
          dtypes, ...) is left untouched.

    Args:
        df (pd.DataFrame): pandas dataframe
        allow_precision_loss (bool): if True, floats are downcast based on
            their value range only, even when that rounds the stored values
            (e.g. 0.1 stored as float16). Defaults to False.
    Returns:
        pd.DataFrame: reduced pandas dataframe (the same object as ``df``)
    """
    # Imported locally so the function works even when the surrounding module
    # does not import numpy itself.
    import numpy as np

    # Example: df = pd.read_csv(data_dir, parse_dates=True, keep_date_col=True)
    start_mem = df.memory_usage(deep=True).sum() / 1024**2
    print(f"Memory usage of dataframe is {start_mem:.2f} MB")

    # Candidate target types per NumPy dtype kind, smallest first.
    candidates_by_kind = {
        "i": (np.int8, np.int16, np.int32, np.int64),
        "u": (np.uint8, np.uint16, np.uint32, np.uint64),
        "f": (np.float16, np.float32, np.float64),
    }

    for col in df.columns:
        series = df[col]
        col_type = series.dtype

        if col_type == object:
            # if column type = object. Convert to category if unique rows <= 20%.
            # The size check avoids a 0 / 0 division on an empty column.
            if series.size and series.nunique() / series.size * 100 <= 20:
                df[col] = series.astype("category")
            continue

        # Only plain NumPy int / uint / float columns are narrowed. Treating
        # bool, datetime or extension dtypes as floats would either change
        # their meaning or raise on the range comparison.
        if not isinstance(col_type, np.dtype):
            continue
        if col_type.kind not in candidates_by_kind:
            continue

        # min()/max() are NaN for an empty or all-NaN column; every range
        # comparison below is then False and the column is left unchanged.
        c_min = series.min()
        c_max = series.max()
        is_float = col_type.kind == "f"
        limits = np.finfo if is_float else np.iinfo

        for narrow_type in candidates_by_kind[col_type.kind]:
            # Never widen: stop once candidates are no smaller than the
            # current dtype.
            if np.dtype(narrow_type).itemsize >= col_type.itemsize:
                break
            bounds = limits(narrow_type)
            # Inclusive bounds: a value equal to the type's min/max still fits.
            if not (bounds.min <= c_min and c_max <= bounds.max):
                continue
            narrowed = series.astype(narrow_type)
            if is_float and not allow_precision_loss:
                # Fitting the range is not enough for floats: require an
                # exact round trip (NaNs in the same position compare equal).
                if not narrowed.astype(col_type).equals(series):
                    continue
            df[col] = narrowed
            break

    end_mem = df.memory_usage(deep=True).sum() / 1024**2
    print(f"Memory usage after optimization is: {end_mem:.2f} MB")
    # Guard the percentage against a zero-sized starting footprint.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print(f"Decreased by {decrease:.1f}%")
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment