Skip to content

Instantly share code, notes, and snippets.

@jhbuhrman
Created August 11, 2019 19:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jhbuhrman/7a23097ccd01ef93caf85ebc78b49a9a to your computer and use it in GitHub Desktop.
Save jhbuhrman/7a23097ccd01ef93caf85ebc78b49a9a to your computer and use it in GitHub Desktop.
Trying to get a grip on Pandas memory management
import gc
import pandas as pd
import numpy as np
import psutil
# Experiment: watch how the process's USS (as reported by psutil) changes
# while DataFrames are created from the same arrays, re-selected via .loc,
# and finally written to. Each step prints the delta against the previous
# reading.

# Handle to the current process; every memory reading below goes through it.
process = psutil.Process()
print(f"pid = {process.pid}")


def _measure_uss() -> int:
    """Force a garbage collection, then return the process's current USS."""
    # Collect first so unreachable garbage does not inflate the reading.
    gc.collect()
    return process.memory_full_info().uss


def _show(statement: str, name: str, frame: pd.DataFrame) -> None:
    """Print a separator, the statement just run, and head()/info() of *frame*.

    *name* is the variable name used in the printed labels; *frame* is the
    DataFrame those labels refer to.
    """
    print(f"{'-' * 60}\n{statement}\n{name}.head():...\n{frame.head()}\n{name}.info():...")
    # DataFrame.info() writes its report itself, so it is called separately.
    frame.info()


dtypes = ['int64', 'float64', 'complex128', 'object', 'bool']
# One column of 0x1000 * 4096 (= 16,777,216) ones per dtype.
data = {t: np.ones(shape=0x1000 * 4096).astype(t) for t in dtypes}
# Single-element counterpart; it is not referenced again in this script.
data2 = {t: np.zeros(shape=1).astype(t) for t in dtypes}

# Baseline, taken after the raw arrays exist but before any DataFrame does.
uss1 = _measure_uss()
print(f"uss1 = {uss1:#x}")

# Step 1: first DataFrame built from the arrays.
df = pd.DataFrame(data)
_show("df = pd.DataFrame(data)", "df", df)
uss2 = _measure_uss()
print(f"uss2 = {uss2:#x} ({uss2 - uss1:+#x})")

# Step 2: second DataFrame built from the very same arrays.
df2 = pd.DataFrame(data)
_show("df2 = pd.DataFrame(data)", "df2", df2)
uss3 = _measure_uss()
print(f"uss3 = {uss3:#x} ({uss3 - uss2:+#x})")

# Step 3: drop the second frame and replace it with a .loc selection of all
# columns of the first one.
del df2
gc.collect()
df2 = df.loc[:, dtypes]
_show("del df2; gc.collect(); df2 = df.loc[:, dtypes]", "df2", df2)
uss4 = _measure_uss()
print(f"uss4 = {uss4:#x} ({uss4 - uss3:+#x})")

# Step 4: overwrite the first row of df2, then show BOTH frames so any effect
# of the write on the source frame would be visible.
df2.loc[0, :] = [0, 0, 0, 0, False]
# Fix: the original printed the "df.info():..." label but then called
# df2.info(); the frame reported now matches the label.
_show("df2.loc[0, :] = [0, 0, 0, 0, False]", "df", df)
_show("...............", "df2", df2)
uss5 = _measure_uss()
print(f"uss5 = {uss5:#x} ({uss5 - uss4:+#x})")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment