Skip to content

Instantly share code, notes, and snippets.

@Diyago
Created September 8, 2018 10:56
Show Gist options
  • Save Diyago/c2a67ff01897685b08e75c5be5b6debb to your computer and use it in GitHub Desktop.
Save Diyago/c2a67ff01897685b08e75c5be5b6debb to your computer and use it in GitHub Desktop.
Reducing the memory usage of a pandas DataFrame
def reduce_mem_usage(data, verbose=True):
    """Downcast numeric columns to the smallest dtype that holds their value range.

    Each column backed by a plain NumPy signed-integer, unsigned-integer or
    float dtype is converted to the narrowest dtype of the same family whose
    representable range contains the column's minimum and maximum. All other
    columns (object/string, bool, datetime, timedelta, categorical, complex,
    pandas extension dtypes) are left untouched.

    Note: the range check for floats only guarantees the values do not
    overflow. float16 and float32 carry fewer significant digits than
    float64, so downcast float values may be rounded.

    Args:
        data: pandas DataFrame to shrink. It is modified in place.
        verbose: when true, print the memory usage before and after, and the
            percentage saved.

    Returns:
        The same DataFrame object that was passed in.
    """
    # Imported locally so the function works even when the surrounding module
    # does not import numpy itself.
    import numpy as np

    # Candidate dtypes per NumPy kind code, ordered narrowest first.
    candidates_by_kind = {
        'i': (np.int8, np.int16, np.int32, np.int64),
        'u': (np.uint8, np.uint16, np.uint32, np.uint64),
        'f': (np.float16, np.float32, np.float64),
    }

    start_mem = data.memory_usage().sum() / 1024**2
    if verbose:
        print('Memory usage of dataframe: {:.2f} MB'.format(start_mem))

    for col in data.columns:
        col_type = data[col].dtype

        # Extension dtypes (categorical, nullable ints, tz-aware datetimes,
        # ...) are not NumPy dtypes; casting them to a NumPy type could fail
        # or change their meaning, so they are skipped.
        if not isinstance(col_type, np.dtype):
            continue

        candidates = candidates_by_kind.get(col_type.kind)
        if candidates is None:
            # bool, object, datetime, timedelta, complex, ...: nothing to do.
            continue

        c_min = data[col].min()
        c_max = data[col].max()
        limits_of = np.finfo if col_type.kind == 'f' else np.iinfo

        for candidate in candidates:
            limits = limits_of(candidate)
            # Inclusive bounds: a value equal to the dtype's min/max still fits.
            # For empty or all-NaN columns min/max are NaN, every comparison
            # is False, and the column keeps its current dtype.
            if c_min >= limits.min and c_max <= limits.max:
                if col_type != candidate:
                    data[col] = data[col].astype(candidate)
                break

    end_mem = data.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the percentage against a zero starting size.
        if start_mem:
            decrease = 100 * (start_mem - end_mem) / start_mem
        else:
            decrease = 0.0
        print('Memory usage after optimization: {:.2f} MB'.format(end_mem))
        print('Decreased by {:.1f}%'.format(decrease))
    return data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment