Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save glassresistor/fab72b5e30f7c8f01f1d1d09e403176b to your computer and use it in GitHub Desktop.
Save glassresistor/fab72b5e30f7c8f01f1d1d09e403176b to your computer and use it in GitHub Desktop.
import sys
import pandas as pd
import numpy as np
import time
import sqlalchemy as sa
import gc
from guppy import hpy
def obj_size_fmt(num):
    """Format a byte count as a short human-readable string.

    The value is scaled by powers of 1024 and rendered with two decimals
    followed by a unit suffix: ``B``, ``KB``, ``MB`` or ``GB``.

    Args:
        num: Size in bytes (int or float).

    Returns:
        A string such as ``"512.00B"``, ``"1.50KB"`` or ``"2.00GB"``.
        Anything of 1024**3 bytes or more is reported in ``GB``.
    """
    # The thresholds and the divisors use the same 1024-based steps so a
    # value never switches unit before it reaches 1.00 of that unit, and
    # every unit is an exact power of 1024 (the previous 1.024 * 10**k
    # divisors were only correct for KB).
    step = 1024
    if num < step:
        return f"{num:.2f}B"
    if num < step ** 2:
        return f"{num / step:.2f}KB"
    if num < step ** 3:
        return f"{num / step ** 2:.2f}MB"
    return f"{num / step ** 3:.2f}GB"
def memory_usage():
    """Report the ten largest module-level variables by reported size.

    Every name in this module's ``globals()`` is measured with
    ``sys.getsizeof``. Note that ``sys.getsizeof`` only reports what the
    object's own ``__sizeof__`` returns, so containers that do not account
    for their contents are under-reported.

    Returns:
        pandas.DataFrame indexed by variable name with a single ``"Size"``
        column, sorted from largest to smallest, limited to ten rows, and
        with each size rendered as a string by ``obj_size_fmt``.
    """
    # Build a one-row frame (row label "Size") and transpose it so that
    # variable names become the index and "Size" becomes the only column.
    sizes = pd.DataFrame(
        {name: sys.getsizeof(value) for name, value in globals().items()},
        index=["Size"],
    ).T
    # Sort numerically first; formatting turns the column into strings.
    largest = sizes.sort_values(by="Size", ascending=False).head(10)
    largest["Size"] = largest["Size"].apply(obj_size_fmt)
    return largest
# Heap inspector handle; not used by the script itself, kept at module level
# so it is available from the debugger prompt opened below.
heap = hpy()

# SQLAlchemy URL for the local "test_etl" PostgreSQL database (psycopg2
# driver, no host or credentials in the URL).
ETL_DATABASE_CONNECTION = "postgresql+psycopg2:///test_etl"
SQL_CONNECTION = sa.create_engine(ETL_DATABASE_CONNECTION)

index = range(0, 10)
# NOTE(review): the original indentation was lost; everything down to
# breakpoint() is assumed to belong to the loop body -- confirm.
for i in index:
    print(i)
    # Fresh 100000 x 200 frame of random floats for each round.
    df = pd.DataFrame(np.random.random((10 ** 5, 200)))

    # Time a full replace of the "random" table, written 100 rows at a time.
    s = time.time()
    df.to_sql(
        name="random", if_exists="replace", con=SQL_CONNECTION, chunksize=100
    )
    print((time.time() - s))

    # DataFrame.info() writes its report itself and returns None, so it is
    # called directly instead of being wrapped in print().
    df.info(memory_usage="deep")

    # Drop into the debugger so memory can be inspected by hand.
    breakpoint()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment