Skip to content

Instantly share code, notes, and snippets.

@lidavidm
Last active December 6, 2024 08:17
Show Gist options
  • Save lidavidm/0752ac1d42dde6dbd785b9dd791be387 to your computer and use it in GitHub Desktop.
Save lidavidm/0752ac1d42dde6dbd785b9dd791be387 to your computer and use it in GitHub Desktop.
PostgreSQL Conference Japan 2024 Lightning Talk

Code/slides for the presentation

Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
name: temp
channels:
- conda-forge
dependencies:
- adbc-driver-manager=1.3.0=py313h928ef07_1
- adbc-driver-postgresql=1.3.0=pyh694c41f_1
- aws-c-auth=0.8.0=h9b725a8_10
- aws-c-cal=0.8.0=h5d7ee29_2
- aws-c-common=0.10.3=h5505292_0
- aws-c-compression=0.3.0=h5d7ee29_2
- aws-c-event-stream=0.5.0=h13ead76_7
- aws-c-http=0.9.1=hf483d09_2
- aws-c-io=0.15.2=h39f8ad8_2
- aws-c-mqtt=0.11.0=h68a0d7e_8
- aws-c-s3=0.7.1=h840aca7_3
- aws-c-sdkutils=0.2.1=h5d7ee29_1
- aws-checksums=0.2.2=h5d7ee29_1
- aws-crt-cpp=0.29.5=h6832833_0
- aws-sdk-cpp=1.11.449=h8577fd2_3
- azure-core-cpp=1.14.0=hd50102c_0
- azure-identity-cpp=1.10.0=hc602bab_0
- azure-storage-blobs-cpp=12.13.0=h7585a09_1
- azure-storage-common-cpp=12.8.0=h9ca1f76_1
- azure-storage-files-datalake-cpp=12.12.0=hcdd55da_1
- bzip2=1.0.8=h99b78c6_7
- c-ares=1.34.3=h5505292_1
- ca-certificates=2024.8.30=hf0a4a13_0
- cyrus-sasl=2.1.27=h60b93bd_7
- gflags=2.2.2=hf9b8971_1005
- glog=0.7.1=heb240a5_0
- greenlet=3.1.1=py313h3579c5c_0
- icu=75.1=hfee45f7_0
- importlib_resources=6.4.5=pyhd8ed1ab_0
- krb5=1.21.3=h237132a_0
- libabseil=20240722.0=cxx17_hf9b8971_1
- libadbc-driver-postgresql=1.3.0=h905962f_1
- libarrow=18.0.0=hb943b0e_9_cpu
- libarrow-acero=18.0.0=h286801f_9_cpu
- libarrow-dataset=18.0.0=h286801f_9_cpu
- libarrow-substrait=18.0.0=h6a6e5c5_9_cpu
- libblas=3.9.0=25_osxarm64_openblas
- libbrotlicommon=1.1.0=hd74edd7_2
- libbrotlidec=1.1.0=hd74edd7_2
- libbrotlienc=1.1.0=hd74edd7_2
- libcblas=3.9.0=25_osxarm64_openblas
- libcrc32c=1.1.2=hbdafb3b_0
- libcurl=8.10.1=h13a7ad3_0
- libcxx=19.1.4=ha82da77_0
- libedit=3.1.20191231=hc8eb9b7_2
- libev=4.33=h93a5062_2
- libevent=2.1.12=h2757513_1
- libexpat=2.6.4=h286801f_0
- libffi=3.4.2=h3422bc3_5
- libgfortran=5.0.0=13_2_0_hd922786_3
- libgfortran5=13.2.0=hf226fd6_3
- libgoogle-cloud=2.31.0=h8d8be31_0
- libgoogle-cloud-storage=2.31.0=h7081f7f_0
- libgrpc=1.67.1=hc70892a_0
- libiconv=1.17=h0d3ecfb_2
- liblapack=3.9.0=25_osxarm64_openblas
- libmpdec=4.0.0=h99b78c6_0
- libnghttp2=1.64.0=h6d7220d_0
- libntlm=1.4=h3422bc3_1002
- libopenblas=0.3.28=openmp_hf332438_1
- libparquet=18.0.0=hda0ea68_9_cpu
- libpq=17.2=h9b1ab17_0
- libprotobuf=5.28.2=h8f0b736_0
- libre2-11=2024.07.02=h2348fd5_1
- libsqlite=3.47.0=hbaaea75_1
- libssh2=1.11.1=h9cc3647_0
- libthrift=0.21.0=h64651cc_0
- libutf8proc=2.8.0=h1a8c8d9_0
- libxml2=2.13.5=hbbdcc80_0
- libzlib=1.3.1=h8359307_2
- llvm-openmp=19.1.4=hdb05f8b_0
- lz4-c=1.9.4=hb7217d7_0
- ncurses=6.5=h7bae524_1
- numpy=2.1.3=py313hca4752e_0
- openldap=2.6.8=h50f2afc_0
- openssl=3.4.0=h39f12f2_0
- orc=2.0.3=h121fd32_0
- pandas=2.2.3=py313h47b39a6_1
- psycopg=3.2.3=py313h2784285_1
- psycopg-c=3.2.3=py313h85ebe8c_1
- pyarrow=18.0.0=py313h39782a4_2
- pyarrow-core=18.0.0=py313hf9431ad_2_cpu
- python=3.13.0=h75c3a9f_100_cp313
- python-dateutil=2.9.0.post0=pyhff2d567_0
- python-tzdata=2024.2=pyhd8ed1ab_0
- python_abi=3.13=5_cp313
- pytz=2024.1=pyhd8ed1ab_0
- re2=2024.07.02=hcd0e937_1
- readline=8.2=h92ec313_1
- six=1.16.0=pyh6c4a22f_0
- snappy=1.2.1=hd02b534_0
- sqlalchemy=2.0.36=py313h63a2874_0
- tk=8.6.13=h5083fa2_1
- typing-extensions=4.12.2=hd8ed1ab_0
- typing_extensions=4.12.2=pyha770c72_0
- tzdata=2024b=hc8b5060_0
- xz=5.2.6=h57fd34a_0
- zipp=3.21.0=pyhd8ed1ab_0
- zstd=1.5.6=hb46c0d2_0
prefix: /Users/lidavidm/mambaforge/envs/temp
import random
import string
import timeit
import adbc_driver_postgresql.dbapi
import numpy
import pandas
import sqlalchemy
nrows = 1_000_000
alphabet = numpy.array(list(string.ascii_letters + string.digits), dtype=(numpy.str_, 1))
df = pandas.DataFrame({
"ints": numpy.random.randint(-1024, 1024, size=nrows),
"strs": [
"".join(numpy.random.choice(alphabet, random.randint(1, 10)))
for _ in range(nrows)
],
})
print(df)
engine = sqlalchemy.create_engine("postgresql+psycopg://postgres:password@localhost:5432", echo=False)
def bench(callable):
timer = timeit.Timer(callable)
times = timer.repeat(repeat=5, number=10)
print("Times:", ", ".join([f"{time:.03f}" for time in sorted(times)]))
print(f"Min: {min(times):.03f}s")
with adbc_driver_postgresql.dbapi.connect("postgresql://postgres:password@localhost:5432") as adbc_conn, \
engine.connect() as sqla_conn:
print("SQLAlchemy, write")
bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace"))
# print("SQLAlchemy, write (multi, chunk size=100)")
# bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace", chunksize=100, method="multi"))
# print("SQLAlchemy, write (multi, chunk size=1000)")
# bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace", chunksize=1000, method="multi"))
# print("SQLAlchemy, write (multi, chunk size=4000)")
# bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace", chunksize=4000, method="multi"))
print("ADBC, write")
bench(lambda: df.to_sql("adbc", con=adbc_conn, if_exists="replace"))
print("SQLAlchemy, read")
bench(lambda: pandas.read_sql_table("sqla", con=sqla_conn))
print("ADBC, read")
bench(lambda: pandas.read_sql_table("adbc", con=adbc_conn))
# ints strs
# 0 961 cR
# 1 540 b
# 2 97 lk86
# 3 -281 ME
# 4 190 HEs
# ... ... ...
# 99995 290 tVimuBkR
# 99996 -711 eckk
# 99997 637 V
# 99998 114 G2x
# 99999 -305 MwpnAFdMS9
# [100000 rows x 2 columns]
# SQLAlchemy, write
# Times: 14.217, 14.230, 14.306, 14.332, 14.473
# Min: 14.217s
# SQLAlchemy, write (multi, chunk size=100)
# Times: 54.011, 54.014, 54.179, 54.643, 55.448
# Min: 54.011s
# SQLAlchemy, write (multi, chunk size=1000)
# Times: 33.202, 33.400, 33.419, 33.456, 33.697
# Min: 33.202s
# SQLAlchemy, write (multi, chunk size=4000)
# Times: 32.910, 32.917, 32.930, 32.961, 32.992
# Min: 32.910s
# ADBC, write
# Times: 0.479, 0.523, 0.530, 0.550, 0.773
# Min: 0.479s
# SQLAlchemy, read
# Times: 0.965, 0.965, 0.967, 0.967, 0.987
# Min: 0.965s
# ADBC, read
# Times: 0.313, 0.313, 0.315, 0.383, 0.433
# Min: 0.313s
# ints strs
# 0 -965 1Dh6Xq9gr
# 1 426 nklkvg
# 2 -222 99Kr9U06Jb
# 3 -993 6ClfldXEc0
# 4 370 Pc
# ... ... ...
# 999995 -321 9h
# 999996 37 nQlBMX6vM
# 999997 -247 pCdBjHXj
# 999998 -168 w
# 999999 -478 aki
# [1000000 rows x 2 columns]
# SQLAlchemy, write
# Times: 139.436, 139.585, 139.745, 139.808, 139.856
# Min: 139.436s
# ADBC, write
# Times: 4.817, 4.849, 4.912, 4.950, 5.157
# Min: 4.817s
# SQLAlchemy, read
# Times: 7.746, 7.759, 7.879, 8.644, 8.769
# Min: 7.746s
# ADBC, read
# Times: 3.205, 3.231, 3.270, 3.271, 3.361
# Min: 3.205s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment