Skip to content

Instantly share code, notes, and snippets.

@lidavidm
Last active December 6, 2024 08:17
Show Gist options
  • Select an option

  • Save lidavidm/0752ac1d42dde6dbd785b9dd791be387 to your computer and use it in GitHub Desktop.

Select an option

Save lidavidm/0752ac1d42dde6dbd785b9dd791be387 to your computer and use it in GitHub Desktop.
PostgreSQL Conference Japan 2024 Lightning Talk

Code/slides for the presentation

Display the source blob
Display the rendered blob
Raw
name: temp
channels:
- conda-forge
dependencies:
- adbc-driver-manager=1.3.0=py313h928ef07_1
- adbc-driver-postgresql=1.3.0=pyh694c41f_1
- aws-c-auth=0.8.0=h9b725a8_10
- aws-c-cal=0.8.0=h5d7ee29_2
- aws-c-common=0.10.3=h5505292_0
- aws-c-compression=0.3.0=h5d7ee29_2
- aws-c-event-stream=0.5.0=h13ead76_7
- aws-c-http=0.9.1=hf483d09_2
- aws-c-io=0.15.2=h39f8ad8_2
- aws-c-mqtt=0.11.0=h68a0d7e_8
- aws-c-s3=0.7.1=h840aca7_3
- aws-c-sdkutils=0.2.1=h5d7ee29_1
- aws-checksums=0.2.2=h5d7ee29_1
- aws-crt-cpp=0.29.5=h6832833_0
- aws-sdk-cpp=1.11.449=h8577fd2_3
- azure-core-cpp=1.14.0=hd50102c_0
- azure-identity-cpp=1.10.0=hc602bab_0
- azure-storage-blobs-cpp=12.13.0=h7585a09_1
- azure-storage-common-cpp=12.8.0=h9ca1f76_1
- azure-storage-files-datalake-cpp=12.12.0=hcdd55da_1
- bzip2=1.0.8=h99b78c6_7
- c-ares=1.34.3=h5505292_1
- ca-certificates=2024.8.30=hf0a4a13_0
- cyrus-sasl=2.1.27=h60b93bd_7
- gflags=2.2.2=hf9b8971_1005
- glog=0.7.1=heb240a5_0
- greenlet=3.1.1=py313h3579c5c_0
- icu=75.1=hfee45f7_0
- importlib_resources=6.4.5=pyhd8ed1ab_0
- krb5=1.21.3=h237132a_0
- libabseil=20240722.0=cxx17_hf9b8971_1
- libadbc-driver-postgresql=1.3.0=h905962f_1
- libarrow=18.0.0=hb943b0e_9_cpu
- libarrow-acero=18.0.0=h286801f_9_cpu
- libarrow-dataset=18.0.0=h286801f_9_cpu
- libarrow-substrait=18.0.0=h6a6e5c5_9_cpu
- libblas=3.9.0=25_osxarm64_openblas
- libbrotlicommon=1.1.0=hd74edd7_2
- libbrotlidec=1.1.0=hd74edd7_2
- libbrotlienc=1.1.0=hd74edd7_2
- libcblas=3.9.0=25_osxarm64_openblas
- libcrc32c=1.1.2=hbdafb3b_0
- libcurl=8.10.1=h13a7ad3_0
- libcxx=19.1.4=ha82da77_0
- libedit=3.1.20191231=hc8eb9b7_2
- libev=4.33=h93a5062_2
- libevent=2.1.12=h2757513_1
- libexpat=2.6.4=h286801f_0
- libffi=3.4.2=h3422bc3_5
- libgfortran=5.0.0=13_2_0_hd922786_3
- libgfortran5=13.2.0=hf226fd6_3
- libgoogle-cloud=2.31.0=h8d8be31_0
- libgoogle-cloud-storage=2.31.0=h7081f7f_0
- libgrpc=1.67.1=hc70892a_0
- libiconv=1.17=h0d3ecfb_2
- liblapack=3.9.0=25_osxarm64_openblas
- libmpdec=4.0.0=h99b78c6_0
- libnghttp2=1.64.0=h6d7220d_0
- libntlm=1.4=h3422bc3_1002
- libopenblas=0.3.28=openmp_hf332438_1
- libparquet=18.0.0=hda0ea68_9_cpu
- libpq=17.2=h9b1ab17_0
- libprotobuf=5.28.2=h8f0b736_0
- libre2-11=2024.07.02=h2348fd5_1
- libsqlite=3.47.0=hbaaea75_1
- libssh2=1.11.1=h9cc3647_0
- libthrift=0.21.0=h64651cc_0
- libutf8proc=2.8.0=h1a8c8d9_0
- libxml2=2.13.5=hbbdcc80_0
- libzlib=1.3.1=h8359307_2
- llvm-openmp=19.1.4=hdb05f8b_0
- lz4-c=1.9.4=hb7217d7_0
- ncurses=6.5=h7bae524_1
- numpy=2.1.3=py313hca4752e_0
- openldap=2.6.8=h50f2afc_0
- openssl=3.4.0=h39f12f2_0
- orc=2.0.3=h121fd32_0
- pandas=2.2.3=py313h47b39a6_1
- psycopg=3.2.3=py313h2784285_1
- psycopg-c=3.2.3=py313h85ebe8c_1
- pyarrow=18.0.0=py313h39782a4_2
- pyarrow-core=18.0.0=py313hf9431ad_2_cpu
- python=3.13.0=h75c3a9f_100_cp313
- python-dateutil=2.9.0.post0=pyhff2d567_0
- python-tzdata=2024.2=pyhd8ed1ab_0
- python_abi=3.13=5_cp313
- pytz=2024.1=pyhd8ed1ab_0
- re2=2024.07.02=hcd0e937_1
- readline=8.2=h92ec313_1
- six=1.16.0=pyh6c4a22f_0
- snappy=1.2.1=hd02b534_0
- sqlalchemy=2.0.36=py313h63a2874_0
- tk=8.6.13=h5083fa2_1
- typing-extensions=4.12.2=hd8ed1ab_0
- typing_extensions=4.12.2=pyha770c72_0
- tzdata=2024b=hc8b5060_0
- xz=5.2.6=h57fd34a_0
- zipp=3.21.0=pyhd8ed1ab_0
- zstd=1.5.6=hb46c0d2_0
prefix: /Users/lidavidm/mambaforge/envs/temp
import random
import string
import timeit
import adbc_driver_postgresql.dbapi
import numpy
import pandas
import sqlalchemy
nrows = 1_000_000
alphabet = numpy.array(list(string.ascii_letters + string.digits), dtype=(numpy.str_, 1))
df = pandas.DataFrame({
"ints": numpy.random.randint(-1024, 1024, size=nrows),
"strs": [
"".join(numpy.random.choice(alphabet, random.randint(1, 10)))
for _ in range(nrows)
],
})
print(df)
engine = sqlalchemy.create_engine("postgresql+psycopg://postgres:password@localhost:5432", echo=False)
def bench(callable):
timer = timeit.Timer(callable)
times = timer.repeat(repeat=5, number=10)
print("Times:", ", ".join([f"{time:.03f}" for time in sorted(times)]))
print(f"Min: {min(times):.03f}s")
with adbc_driver_postgresql.dbapi.connect("postgresql://postgres:password@localhost:5432") as adbc_conn, \
engine.connect() as sqla_conn:
print("SQLAlchemy, write")
bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace"))
# print("SQLAlchemy, write (multi, chunk size=100)")
# bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace", chunksize=100, method="multi"))
# print("SQLAlchemy, write (multi, chunk size=1000)")
# bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace", chunksize=1000, method="multi"))
# print("SQLAlchemy, write (multi, chunk size=4000)")
# bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace", chunksize=4000, method="multi"))
print("ADBC, write")
bench(lambda: df.to_sql("adbc", con=adbc_conn, if_exists="replace"))
print("SQLAlchemy, read")
bench(lambda: pandas.read_sql_table("sqla", con=sqla_conn))
print("ADBC, read")
bench(lambda: pandas.read_sql_table("adbc", con=adbc_conn))
# ints strs
# 0 961 cR
# 1 540 b
# 2 97 lk86
# 3 -281 ME
# 4 190 HEs
# ... ... ...
# 99995 290 tVimuBkR
# 99996 -711 eckk
# 99997 637 V
# 99998 114 G2x
# 99999 -305 MwpnAFdMS9
# [100000 rows x 2 columns]
# SQLAlchemy, write
# Times: 14.217, 14.230, 14.306, 14.332, 14.473
# Min: 14.217s
# SQLAlchemy, write (multi, chunk size=100)
# Times: 54.011, 54.014, 54.179, 54.643, 55.448
# Min: 54.011s
# SQLAlchemy, write (multi, chunk size=1000)
# Times: 33.202, 33.400, 33.419, 33.456, 33.697
# Min: 33.202s
# SQLAlchemy, write (multi, chunk size=4000)
# Times: 32.910, 32.917, 32.930, 32.961, 32.992
# Min: 32.910s
# ADBC, write
# Times: 0.479, 0.523, 0.530, 0.550, 0.773
# Min: 0.479s
# SQLAlchemy, read
# Times: 0.965, 0.965, 0.967, 0.967, 0.987
# Min: 0.965s
# ADBC, read
# Times: 0.313, 0.313, 0.315, 0.383, 0.433
# Min: 0.313s
# ints strs
# 0 -965 1Dh6Xq9gr
# 1 426 nklkvg
# 2 -222 99Kr9U06Jb
# 3 -993 6ClfldXEc0
# 4 370 Pc
# ... ... ...
# 999995 -321 9h
# 999996 37 nQlBMX6vM
# 999997 -247 pCdBjHXj
# 999998 -168 w
# 999999 -478 aki
# [1000000 rows x 2 columns]
# SQLAlchemy, write
# Times: 139.436, 139.585, 139.745, 139.808, 139.856
# Min: 139.436s
# ADBC, write
# Times: 4.817, 4.849, 4.912, 4.950, 5.157
# Min: 4.817s
# SQLAlchemy, read
# Times: 7.746, 7.759, 7.879, 8.644, 8.769
# Min: 7.746s
# ADBC, read
# Times: 3.205, 3.231, 3.270, 3.271, 3.361
# Min: 3.205s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment