Code/slides for the presentation
Last active
December 6, 2024 08:17
-
-
Save lidavidm/0752ac1d42dde6dbd785b9dd791be387 to your computer and use it in GitHub Desktop.
PostgreSQL Conference Japan 2024 Lightning Talk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: temp | |
channels: | |
- conda-forge | |
dependencies: | |
- adbc-driver-manager=1.3.0=py313h928ef07_1 | |
- adbc-driver-postgresql=1.3.0=pyh694c41f_1 | |
- aws-c-auth=0.8.0=h9b725a8_10 | |
- aws-c-cal=0.8.0=h5d7ee29_2 | |
- aws-c-common=0.10.3=h5505292_0 | |
- aws-c-compression=0.3.0=h5d7ee29_2 | |
- aws-c-event-stream=0.5.0=h13ead76_7 | |
- aws-c-http=0.9.1=hf483d09_2 | |
- aws-c-io=0.15.2=h39f8ad8_2 | |
- aws-c-mqtt=0.11.0=h68a0d7e_8 | |
- aws-c-s3=0.7.1=h840aca7_3 | |
- aws-c-sdkutils=0.2.1=h5d7ee29_1 | |
- aws-checksums=0.2.2=h5d7ee29_1 | |
- aws-crt-cpp=0.29.5=h6832833_0 | |
- aws-sdk-cpp=1.11.449=h8577fd2_3 | |
- azure-core-cpp=1.14.0=hd50102c_0 | |
- azure-identity-cpp=1.10.0=hc602bab_0 | |
- azure-storage-blobs-cpp=12.13.0=h7585a09_1 | |
- azure-storage-common-cpp=12.8.0=h9ca1f76_1 | |
- azure-storage-files-datalake-cpp=12.12.0=hcdd55da_1 | |
- bzip2=1.0.8=h99b78c6_7 | |
- c-ares=1.34.3=h5505292_1 | |
- ca-certificates=2024.8.30=hf0a4a13_0 | |
- cyrus-sasl=2.1.27=h60b93bd_7 | |
- gflags=2.2.2=hf9b8971_1005 | |
- glog=0.7.1=heb240a5_0 | |
- greenlet=3.1.1=py313h3579c5c_0 | |
- icu=75.1=hfee45f7_0 | |
- importlib_resources=6.4.5=pyhd8ed1ab_0 | |
- krb5=1.21.3=h237132a_0 | |
- libabseil=20240722.0=cxx17_hf9b8971_1 | |
- libadbc-driver-postgresql=1.3.0=h905962f_1 | |
- libarrow=18.0.0=hb943b0e_9_cpu | |
- libarrow-acero=18.0.0=h286801f_9_cpu | |
- libarrow-dataset=18.0.0=h286801f_9_cpu | |
- libarrow-substrait=18.0.0=h6a6e5c5_9_cpu | |
- libblas=3.9.0=25_osxarm64_openblas | |
- libbrotlicommon=1.1.0=hd74edd7_2 | |
- libbrotlidec=1.1.0=hd74edd7_2 | |
- libbrotlienc=1.1.0=hd74edd7_2 | |
- libcblas=3.9.0=25_osxarm64_openblas | |
- libcrc32c=1.1.2=hbdafb3b_0 | |
- libcurl=8.10.1=h13a7ad3_0 | |
- libcxx=19.1.4=ha82da77_0 | |
- libedit=3.1.20191231=hc8eb9b7_2 | |
- libev=4.33=h93a5062_2 | |
- libevent=2.1.12=h2757513_1 | |
- libexpat=2.6.4=h286801f_0 | |
- libffi=3.4.2=h3422bc3_5 | |
- libgfortran=5.0.0=13_2_0_hd922786_3 | |
- libgfortran5=13.2.0=hf226fd6_3 | |
- libgoogle-cloud=2.31.0=h8d8be31_0 | |
- libgoogle-cloud-storage=2.31.0=h7081f7f_0 | |
- libgrpc=1.67.1=hc70892a_0 | |
- libiconv=1.17=h0d3ecfb_2 | |
- liblapack=3.9.0=25_osxarm64_openblas | |
- libmpdec=4.0.0=h99b78c6_0 | |
- libnghttp2=1.64.0=h6d7220d_0 | |
- libntlm=1.4=h3422bc3_1002 | |
- libopenblas=0.3.28=openmp_hf332438_1 | |
- libparquet=18.0.0=hda0ea68_9_cpu | |
- libpq=17.2=h9b1ab17_0 | |
- libprotobuf=5.28.2=h8f0b736_0 | |
- libre2-11=2024.07.02=h2348fd5_1 | |
- libsqlite=3.47.0=hbaaea75_1 | |
- libssh2=1.11.1=h9cc3647_0 | |
- libthrift=0.21.0=h64651cc_0 | |
- libutf8proc=2.8.0=h1a8c8d9_0 | |
- libxml2=2.13.5=hbbdcc80_0 | |
- libzlib=1.3.1=h8359307_2 | |
- llvm-openmp=19.1.4=hdb05f8b_0 | |
- lz4-c=1.9.4=hb7217d7_0 | |
- ncurses=6.5=h7bae524_1 | |
- numpy=2.1.3=py313hca4752e_0 | |
- openldap=2.6.8=h50f2afc_0 | |
- openssl=3.4.0=h39f12f2_0 | |
- orc=2.0.3=h121fd32_0 | |
- pandas=2.2.3=py313h47b39a6_1 | |
- psycopg=3.2.3=py313h2784285_1 | |
- psycopg-c=3.2.3=py313h85ebe8c_1 | |
- pyarrow=18.0.0=py313h39782a4_2 | |
- pyarrow-core=18.0.0=py313hf9431ad_2_cpu | |
- python=3.13.0=h75c3a9f_100_cp313 | |
- python-dateutil=2.9.0.post0=pyhff2d567_0 | |
- python-tzdata=2024.2=pyhd8ed1ab_0 | |
- python_abi=3.13=5_cp313 | |
- pytz=2024.1=pyhd8ed1ab_0 | |
- re2=2024.07.02=hcd0e937_1 | |
- readline=8.2=h92ec313_1 | |
- six=1.16.0=pyh6c4a22f_0 | |
- snappy=1.2.1=hd02b534_0 | |
- sqlalchemy=2.0.36=py313h63a2874_0 | |
- tk=8.6.13=h5083fa2_1 | |
- typing-extensions=4.12.2=hd8ed1ab_0 | |
- typing_extensions=4.12.2=pyha770c72_0 | |
- tzdata=2024b=hc8b5060_0 | |
- xz=5.2.6=h57fd34a_0 | |
- zipp=3.21.0=pyhd8ed1ab_0 | |
- zstd=1.5.6=hb46c0d2_0 | |
prefix: /Users/lidavidm/mambaforge/envs/temp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import string | |
import timeit | |
import adbc_driver_postgresql.dbapi | |
import numpy | |
import pandas | |
import sqlalchemy | |
nrows = 1_000_000 | |
alphabet = numpy.array(list(string.ascii_letters + string.digits), dtype=(numpy.str_, 1)) | |
df = pandas.DataFrame({ | |
"ints": numpy.random.randint(-1024, 1024, size=nrows), | |
"strs": [ | |
"".join(numpy.random.choice(alphabet, random.randint(1, 10))) | |
for _ in range(nrows) | |
], | |
}) | |
print(df) | |
engine = sqlalchemy.create_engine("postgresql+psycopg://postgres:password@localhost:5432", echo=False) | |
def bench(callable): | |
timer = timeit.Timer(callable) | |
times = timer.repeat(repeat=5, number=10) | |
print("Times:", ", ".join([f"{time:.03f}" for time in sorted(times)])) | |
print(f"Min: {min(times):.03f}s") | |
with adbc_driver_postgresql.dbapi.connect("postgresql://postgres:password@localhost:5432") as adbc_conn, \ | |
engine.connect() as sqla_conn: | |
print("SQLAlchemy, write") | |
bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace")) | |
# print("SQLAlchemy, write (multi, chunk size=100)") | |
# bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace", chunksize=100, method="multi")) | |
# print("SQLAlchemy, write (multi, chunk size=1000)") | |
# bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace", chunksize=1000, method="multi")) | |
# print("SQLAlchemy, write (multi, chunk size=4000)") | |
# bench(lambda: df.to_sql("sqla", con=sqla_conn, if_exists="replace", chunksize=4000, method="multi")) | |
print("ADBC, write") | |
bench(lambda: df.to_sql("adbc", con=adbc_conn, if_exists="replace")) | |
print("SQLAlchemy, read") | |
bench(lambda: pandas.read_sql_table("sqla", con=sqla_conn)) | |
print("ADBC, read") | |
bench(lambda: pandas.read_sql_table("adbc", con=adbc_conn)) | |
# ints strs | |
# 0 961 cR | |
# 1 540 b | |
# 2 97 lk86 | |
# 3 -281 ME | |
# 4 190 HEs | |
# ... ... ... | |
# 99995 290 tVimuBkR | |
# 99996 -711 eckk | |
# 99997 637 V | |
# 99998 114 G2x | |
# 99999 -305 MwpnAFdMS9 | |
# [100000 rows x 2 columns] | |
# SQLAlchemy, write | |
# Times: 14.217, 14.230, 14.306, 14.332, 14.473 | |
# Min: 14.217s | |
# SQLAlchemy, write (multi, chunk size=100) | |
# Times: 54.011, 54.014, 54.179, 54.643, 55.448 | |
# Min: 54.011s | |
# SQLAlchemy, write (multi, chunk size=1000) | |
# Times: 33.202, 33.400, 33.419, 33.456, 33.697 | |
# Min: 33.202s | |
# SQLAlchemy, write (multi, chunk size=4000) | |
# Times: 32.910, 32.917, 32.930, 32.961, 32.992 | |
# Min: 32.910s | |
# ADBC, write | |
# Times: 0.479, 0.523, 0.530, 0.550, 0.773 | |
# Min: 0.479s | |
# SQLAlchemy, read | |
# Times: 0.965, 0.965, 0.967, 0.967, 0.987 | |
# Min: 0.965s | |
# ADBC, read | |
# Times: 0.313, 0.313, 0.315, 0.383, 0.433 | |
# Min: 0.313s | |
# ints strs | |
# 0 -965 1Dh6Xq9gr | |
# 1 426 nklkvg | |
# 2 -222 99Kr9U06Jb | |
# 3 -993 6ClfldXEc0 | |
# 4 370 Pc | |
# ... ... ... | |
# 999995 -321 9h | |
# 999996 37 nQlBMX6vM | |
# 999997 -247 pCdBjHXj | |
# 999998 -168 w | |
# 999999 -478 aki | |
# [1000000 rows x 2 columns] | |
# SQLAlchemy, write | |
# Times: 139.436, 139.585, 139.745, 139.808, 139.856 | |
# Min: 139.436s | |
# ADBC, write | |
# Times: 4.817, 4.849, 4.912, 4.950, 5.157 | |
# Min: 4.817s | |
# SQLAlchemy, read | |
# Times: 7.746, 7.759, 7.879, 8.644, 8.769 | |
# Min: 7.746s | |
# ADBC, read | |
# Times: 3.205, 3.231, 3.270, 3.271, 3.361 | |
# Min: 3.205s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment