Skip to content

Instantly share code, notes, and snippets.

@hussainsultan
Created May 6, 2022 21:31
Show Gist options
  • Save hussainsultan/f4184eab98e53d1c2c2404afe58bf41f to your computer and use it in GitHub Desktop.
Save hussainsultan/f4184eab98e53d1c2c2404afe58bf41f to your computer and use it in GitHub Desktop.
import time

import duckdb
import numpy as np
import pyarrow as pa
# Micro-benchmark: time a DuckDB equi-join over two 1000-row PyArrow tables,
# once with float64 join keys and once with int64 join keys.
#
# The queries name the PyArrow tables directly; DuckDB resolves them from the
# Python variables of the same name, so the table variables must stay in
# scope where `conn.execute` is called.
conn = duckdb.connect()

# --- float64 join keys ---------------------------------------------------
arr1 = pa.array(np.arange(1000), type=pa.float64())
arr2 = pa.array(np.arange(1000), type=pa.float64())
t1_f = pa.table([arr1], names=["id"])
t2_f = pa.table([arr2], names=["id"])

# The tables are aliased as t1/t2 so that the join predicate can bind.
float_query = "select * from t1_f as t1, t2_f as t2 where t1.id = t2.id"
started = time.perf_counter()
conn.execute(float_query).fetchdf()
print(f"float64 join: {time.perf_counter() - started:.6f} s")

# --- int64 join keys -----------------------------------------------------
arr1_int = pa.array(np.arange(1000), type=pa.int64())
arr2_int = pa.array(np.arange(1000), type=pa.int64())
# Build the int tables from the int arrays (not the float ones above), so the
# second measurement really exercises int64 keys.
t1_int = pa.table([arr1_int], names=["id"])
t2_int = pa.table([arr2_int], names=["id"])

int_query = "select * from t1_int as t1, t2_int as t2 where t1.id = t2.id"
started = time.perf_counter()
conn.execute(int_query).fetchdf()
print(f"int64 join: {time.perf_counter() - started:.6f} s")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment