Skip to content

Instantly share code, notes, and snippets.

View hussainsultan's full-sized avatar

Hussain Sultan hussainsultan

View GitHub Profile
import pyarrow as pa
import duckdb
import numpy as np
# DuckDB connection opened with default arguments.
conn = duckdb.connect()

# Two separately-built float64 Arrow arrays, each holding np.arange(1000).
arr1, arr2 = (pa.array(np.arange(1000), type=pa.float64()) for _ in range(2))

# Wrap each array in its own single-column Arrow table; the column is "id".
t1_f, t2_f = (pa.table([column], names=["id"]) for column in (arr1, arr2))
from pathlib import Path
import ibis
def create_duckdb(datadir):
    """Connect to ``mortgage.db`` via ibis' DuckDB backend and (re)create the ``perf`` view.

    The ``perf`` view selects everything from the Parquet files matching
    ``<datadir>/perf/*.parquet``.

    Args:
        datadir: Base directory (``str`` or path-like) that holds the
            ``perf/`` Parquet files.

    Returns:
        None. The connection object is not handed back to the caller.
    """
    db = ibis.duckdb.connect("mortgage.db")
    perf_path = Path(datadir) / "perf/*.parquet"
    # NOTE(review): acq_path is computed but never used in the visible code;
    # presumably a matching `acq` view was intended -- confirm against the
    # full source before removing or extending.
    acq_path = Path(datadir) / "acq/*.parquet"
    # The glob ends up inside a single-quoted SQL string literal, so any
    # single quote in the path must be doubled to keep the statement valid.
    perf_literal = str(perf_path).replace("'", "''")
    db.con.execute(
        f"CREATE OR REPLACE VIEW perf AS SELECT * FROM '{perf_literal}'"
    )
import pandas as pd ; import numpy as np; import dask.dataframe as dd; from sklearn.datasets import load_boston
# Wrap the `.data` attribute of the load_boston() result in a pandas DataFrame
# (no column names are given, so pandas assigns default integer labels) and
# split it into a dask DataFrame with 10 partitions.
# NOTE(review): load_boston was deprecated and later removed from
# scikit-learn -- confirm the installed version still provides it.
df = dd.from_pandas(pd.DataFrame(load_boston().data),npartitions=10)
def operation(df):
    """Return a one-column frame named ``new`` holding a copy of column ``0``.

    Args:
        df: A DataFrame (pandas, or an API-compatible frame such as dask's)
            that has a column labelled ``0``.

    Returns:
        A new frame with the single column ``new``. The input frame is left
        untouched.
    """
    # assign() builds a new frame instead of writing the column into the
    # caller's object, so piping a frame through this function has no
    # side effect on it.
    return df.assign(new=df[0])[['new']]
# Apply `operation` to the frame and write the result out as CSV using the
# 'boston*.csv' name pattern.
operation(df).to_csv('boston*.csv')

README is empty