Skip to content

Instantly share code, notes, and snippets.

View pdet's full-sized avatar
🤙

Pedro Holanda pdet

🤙
View GitHub Profile
import threading
import psutil
import resource
import duckdb
import pyarrow as pa
import os
import time
def create_db():
con = duckdb.connect("duck.db")
@pdet
pdet / external_bench.py
Created May 25, 2023 13:07
Compare UDFs and External Function Execution
import duckdb
import pyarrow as pa
import pandas as pd
import time
def time_function(function):
res = []
for i in range (0,5):
start_time = time.monotonic()
ans = function()
@pdet
pdet / udf_bench_arrow_builtin.py
Created May 25, 2023 11:14
Compare DuckDB UDFs: Built-In UDFs vs PyArrow UDFs
import duckdb
import pyarrow as pa
import pandas as pd
import time
import pyarrow.compute as pc
def time_function(function):
res = []
for i in range (0,5):
start_time = time.monotonic()
@pdet
pdet / taxi_prediction_example.py
Created May 25, 2023 10:11
Example of taxi fare prediction with PyArrow-DuckDB UDF
# Files used in the example:
# !wget "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2016-01.parquet"
# !wget "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2016-02.parquet"
import duckdb
from duckdb.typing import *
import torch
import torch.nn as nn
import pyarrow as pa
import torch
import torch.nn as nn
import duckdb
import pyarrow as pa
import matplotlib.pyplot as plt
class LinearRegression(nn.Module):
def __init__(self, input_dim, output_dim):
super(LinearRegression, self).__init__()
self.linear = nn.Linear(input_dim, output_dim)