Created
May 25, 2023 11:14
-
-
Save pdet/ebd201475581756c29e4533a8fa4106e to your computer and use it in GitHub Desktop.
Compare DuckDB UDFs: Built-In UDFs vs PyArrow UDFs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import duckdb | |
import pyarrow as pa | |
import pandas as pd | |
import time | |
import pyarrow.compute as pc | |
def time_function(function, runs=5):
    """Benchmark ``function`` and print its median wall-clock duration.

    ``function`` is called ``runs`` times with no arguments. The elapsed
    time of each call is recorded, the timings are sorted, and the middle
    one is printed as ``Time: <seconds>``, followed by the value returned
    by the last call.

    Args:
        function: Zero-argument callable to benchmark.
        runs: Number of timed calls to make. Defaults to 5.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    durations = []
    ans = None
    for _ in range(runs):
        # perf_counter() is the highest-resolution clock available for
        # measuring short intervals, which is what a benchmark needs.
        start_time = time.perf_counter()
        ans = function()
        end_time = time.perf_counter()
        durations.append(end_time - start_time)

    durations.sort()
    # Middle element of the sorted timings: the median for an odd number of
    # runs (index 2 for the default of 5), the upper-middle for an even one.
    print("Time: " + str(durations[len(durations) // 2]))
    print(ans)
def exec_native():
    """Run the ``add_native`` aggregate query on the module-level ``con``.

    Returns:
        The rows produced by ``fetchall()`` for the sum of ``add_native(i)``
        over ``numbers``.
    """
    # `con` is only read here, so no `global` declaration is required.
    query = "select sum(add_native(i)) from numbers"
    relation = con.sql(query)
    return relation.fetchall()
def exec_arrow():
    """Run the ``add_arrow`` aggregate query on the module-level ``con``.

    Returns:
        The rows produced by ``fetchall()`` for the sum of ``add_arrow(i)``
        over ``numbers``.
    """
    # `con` is only read here, so no `global` declaration is required.
    query = "select sum(add_arrow(i)) from numbers"
    relation = con.sql(query)
    return relation.fetchall()
def add_native(x):
    """Return ``x`` incremented by one (the scalar, per-value UDF body)."""
    incremented = x + 1
    return incremented
def add_arrow(x):
    """Return ``x`` plus one, computed with ``pyarrow.compute.add``."""
    increment = 1
    return pc.add(x, increment)
# Connection shared by exec_native/exec_arrow; connect() with no arguments
# uses DuckDB's default in-memory database.
con = duckdb.connect()

# Register the same "+ 1" logic twice: once as a native Python UDF and once
# as a PyArrow UDF.
con.create_function('add_native', add_native, ['BIGINT'], 'BIGINT', type='native')
con.create_function('add_arrow', add_arrow, ['BIGINT'], 'BIGINT', type='arrow')

# Expose 10,000,000 consecutive integers as the view "numbers".
con.sql("""
select
i
from range(10000000) tbl(i);
""").to_view("numbers")

# Sanity check: both UDF flavours must produce identical results before
# their timings are compared.
native_res = con.sql("select add_native(i) from numbers tbl(i)").fetchall()
arrow_res = con.sql("select add_arrow(i) from numbers tbl(i)").fetchall()
assert native_res == arrow_res

# Benchmark both UDF flavours.
print("Built-In:")
time_function(exec_native)
print("PyArrow:")
# time_function prints its own report and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line).
time_function(exec_arrow)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment