Created
May 25, 2023 13:07
-
-
Save pdet/2907290725539d390df7981e799ed593 to your computer and use it in GitHub Desktop.
Compare UDFs and External Function Execution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import duckdb | |
import pyarrow as pa | |
import pandas as pd | |
import time | |
def time_function(function): | |
res = [] | |
for i in range (0,5): | |
start_time = time.monotonic() | |
ans = function() | |
end_time = time.monotonic() | |
res.append(end_time-start_time) | |
res.sort() | |
print ("Time: " + str(res[2])) | |
print (ans) | |
def string_length_arrow(x): | |
tuples = len(x) | |
values = [len(i.as_py()) if i.as_py() != None else 0 for i in x] | |
array = pa.array(values, type=pa.int32(), size=tuples) | |
return array | |
def exec_internal(): | |
global con | |
return con.sql("select sum(strlen_arrow(i)) from strings tbl(i)").fetchall() | |
def exec_external(): | |
global con | |
arrow_table = con.sql("select i from strings tbl(i)").arrow() | |
arrow_column = arrow_table['i'] | |
tuples = len(arrow_column) | |
values = [len(i.as_py()) if i.as_py() != None else 0 for i in arrow_column] | |
array = pa.array(values, type=pa.int32(), size=tuples) | |
arrow_tbl = pa.Table.from_arrays([array], names=['i']) | |
return con.sql("select sum(i) from arrow_tbl").fetchall() | |
con = duckdb.connect() | |
con.create_function('strlen_arrow', string_length_arrow, ['VARCHAR'], int, type='arrow') | |
con.sql(""" | |
select | |
case when i != 0 and i % 42 = 0 | |
then | |
NULL | |
else | |
repeat(chr((65 + (i % 26))::INTEGER), (4 + (i % 12))) end | |
from range(10000000) tbl(i); | |
""").to_view("strings") | |
print ("Internal:") | |
time_function(exec_internal) | |
print ("External:") | |
time_function(exec_external) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment