Created
September 17, 2019 13:20
-
-
Save tkilias/20e8dbb8f6f0e045b205cc92916cf71c to your computer and use it in GitHub Desktop.
Experiments with import_from_pandas from pyexasol
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with new connection for each import:
import took for 1 iterations with 1000 rows: 309.32ms
import took for 10 iterations with 100 rows: 2.29s
import took for 100 iterations with 10 rows: 23.49s
import took for 1000 iterations with 1 rows: 243.97s
without new connection:
import took for 1 iterations with 1000 rows: 238.84ms
import took for 10 iterations with 100 rows: 1.96s
import took for 100 iterations with 10 rows: 20.50s
import took for 1000 iterations with 1 rows: 213.98s
insert into + string generation:
import took for 1000 iterations with 1 rows: 140.21s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyexasol | |
import textwrap | |
import pandas as pd | |
import numpy as np | |
from stopwatch import Stopwatch | |
def create_df(rows):
    """Build a synthetic three-column DataFrame with ``rows`` rows.

    Columns:
        a: the integers ``0 .. rows - 1``.
        b: the string ``"test"`` followed by the value of ``a``.
        c: the value of ``a`` multiplied by 1.5.

    Args:
        rows: Number of rows to generate.

    Returns:
        A ``pandas.DataFrame`` with columns ``a``, ``b`` and ``c``.
    """
    df = pd.DataFrame(np.arange(rows), columns=["a"])
    df["b"] = "test" + df["a"].astype(str)
    df["c"] = df["a"] * 1.5
    return df
# Benchmark: import a fixed total number of rows into Exasol, split into
# batches of different sizes, and print the elapsed time per batch size.

# Batch sizes (rows per import call) to measure; full sweep: [1000, 100, 10, 1].
steps = [1]  # [1000, 100, 10, 1]
# Total number of rows imported per step; iterations = max_rows // rows.
max_rows = 1000  # steps[0]

# Connection arguments shared by every connection opened below.
connection_params = dict(
    dsn="MyCluster_11:8888",
    user="sys",
    password="exasol",
    compression=True,
)

for rows in steps:
    df = create_df(rows)

    # Recreate the target table on a separate, short-lived connection.
    conn = pyexasol.connect(**connection_params)
    try:
        conn.execute("create schema if not exists test_import;")
        conn.open_schema("test_import")
        conn.execute("create or replace table test(a int, b varchar(1000), c double)")
    finally:
        # Close even when setup fails so the session is not leaked.
        conn.close()

    conn = pyexasol.connect(**connection_params)
    try:
        # The stopwatch is created after connecting, as in the original
        # measurement, so the initial connect is not part of the timing.
        stopwatch = Stopwatch()
        iterations = max_rows // rows
        for _ in range(iterations):
            # Variant "new connection for each import": enable the next line
            # and the conn.close() at the end of the loop body.
            # conn = pyexasol.connect(**connection_params)
            # open_schema stays inside the loop so the same body also works
            # for the per-iteration connection variant.
            conn.open_schema("test_import")
            conn.import_from_pandas(df, "test")
            # Variant "insert into + string generation" (single-row batches):
            # sql = f"""insert into test values ({df["a"][0]},'{df["b"][0]}',{df["c"][0]})"""
            # conn.execute(sql)
            # conn.close()
        print("import took for %s iterations with %s rows: %s" % (iterations, rows, str(stopwatch)))
    finally:
        # Close even when an import fails so the session is not leaked.
        conn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment