Skip to content

Instantly share code, notes, and snippets.

@Dreeseaw
Created October 22, 2022 22:56
Show Gist options
  • Save Dreeseaw/ce46bf15cb6e322c30df5c83d753ec4b to your computer and use it in GitHub Desktop.
Save Dreeseaw/ce46bf15cb6e322c30df5c83d753ec4b to your computer and use it in GitHub Desktop.
a simple python app to mimic snowflake via duckdb
import sqlparse
import duckdb
from duckdb import DuckDBPyConnection as DuckConn
from fastapi import FastAPI
from datetime import datetime
from typing import List, Any, Tuple
app = FastAPI()
conn = duckdb.connect(database=':memory:', read_only=False)
cached_tables = dict()
def check_cache(tables: List[str]) -> List[str]:
ret = list()
for table in tables:
if not cached_tables.get(table, None):
ret.append(table)
else:
print(f"{table} cached: not preloading")
cached_tables[table] = datetime.now()
return ret
def find_tables(query: str) -> List[str]:
tables = list()
parsed = sqlparse.parse(query)[0]
for tok in parsed.tokens:
if isinstance(tok, sqlparse.sql.Identifier):
tables.append(tok.value)
return tables
def preload_tables(conn: DuckConn, tables: List[str]) -> List[bool]:
cached = list()
for table in tables:
q = f"create table {table} as select * from read_csv_auto('{table}.csv');"
try:
conn.execute(q).fetchall()
cached.append(False)
except:
cached.append(True)
return cached
def preload_exec(conn: DuckConn, query: str) -> List[Tuple]:
tables = find_tables(query)
tables = check_cache(tables)
_ = preload_tables(conn, tables)
return conn.execute(query).fetchall()
@app.get("/query")
def exec_query(query: str):
return preload_exec(conn, query.replace("_", " "))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment