Skip to content

Instantly share code, notes, and snippets.

View gordthompson's full-sized avatar

Gord Thompson gordthompson

  • self-employed
  • Calgary, AB, Canada
View GitHub Profile
@gordthompson
gordthompson / crdb_to_sql_trans.py
Last active May 3, 2024 14:48
using .to_sql() with run_transaction()
import pandas as pd
import sqlalchemy as sa
from sqlalchemy_cockroachdb.transaction import run_transaction
engine = sa.create_engine(
"cockroachdb+psycopg2://root@localhost:26257/defaultdb"
)
def crdb_insert_trans(table, eng, keys, data_iter):
@gordthompson
gordthompson / crdb_read_sql_query_trans.py
Last active May 3, 2024 14:48
using .read_sql_query() with run_transaction()
import pandas as pd
import sqlalchemy as sa
from sqlalchemy_cockroachdb.transaction import run_transaction
connection_url = "cockroachdb+psycopg2://root@localhost:26257/defaultdb"
connection_url = "postgresql+psycopg2://scott:tiger@192.168.0.199/test"
engine = sa.create_engine(connection_url)
def read_sql_qry_trans(qry, engine_):
@gordthompson
gordthompson / test_df_for_rescans.py
Last active October 21, 2023 19:41
search DataFrame for null values that could trigger a row scan with fast_executemany
"""
A DataFrame that contains a lot of "null" values (NaN, etc.) can cause a
significant performance penalty with fast_executemany=True. For details, see
https://github.com/mkleehammer/pyodbc/issues/741
This routine scans the specified DataFrame to see how many "row scans" would be
required when using .to_sql() with fast_executemany=True. The first row must
always be scanned, but after that a re-scan will flush the current rows to the
server, increasing network traffic and slowing things down.
@gordthompson
gordthompson / postgresql_df_upsert.py
Last active April 12, 2024 20:09
Build a PostgreSQL INSERT … ON CONFLICT statement and upsert a DataFrame
# version 1.2 - 2022-10-04
import pandas as pd
import sqlalchemy as sa
def df_upsert(data_frame, table_name, engine, schema=None, match_columns=None):
"""
Perform an "upsert" on a PostgreSQL table from a DataFrame.
Constructs an INSERT … ON CONFLICT statement, uploads the DataFrame to a
@gordthompson
gordthompson / mssql_df_upsert.py
Last active October 19, 2023 09:37
Build a T-SQL MERGE statement and upsert a DataFrame
# Copyright 2023 Gordon D. Thompson, gord@gordthompson.com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@gordthompson
gordthompson / mssql_insert_json.py
Last active March 28, 2024 17:16
Alternative to_sql() *method* for mssql+pyodbc
# Alternative to_sql() *method* for mssql+pyodbc or mssql+pymssql
#
# adapted from https://pandas.pydata.org/docs/user_guide/io.html#io-sql-method
#
# version 1.6 - 2024-03-28
from datetime import date
import json
import pandas as pd
import sqlalchemy as sa
@gordthompson
gordthompson / ComExampleUcanaccessDynamicLinkMain.java
Last active May 31, 2022 21:13
dynamically create "linking database" to have UCanAccess only load selected tables
/*
* Copyright 2017 Gordon D. Thompson
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software