Skip to content

Instantly share code, notes, and snippets.

View mikenac's full-sized avatar

Mike Nacey mikenac

View GitHub Profile
@mikenac
mikenac / main.py
Last active November 4, 2025 14:05
Late binding concept relationship (needs duckdb and tabulate)
import duckdb
from datetime import datetime, date
import hashlib
import json
class ConceptMapper:
def __init__(self, db_path=':memory:'):
"""Initialize the concept mapper with DuckDB backend"""
self.conn = duckdb.connect(db_path)
self._create_schema()
/**
 * De-duplicate a table using a standard window function.
 *
 * @param spark SparkSession
 * @param table DataFrame to be de-duplicated
 * @param discriminator Level 1 discriminator, used in addition to the unique keys. This should be tenantid.
 * @param orderByColumn Column to use for ordering
 * @param uniqueKeys Unique keys to partition by when calculating the latest unique row. Should be the unique entity id.
 * @return A DataFrame of unique rows based on the criteria
 */
from typing import Iterable, Mapping, Tuple
import re
import csv
from flashtext import KeywordProcessor
from openpyxl.reader.excel import load_workbook
class MissingTokenException(Exception):
''' Mismatching token error.
token_map -> file: list of missing from the file
@mikenac
mikenac / parse_sql.py
Last active July 12, 2021 14:03
Parse SQL to look for certain fields being selected from certain tables. This is helpful when doing audits of query logs.
from typing import Dict, Mapping, Tuple, List, Iterable
import sqlparse # type: ignore
from more_itertools import peekable # type: ignore
from sqlparse.sql import IdentifierList, Identifier, Statement # type: ignore
from sqlparse.tokens import DML, Whitespace, Newline, Keyword, Wildcard # type: ignore
def get_fields_selected(sql: str) -> Iterable[Tuple[Iterable[str], Iterable[str]]]:
'''
@mikenac
mikenac / same_physician.sql
Last active May 19, 2021 14:45
Sequential Schedule Detection
-- Input table for the sequential schedule detection example.
-- NOTE(review): presumably one row per scheduled procedure -- confirm against the full script.
CREATE OR REPLACE TABLE PROC (
procedure_id TEXT, -- procedure identifier, stored as text (sample rows use '100', '101')
physician TEXT, -- physician name (sample rows use 'Bob', 'Larry')
room TEXT, -- room label (sample rows use 'OR-1')
start_date TIMESTAMP -- full timestamp, not just a date (sample rows include a time of day)
);
INSERT INTO PROC VALUES
('100', 'Bob', 'OR-1', '2021-01-15 08:00:00'), -- not same - first bob with larry after
('101', 'Larry', 'OR-1', '2021-01-15 09:00:00'), -- not same prev phys not the same
import pandas as pd
import json
json_data = """
[
{
"person": {
"id": 45,
"firstName": "Robert",
"lastName": "Johnson",
from queue import Queue
from threading import Thread
import uuid
import time
class PrinterWorker(Thread):
def __init__(self, queue):
Thread.__init__(self)
@mikenac
mikenac / BankAccount.py
Last active October 29, 2020 17:00
Simple Bank Account class for Interview Testing
import locale
class Account:
""" A simple bank account class """
"""constructor"""
""" in_locale = local string (e.g. 'en_US.utf8')
initial_balance = amount of currency to add to the initial account balance
"""
def __init__(self, in_locale, initial_balance):
@mikenac
mikenac / BankAccount.java
Last active August 18, 2020 17:48
Simple Bank Account Class for Interview Testing
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
/***
* Simple Bank Account class
*/
class Account {
double balance;
@mikenac
mikenac / schema_compare.py
Created July 29, 2020 16:03
Compare JSON schemas
import pandas as pd
import json
def json_to_df(json_file):
""" Convert JSON file to flattened Pandas frame. The full path of each attribute
will be the name of the column."""
with open(json_file) as json_file:
data = json.load(json_file)
frame = pd.json_normalize(data)