Mike Nacey mikenac

## main.py
import duckdb
from datetime import datetime, date
import hashlib
import json

class ConceptMapper:
    def __init__(self, db_path=':memory:'):
        """Open the DuckDB connection and set up the schema.

        ``db_path`` is handed unchanged to ``duckdb.connect``; it defaults
        to ``':memory:'``. The resulting connection is kept on
        ``self.conn``, after which ``self._create_schema()`` is called.
        """
        connection = duckdb.connect(db_path)
        self.conn = connection
        self._create_schema()

## dataframe.scala
 /**
   * De-duplicate a table using a standard window function
   *
   * @param spark         SparkSession
   * @param table         Dataframe to be de-duped
   * @param discriminator Level 1 discriminator, used in addition to unique keys. This should be tenantid.
   * @param orderByColumn column to use for ordering
   * @param uniqueKeys    unique keys to partition by when calculating unique latest. Should be the unique entity id.
   * @return A dataframe of unique rows based on the criteria
   */

## Localizer.py

from typing import Iterable, Mapping, Tuple
import re
import csv
from flashtext import KeywordProcessor
from openpyxl.reader.excel import load_workbook

class MissingTokenException(Exception):
    ''' Mismatching token error.
        token_map -> file: list of missing from the file

## parse_sql.py

from typing import Dict, Mapping, Tuple, List, Iterable
import sqlparse # type: ignore
from more_itertools import peekable # type: ignore
from sqlparse.sql import IdentifierList, Identifier, Statement # type: ignore
from sqlparse.tokens import DML, Whitespace, Newline, Keyword, Wildcard # type: ignore


def get_fields_selected(sql: str) -> Iterable[Tuple[Iterable[str], Iterable[str]]]:
    '''

## same_physician.sql
-- PROC: table populated by the INSERT statement that follows.
-- CREATE OR REPLACE replaces any existing PROC table, so the script can be re-run.
-- NOTE(review): presumably one row per procedure, keyed by procedure_id -- confirm.
CREATE OR REPLACE TABLE PROC (
    procedure_id TEXT,     -- identifier stored as text (e.g. '100')
    physician TEXT,        -- physician name (e.g. 'Bob')
    room TEXT,             -- room label (e.g. 'OR-1')
    start_date TIMESTAMP   -- start timestamp (e.g. '2021-01-15 08:00:00')
);

INSERT INTO PROC VALUES
('100', 'Bob', 'OR-1', '2021-01-15 08:00:00'), -- not same - first bob with larry after
('101', 'Larry', 'OR-1', '2021-01-15 09:00:00'), -- not same prev phys not the same

## composite_record.py
import pandas as pd
import json

json_data = """
[
	{
		"person": {
			"id": 45,
			"firstName": "Robert",
			"lastName": "Johnson",

## parallel_processing.py

from queue import Queue
from threading import Thread
import uuid
import time

class PrinterWorker(Thread):

    def __init__(self, queue):
        Thread.__init__(self)

## BankAccount.py
import locale

class Account:
	""" A simple bank account class """

	"""constructor"""
	""" in_locale = local string (e.g. 'en_US.utf8')
			initial_balance = amount of currency to add to the initial account balance
	"""
	def __init__(self, in_locale, initial_balance):

## BankAccount.java
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

/***
 * Simple Bank Account class
 */
class Account {

    double balance;

## schema_compare.py
import pandas as pd
import json

def json_to_df(json_file):
    """ Convert JSON file to flattened Pandas frame. The full path of each attribute

    will be the name of the column."""
    with open(json_file) as json_file:
        data = json.load(json_file)
        frame = pd.json_normalize(data)
	import duckdb
	from datetime import datetime, date
	import hashlib
	import json

	class ConceptMapper:
	def __init__(self, db_path=':memory:'):
	"""Initialize the concept mapper with DuckDB backend"""
	self.conn = duckdb.connect(db_path)
	self._create_schema()
	/**
	* De-duplicate a table using a standard window function
	*
	* @param spark SparkSession
	* @param table Dataframe to be de-duped
	* @param discriminator Level 1 discriminator, used in addition to unique keys. This should be tenantid.
	* @param orderByColumn column to use for ordering
	* @param uniqueKeys unique keys to partition by when calculating unique latest. Should be the unique entity id.
	* @return A dataframe of unique rows based on the criteria
	*/

	from typing import Iterable, Mapping, Tuple
	import re
	import csv
	from flashtext import KeywordProcessor
	from openpyxl.reader.excel import load_workbook

	class MissingTokenException(Exception):
	''' Mismatching token error.
	token_map -> file: list of missing from the file

	from typing import Dict, Mapping, Tuple, List, Iterable
	import sqlparse # type: ignore
	from more_itertools import peekable # type: ignore
	from sqlparse.sql import IdentifierList, Identifier, Statement # type: ignore
	from sqlparse.tokens import DML, Whitespace, Newline, Keyword, Wildcard # type: ignore


	def get_fields_selected(sql: str) -> Iterable[Tuple[Iterable[str], Iterable[str]]]:
	'''
	CREATE OR REPLACE TABLE PROC (
	procedure_id TEXT,
	physician TEXT,
	room TEXT,
	start_date TIMESTAMP
	);

	INSERT INTO PROC VALUES
	('100', 'Bob', 'OR-1', '2021-01-15 08:00:00'), -- not same - first bob with larry after
	('101', 'Larry', 'OR-1', '2021-01-15 09:00:00'), -- not same prev phys not the same
	import pandas as pd
	import json

	json_data = """
	[
	{
	"person": {
	"id": 45,
	"firstName": "Robert",
	"lastName": "Johnson",

	from queue import Queue
	from threading import Thread
	import uuid
	import time

	class PrinterWorker(Thread):

	def __init__(self, queue):
	Thread.__init__(self)
	import locale

	class Account:
	""" A simple bank account class """

	"""constructor"""
	""" in_locale = local string (e.g. 'en_US.utf8')
	initial_balance = amount of currency to add to the initial account balance
	"""
	def __init__(self, in_locale, initial_balance):
	import java.lang.reflect.InvocationTargetException;
	import java.lang.reflect.Method;

	/***
	* Simple Bank Account class
	*/
	class Account {

	double balance;
	import pandas as pd
	import json

	def json_to_df(json_file):
	""" Convert JSON file to flattened Pandas frame. The full path of each attribute

	will be the name of the column."""
	with open(json_file) as json_file:
	data = json.load(json_file)
	frame = pd.json_normalize(data)