Skip to content

Instantly share code, notes, and snippets.

View jamescalam's full-sized avatar
👻

James Briggs jamescalam

👻
View GitHub Profile
@jamescalam
jamescalam / plot_radar_chart.py
Created September 11, 2019 08:03
Example of the math behind creating a radar plot from continuous data split into several categories.
import numpy as np
import matplotlib.pyplot as plt
def xy(a, b, c, d, n):
    """Return the (x, y) position of a radar-plot point on spoke ``n``.

    The radius is the average of the four values, min-max normalised over
    those same four values, so it lies in [0, 1]. The angle is
    ``n * pi / 3``, i.e. consecutive spokes are 60 degrees apart.

    Arguments:
    a, b, c, d -- the four numeric values that make up one category
    n -- index of the spoke the point is placed on

    Returns a tuple ``(x, y)`` of Cartesian coordinates.

    When all four values are equal the min-max range is zero and the
    normalised radius is undefined; the point is then placed at the origin
    instead of producing nan coordinates.
    """
    v = [a, b, c, d]
    low = np.min(v)
    span = np.max(v) - low
    # guard the 0/0 case: identical values have no spread to normalise over
    r = (np.average(v) - low) / span if span != 0 else 0.0
    angle = (np.pi / 3) * n
    return r * np.cos(angle), r * np.sin(angle)
@jamescalam
jamescalam / bayes_import_visualise.py
Created January 12, 2020 11:19
Example code snippet for Naive Bayes fundamentals article, part [1]
import pandas as pd
import matplotlib.pyplot as plt
# [1] Import and visualise our data.
# Load Kaggle's 'Adult Census Income' dataset into a DataFrame; the relative
# path means 'adult.csv' is looked up in the current working directory.
dataset = pd.read_csv('adult.csv')
# Let's visualise the data: we classify by income (>$50k or not) based on
# the number of years in education and hours worked per week.
# Open a new 12 x 8 inch figure (the plotting calls that draw on it are not
# visible in this excerpt).
plt.figure(figsize=(12, 8))
@jamescalam
jamescalam / bayes_train_test_split.py
Created January 12, 2020 11:23
Example code snippet for Naive Bayes fundamentals article, part [2]
# [2] Now split into train/test sets.
# NOTE(review): relies on `np` and `dataset` being defined by earlier code;
# neither is created in this snippet -- confirm numpy is imported upstream.
# Create our mask: one uniform random number per row, True where it is below
# 0.7. The split is therefore only approximately 70/30 and changes on every
# run because no random seed is set here.
mask = np.random.rand(len(dataset)) < 0.7
train = dataset[mask]  # rows where the mask is True (roughly 70% of samples)
test = dataset[~mask]  # the remaining rows (roughly 30% of samples)
# We also need to split the training data based on whether a person earns
# more than or less than 50K; this selects the '<=50K' rows.
less = train[train['income'] == '<=50K']
@jamescalam
jamescalam / bayes_mean.py
Created January 12, 2020 11:39
Example code snippet for Naive Bayes fundamentals article, part [3]
def mean(x):
    """Return the arithmetic mean of the items in ``x``.

    Hand-rolled for illustration (in reality use ``np.mean(x)``). ``x`` must
    support both iteration and ``len()``; an empty ``x`` raises
    ``ZeroDivisionError``.
    """
    total = sum(x)
    count = len(x)
    return total / count
# Calculate the mean for both models (one per income group) with mean().
# NOTE(review): `more` is not defined in the visible excerpts -- presumably
# the '>50K' counterpart of `less`; confirm against the full script.
less_mean = mean(less)
more_mean = mean(more)
@jamescalam
jamescalam / bayes_std.py
Created January 12, 2020 11:42
Example code snippet for Naive Bayes fundamentals article, part [4]
def std(x, mu):
    """Return the population standard deviation of ``x`` about ``mu``.

    Hand-rolled for illustration (in reality use ``np.std(x)``).

    Arguments:
    x -- the sample values
    mu -- the mean the deviations are measured from

    The squared deviations are summed and divided by ``len(x)`` (not
    ``len(x) - 1``) before taking the square root.
    """
    deviations = np.subtract(x, mu)
    squared = np.power(deviations, 2)
    variance = sum(squared) / len(x)
    return np.sqrt(variance)
# Calculate the standard deviation for both models with the std() helper.
# np.std must not be used with these arguments: its second positional
# parameter is `axis`, so the mean would be misread as an axis.
# The means are read from less_mean / more_mean, the names they are bound to
# where they are computed.
std_less = std(less, less_mean)
std_more = std(more, more_mean)
@jamescalam
jamescalam / bayes_fruit_picker.py
Created January 12, 2020 15:38
Example code snippet for Naive Bayes fundamentals article
import numpy as np
def choose(): # here we setup our fruit picker script
if np.random.randint(0, 10) < 4:
# we have chosen bag A (40% probability)
if np.random.randint(0, 10) < 4:
# we have chosen an apple from bag A
return ('A', 'Apple')
else:
# we have chosen an orange from bag A
@jamescalam
jamescalam / mssql_push_dataframe.py
Last active February 22, 2020 19:14
Pushing a dataframe to MS SQL Server
def push_dataframe(self, data, table="raw_data", batchsize=500,
overwrite=False):
"""Function used to upload a Pandas DataFrame (data) to SQL Server.
Keyword arguments:
data -- the dataframe to be uploaded
table -- the name of the new table in SQL (default "raw_data")
batchsize -- the number of rows to upload to the new table within each
execution, recommend no more than 1000 (default 500)
overwrite -- safety measure used to ensure user does not accidentally
def push_dataframe(self, data, table="raw_data", batchsize=500):
# create execution cursor
cursor = self.cnxn.cursor()
# activate fast execute
cursor.fast_executemany = True
# create create table statement
query = "CREATE TABLE [" + table + "] (\n"
# iterate through each column to be included in create table statement
def manual(self, query, response=False):
# Run a raw SQL string against the connection stored in self.cnxn.
#   query    -- SQL text, passed through unchanged
#   response -- when truthy, hand the query to read_sql and return its result
#               instead of executing it on the cursor (default False)
# NOTE(review): read_sql and pyodbc are not imported in this excerpt --
# presumably pandas.read_sql and the pyodbc driver; confirm in the full module.
# NOTE(review): nothing visible here commits the transaction; verify whether
# that happens elsewhere.
# The cursor is created up front, even on the response=True path where it is
# not used.
cursor = self.cnxn.cursor() # create execution cursor
if response:
return read_sql(query, self.cnxn) # get sql query output to dataframe
try:
cursor.execute(query) # execute
except pyodbc.ProgrammingError as error:
# only ProgrammingError is caught; it is printed to stdout and not re-raised
print("Warning:\n{}".format(error)) # print error as a warning
def drop(self, tables):
# check if single or list
if isinstance(tables, str):
# if single string, convert to single item in list for for-loop
tables = [tables]
for table in tables:
# check for pre-existing table and delete if present
query = ("IF OBJECT_ID ('["+table+"]', 'U') IS NOT NULL "