James Briggs jamescalam

## plot_radar_chart.py
import numpy as np
import matplotlib.pyplot as plt


def xy(a, b, c, d, n):
    """Return the (x, y) position of a radar-chart point on axis ``n``.

    The four values are collapsed into one radius: their average,
    min-max normalised against the smallest and largest of the four.
    The radius is then placed at an angle of ``(pi / 3) * n`` radians,
    so consecutive values of ``n`` are 60 degrees apart.

    Note: when all four values are equal the normalisation divides
    zero by zero, so no meaningful radius is produced.
    """
    values = [a, b, c, d]
    lowest = np.min(values)
    highest = np.max(values)
    radius = (np.average(values) - lowest) / (highest - lowest)
    angle = (np.pi / 3) * n
    return radius * np.cos(angle), radius * np.sin(angle)

## bayes_import_visualise.py
import pandas as pd
import matplotlib.pyplot as plt

# [1] import and visualise our data
# load the CSV into a DataFrame; the file comes from Kaggle's
# 'Adult Census Income' dataset and is read via a relative path
dataset = pd.read_csv('adult.csv')

# let's visualise the data: we classify by income (>$50k or not)
# based on number of years in education and hours worked per week
# open a new figure with figsize (12, 8) to draw on
plt.figure(figsize=(12, 8))

## bayes_train_test_split.py
# [2] split the data into a train set and a test set
# draw one random number per row; rows whose draw is below 0.7
# (roughly 70%) go to train, all remaining rows go to test
mask = np.random.rand(len(dataset)) < 0.7
train, test = dataset[mask], dataset[~mask]

# the training rows are also split by whether the person earns
# more than or less than 50K; this selects the '<=50K' rows
less = train[train['income'] == '<=50K']

## bayes_mean.py
def mean(x):
    """Return the arithmetic mean of the values in ``x``.

    Hand-rolled for illustration; in reality use ``np.mean(x)``.
    Raises ZeroDivisionError when ``x`` is empty.
    """
    total = sum(x)
    count = len(x)
    return total / count

# calculate the mean for both models with the hand-rolled mean() helper
less_mean = mean(less)
# NOTE(review): `more` is not defined in the visible part of this file --
# presumably the '>50K' counterpart of `less`; confirm before running
more_mean = mean(more)

## bayes_std.py
def std(x, mu):
    """Return the standard deviation of ``x`` around the mean ``mu``.

    The squared deviations from ``mu`` are summed and divided by
    ``len(x)`` (no n-1 correction) before taking the square root.
    Hand-rolled for illustration; in reality use ``np.std(x)``.
    """
    squared_deviations = np.power(np.subtract(x, mu), 2)
    variance = sum(squared_deviations) / len(x)
    return np.sqrt(variance)

# calculate standard deviation for both models with the std() helper
# defined in this snippet; np.std takes an axis (not a mean) as its second
# positional argument, and the means are stored as less_mean / more_mean
std_less = std(less, less_mean)
std_more = std(more, more_mean)

## bayes_fruit_picker.py
import numpy as np

def choose():  # here we setup our fruit picker script
    if np.random.randint(0, 10) < 4:
        # we have chosen bag A (40% probability)
        if np.random.randint(0, 10) < 4:
            # we have chosen an apple from bag A
            return ('A', 'Apple')
        else:
            # we have chosen an orange from bag A

## mssql_push_dataframe.py
def push_dataframe(self, data, table="raw_data", batchsize=500,
                   overwrite=False):
    """Function used to upload a Pandas DataFrame (data) to SQL Server.

    Keyword arguments:
    data -- the dataframe to be uploaded
    table -- the name of the new table in SQL (default "raw_data")
    batchsize -- the number of rows to upload to the new table within each
                 execution, recommend no more than 1000 (default 500)
    overwrite -- safety measure used to ensure user does not accidentally

## mssql_push_dataframe_short.py
def push_dataframe(self, data, table="raw_data", batchsize=500):
    """Begin uploading a DataFrame to a new SQL table (shortened excerpt).

    Only the opening of the routine is visible here: it opens a cursor on
    ``self.cnxn``, switches on ``fast_executemany`` for that cursor and
    starts building a ``CREATE TABLE`` statement for ``table``.

    Keyword arguments:
    data -- the dataframe to be uploaded (not used in the visible part)
    table -- the name of the new table in SQL (default "raw_data")
    batchsize -- not used in the visible part (default 500)
    """
    # create execution cursor
    cursor = self.cnxn.cursor()
    # activate fast execute
    cursor.fast_executemany = True

    # start the create table statement
    # NOTE(review): the table name is concatenated straight into the SQL
    # text, so it has to be a trusted identifier
    query = "CREATE TABLE [" + table + "] (\n"

    # iterate through each column to be included in create table statement

## pysqlplus_manual_short.py
def manual(self, query, response=False):
    """Run a hand-written SQL query on ``self.cnxn``.

    Keyword arguments:
    query -- the SQL text to run
    response -- when true, the query is handed to ``read_sql`` and its
                result is returned; otherwise the query is executed on a
                cursor and nothing is returned (default False)

    A ``pyodbc.ProgrammingError`` raised while executing is printed as a
    warning instead of being propagated.
    """
    # the cursor is opened up front, whichever path is taken
    cursor = self.cnxn.cursor()

    if not response:
        try:
            cursor.execute(query)
        except pyodbc.ProgrammingError as error:
            # report the failure as a warning and carry on
            print("Warning:\n{}".format(error))
        return None

    # caller wants the output: read the query result into a dataframe
    return read_sql(query, self.cnxn)

## pysqlplus_drop_short.py
def drop(self, tables):

    # check if single or list
    if isinstance(tables, str):
        # if single string, convert to single item in list for for-loop
        tables = [tables]

    for table in tables:
        # check for pre-existing table and delete if present
        query = ("IF OBJECT_ID ('["+table+"]', 'U') IS NOT NULL "
	import numpy as np
	import matplotlib.pyplot as plt


	def xy(a, b, c, d, n):
		"""Return the (x, y) position of a radar-chart point on axis ``n``.

		The four values are collapsed into one radius: their average,
		min-max normalised against the smallest and largest of the four.
		The radius is then placed at an angle of ``(pi / 3) * n`` radians,
		so consecutive values of ``n`` are 60 degrees apart.

		Note: when all four values are equal the normalisation divides
		zero by zero, so no meaningful radius is produced.
		"""
		v = [a, b, c, d]
		r = (np.average(v) - np.min(v)) / (np.max(v) - np.min(v))
		# the multiplication signs were missing here (``rnp.cos((np.pi/3)n)``)
		x = r * np.cos((np.pi / 3) * n)
		y = r * np.sin((np.pi / 3) * n)
		return x, y
	import pandas as pd
	import matplotlib.pyplot as plt

	# [1] import and visualise our data
	# load the CSV into a DataFrame; the file comes from Kaggle's
	# 'Adult Census Income' dataset and is read via a relative path
	dataset = pd.read_csv('adult.csv')

	# let's visualise the data: we classify by income (>$50k or not)
	# based on number of years in education and hours worked per week
	# open a new figure with figsize (12, 8) to draw on
	plt.figure(figsize=(12, 8))
	# [2] split the data into a train set and a test set
	# draw one random number per row; rows whose draw is below 0.7
	# (roughly 70%) go to train, all remaining rows go to test
	mask = np.random.rand(len(dataset)) < 0.7
	train, test = dataset[mask], dataset[~mask]

	# the training rows are also split by whether the person earns
	# more than or less than 50K; this selects the '<=50K' rows
	less = train[train['income'] == '<=50K']
	def mean(x):
		"""Return the arithmetic mean of the values in ``x``.

		Hand-rolled for illustration; in reality use ``np.mean(x)``.
		Raises ZeroDivisionError when ``x`` is empty.
		"""
		# body re-indented under the def; it had lost its indentation
		return sum(x) / len(x)

	# calculate the mean for both models with the hand-rolled mean() helper
	less_mean = mean(less)
	# NOTE(review): `more` is not defined in the visible part of this file --
	# presumably the '>50K' counterpart of `less`; confirm before running
	more_mean = mean(more)
	def std(x, mu):
		"""Return the standard deviation of ``x`` around the mean ``mu``.

		The squared deviations from ``mu`` are summed and divided by
		``len(x)`` (no n-1 correction) before taking the square root.
		Hand-rolled for illustration; in reality use ``np.std(x)``.
		"""
		# body re-indented under the def; it had lost its indentation
		return np.sqrt(sum(np.power(np.subtract(x, mu), 2)) / len(x))

	# calculate standard deviation for both models with the std() helper
	# defined in this snippet; np.std takes an axis (not a mean) as its second
	# positional argument, and the means are stored as less_mean / more_mean
	std_less = std(less, less_mean)
	std_more = std(more, more_mean)
	import numpy as np

	def choose(): # here we setup our fruit picker script
	if np.random.randint(0, 10) < 4:
	# we have chosen bag A (40% probability)
	if np.random.randint(0, 10) < 4:
	# we have chosen an apple from bag A
	return ('A', 'Apple')
	else:
	# we have chosen an orange from bag A
	def push_dataframe(self, data, table="raw_data", batchsize=500,
	overwrite=False):
	"""Function used to upload a Pandas DataFrame (data) to SQL Server.

	Keyword arguments:
	data -- the dataframe to be uploaded
	table -- the name of the new table in SQL (default "raw_data")
	batchsize -- the number of rows to upload to the new table within each
	execution, recommend no more than 1000 (default 500)
	overwrite -- safety measure used to ensure user does not accidentally
	def push_dataframe(self, data, table="raw_data", batchsize=500):
		"""Begin uploading a DataFrame to a new SQL table (shortened excerpt).

		Only the opening of the routine is visible here: it opens a cursor
		on ``self.cnxn``, switches on ``fast_executemany`` for that cursor
		and starts building a ``CREATE TABLE`` statement for ``table``.

		Keyword arguments:
		data -- the dataframe to be uploaded (not used in the visible part)
		table -- the name of the new table in SQL (default "raw_data")
		batchsize -- not used in the visible part (default 500)
		"""
		# create execution cursor
		cursor = self.cnxn.cursor()
		# activate fast execute
		cursor.fast_executemany = True

		# start the create table statement
		# NOTE(review): the table name is concatenated straight into the SQL
		# text, so it has to be a trusted identifier
		query = "CREATE TABLE [" + table + "] (\n"

		# iterate through each column to be included in create table statement
	def manual(self, query, response=False):
		"""Run a hand-written SQL query on ``self.cnxn``.

		Keyword arguments:
		query -- the SQL text to run
		response -- when true, the query is handed to ``read_sql`` and its
		            result is returned; otherwise the query is executed on
		            a cursor and nothing is returned (default False)

		A ``pyodbc.ProgrammingError`` raised while executing is printed as
		a warning instead of being propagated.
		"""
		cursor = self.cnxn.cursor()  # create execution cursor

		if response:
			return read_sql(query, self.cnxn)  # get sql query output to dataframe
		try:
			cursor.execute(query)  # execute
		except pyodbc.ProgrammingError as error:
			print("Warning:\n{}".format(error))  # print error as a warning
	def drop(self, tables):

	# check if single or list
	if isinstance(tables, str):
	# if single string, convert to single item in list for for-loop
	tables = [tables]

	for table in tables:
	# check for pre-existing table and delete if present
	query = ("IF OBJECT_ID ('["+table+"]', 'U') IS NOT NULL "