This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
def xy(a, b, c, d, n):
    """Return the Cartesian point for four values at angular step ``n``.

    The radius is the mean of the four inputs, min-max normalised into
    the inputs' own range; the angle is ``n`` multiples of pi/3 radians.
    The zero-range case (all four inputs equal) is not guarded, so the
    division then yields NaN coordinates.

    Returns:
        An ``(x, y)`` tuple.
    """
    values = [a, b, c, d]
    lowest = np.min(values)
    span = np.max(values) - lowest
    radius = (np.average(values) - lowest) / span
    angle = (np.pi / 3) * n
    return radius * np.cos(angle), radius * np.sin(angle)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
# [1] load the data and open a figure for visualising it
# source: Kaggle's 'Adult Census Income' dataset
dataset = pd.read_csv('adult.csv')

# the plot classifies people by income (>$50k or not) using the number
# of years in education and the hours worked per week
plt.figure(figsize=(12, 8))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# [2] split into train/test sets
# each row is independently assigned to the training set with
# probability 0.7, so the split is roughly (not exactly) 70/30
mask = np.random.rand(len(dataset)) < 0.7
train, test = dataset[mask], dataset[~mask]

# the training data is further split by whether the person earns
# more than or less than 50K
less = train[train['income'] == '<=50K']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def mean(x):
    """Return the arithmetic mean of the items in ``x``.

    Hand-rolled for illustration; in real code use ``np.mean(x)``.
    """
    total = sum(x)
    count = len(x)
    return total / count
# calculate the mean for both models
less_mean, more_mean = mean(less), mean(more)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def std(x, mu):
    """Return the population standard deviation of ``x`` about ``mu``.

    Hand-rolled for illustration; in real code use ``np.std(x)``.
    """
    squared_deviations = np.power(np.subtract(x, mu), 2)
    variance = sum(squared_deviations) / len(x)
    return np.sqrt(variance)
# calculate the standard deviation for both models, using the std()
# helper defined above together with the means computed earlier
# (less_mean / more_mean).
# NOTE: np.std must not be called as np.std(data, mean) -- its second
# positional parameter is ``axis``, not the mean.
std_less = std(less, less_mean)
std_more = std(more, more_mean)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def choose(): # here we setup our fruit picker script | |
if np.random.randint(0, 10) < 4: | |
# we have chosen bag A (40% probability) | |
if np.random.randint(0, 10) < 4: | |
# we have chosen an apple from bag A | |
return ('A', 'Apple') | |
else: | |
# we have chosen an orange from bag A |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def push_dataframe(self, data, table="raw_data", batchsize=500, | |
overwrite=False): | |
"""Function used to upload a Pandas DataFrame (data) to SQL Server. | |
Keyword arguments: | |
data -- the dataframe to be uploaded | |
table -- the name of the new table in SQL (default "raw_data") | |
batchsize -- the number of rows to upload to the new table within each | |
execution, recommend no more than 1000 (default 500) | |
overwrite -- safety measure used to ensure user does not accidentally |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def push_dataframe(self, data, table="raw_data", batchsize=500): | |
# create execution cursor | |
cursor = self.cnxn.cursor() | |
# activate fast execute | |
cursor.fast_executemany = True | |
# create create table statement | |
query = "CREATE TABLE [" + table + "] (\n" | |
# iterate through each column to be included in create table statement |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def manual(self, query, response=False): | |
cursor = self.cnxn.cursor() # create execution cursor | |
if response: | |
return read_sql(query, self.cnxn) # get sql query output to dataframe | |
try: | |
cursor.execute(query) # execute | |
except pyodbc.ProgrammingError as error: | |
print("Warning:\n{}".format(error)) # print error as a warning |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def drop(self, tables): | |
# check if single or list | |
if isinstance(tables, str): | |
# if single string, convert to single item in list for for-loop | |
tables = [tables] | |
for table in tables: | |
# check for pre-existing table and delete if present | |
query = ("IF OBJECT_ID ('["+table+"]', 'U') IS NOT NULL " |
OlderNewer