Will Koehrsen WillKoehrsen

## visualize_decision_tree.py
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

# Load the iris dataset that ships with scikit-learn
iris = load_iris()

# Build a 10-tree random forest (a single decision tree would work as well)
# and train it on the iris features and labels; fit() returns the estimator
model = RandomForestClassifier(n_estimators=10).fit(iris.data, iris.target)
# Extract single tree

## histograms_matplotlib_seaborn.py
# Import the libraries
import matplotlib.pyplot as plt
import seaborn as sns

# matplotlib histogram
plt.hist(flights['arr_delay'], color = 'blue', edgecolor = 'black',
         bins = int(180/5))

# seaborn histogram
sns.distplot(flights['arr_delay'], hist=True, kde=False,

## submit_assignment.py
# selenium for web driving
import selenium
from selenium import webdriver

# time for pausing between navigation
import time

# Datetime for recording time of submission
import datetime

## different_binwidths.py
# Show 4 different binwidths, one histogram per panel of a 2 x 2 grid
for i, binwidth in enumerate([1, 5, 10, 15]):

    # Set up the plot: subplot positions are 1-based, hence i + 1
    ax = plt.subplot(2, 2, i + 1)

    # Draw the plot; the number of bins is 180 / binwidth, which presumably
    # assumes the plotted delays span about 180 units -- TODO confirm
    ax.hist(flights['arr_delay'], bins = int(180/binwidth),
             color = 'blue', edgecolor = 'black')


## query_vars.py
# Examines the effect of changing a single variable
# Takes in the name of the variable, the trace, and the data
def model_effect(query_var, trace, X):

    # Variables that do not change
    steady_vars = list(X.columns)
    steady_vars.remove(query_var)

    # Linear Model that estimates a grade based on the value of the query variable
    # and one sample from the trace

## keras_book_embedding.py
 # Both inputs are 1-dimensional
book = Input(shape=[1], name='book')
link = Input(shape=[1], name='link')

# Embedding the book: one vector of length embedding_size per book index
# (output shape will be (None, 1, embedding_size))
book_embedding_layer = Embedding(
    input_dim=len(book_index),
    output_dim=embedding_size,
    name='book_embedding',
)
book_embedding = book_embedding_layer(book)

# Embedding the link (shape will be (None, 1, 50))

## remove_collinear.py
# Threshold for removing correlated variables
threshold = 0.9

# Absolute value correlation matrix
corr_matrix = app.corr().abs()

# Upper triangle of correlations. k=1 starts above the main diagonal, so the
# mask keeps each pair of columns once and drops the self-correlations;
# everything outside the mask becomes NaN.
# The mask is cast with the builtin `bool`: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

# Select columns with correlations above threshold

## scatterplot_interact.py
import cufflinks as cf

@interact
def scatter_plot(x=list(df.select_dtypes('number').columns),
                 y=list(df.select_dtypes('number').columns)[1:],
                 theme=list(cf.themes.THEMES.keys()),
                 colorscale=list(cf.colors._scales_names.keys())):

    df.iplot(kind='scatter', x=x, y=y, mode='markers',
             xTitle=x.title(), yTitle=y.title(),

## dataloaders.py
from torchvision import datasets
from torch.utils.data import DataLoader

# Datasets from folders: one ImageFolder per split, each paired with the
# transform registered under the same split name
data = {
    split: datasets.ImageFolder(root=folder, transform=image_transforms[split])
    for split, folder in (('train', traindir), ('valid', validdir))
}

## custom_pairgrid.py
# Function to calculate correlation coefficient between two arrays
def corr(x, y, **kwargs):

    # Calculate the value
    coef = np.corrcoef(x, y)[0][1]
    # Make the label
    label = r'$\rho$ = ' + str(round(coef, 2))

    # Add the label to the plot
    ax = plt.gca()
	from sklearn.datasets import load_iris
	iris = load_iris()

	# Model (can also use single decision tree)
	from sklearn.ensemble import RandomForestClassifier
	model = RandomForestClassifier(n_estimators=10)

	# Train
	model.fit(iris.data, iris.target)
	# Extract single tree
	# Import the libraries
	import matplotlib.pyplot as plt
	import seaborn as sns

	# matplotlib histogram
	plt.hist(flights['arr_delay'], color = 'blue', edgecolor = 'black',
	bins = int(180/5))

	# seaborn histogram
	sns.distplot(flights['arr_delay'], hist=True, kde=False,
	# selenium for web driving
	import selenium
	from selenium import webdriver

	# time for pausing between navigation
	import time

	# Datetime for recording time of submission
	import datetime
	# Show 4 different binwidths
	for i, binwidth in enumerate([1, 5, 10, 15]):

	# Set up the plot
	ax = plt.subplot(2, 2, i + 1)

	# Draw the plot
	ax.hist(flights['arr_delay'], bins = int(180/binwidth),
	color = 'blue', edgecolor = 'black')
	# Examines the effect of changing a single variable
	# Takes in the name of the variable, the trace, and the data
	def model_effect(query_var, trace, X):

	# Variables that do not change
	steady_vars = list(X.columns)
	steady_vars.remove(query_var)

	# Linear Model that estimates a grade based on the value of the query variable
	# and one sample from the trace
	# Both inputs are 1-dimensional
	book = Input(name = 'book', shape = [1])
	link = Input(name = 'link', shape = [1])

	# Embedding the book (shape will be (None, 1, 50))
	book_embedding = Embedding(name = 'book_embedding',
	input_dim = len(book_index),
	output_dim = embedding_size)(book)

	# Embedding the link (shape will be (None, 1, 50))
	# Threshold for removing correlated variables
	threshold = 0.9

	# Absolute value correlation matrix
	corr_matrix = app.corr().abs()

	# Upper triangle of correlations (k=1 starts above the main diagonal).
	# The mask is cast with the builtin `bool`: the `np.bool` alias was
	# deprecated in NumPy 1.20 and removed in 1.24.
	upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

	# Select columns with correlations above threshold
	import cufflinks as cf

	@interact
	def scatter_plot(x=list(df.select_dtypes('number').columns),
	y=list(df.select_dtypes('number').columns)[1:],
	theme=list(cf.themes.THEMES.keys()),
	colorscale=list(cf.colors._scales_names.keys())):

	df.iplot(kind='scatter', x=x, y=y, mode='markers',
	xTitle=x.title(), yTitle=y.title(),
	from torchvision import datasets
	from torch.utils.data import DataLoader

	# Datasets from folders
	data = {
	'train':
	datasets.ImageFolder(root=traindir, transform=image_transforms['train']),
	'valid':
	datasets.ImageFolder(root=validdir, transform=image_transforms['valid']),
	}
	# Function to calculate correlation coefficient between two arrays
	def corr(x, y, **kwargs):

	# Calculate the value
	coef = np.corrcoef(x, y)[0][1]
	# Make the label
	label = r'$\rho$ = ' + str(round(coef, 2))

	# Add the label to the plot
	ax = plt.gca()