Skip to content

Instantly share code, notes, and snippets.

View WillKoehrsen's full-sized avatar
🌆
Improving

Will Koehrsen WillKoehrsen

🌆
Improving
View GitHub Profile
@WillKoehrsen
WillKoehrsen / visualize_decision_tree.py
Last active March 26, 2024 04:41
How to visualize a single decision tree in Python
from sklearn.datasets import load_iris
iris = load_iris()
# Model (can also use single decision tree)
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=10)
# Train
model.fit(iris.data, iris.target)
# Extract single tree
# Import the libraries
import matplotlib.pyplot as plt
import seaborn as sns
# matplotlib histogram
plt.hist(flights['arr_delay'], color = 'blue', edgecolor = 'black',
bins = int(180/5))
# seaborn histogram
sns.distplot(flights['arr_delay'], hist=True, kde=False,
@WillKoehrsen
WillKoehrsen / submit_assignment.py
Created March 10, 2018 20:26
Automation of Assignment Submisison
# selenium for web driving
import selenium
from selenium import webdriver
# time for pausing between navigation
import time
# Datetime for recording time of submission
import datetime
# Show 4 different binwidths
for i, binwidth in enumerate([1, 5, 10, 15]):
# Set up the plot
ax = plt.subplot(2, 2, i + 1)
# Draw the plot
ax.hist(flights['arr_delay'], bins = int(180/binwidth),
color = 'blue', edgecolor = 'black')
# Examines the effect of changing a single variable
# Takes in the name of the variable, the trace, and the data
def model_effect(query_var, trace, X):
# Variables that do not change
steady_vars = list(X.columns)
steady_vars.remove(query_var)
# Linear Model that estimates a grade based on the value of the query variable
# and one sample from the trace
# Both inputs are 1-dimensional
book = Input(name = 'book', shape = [1])
link = Input(name = 'link', shape = [1])
# Embedding the book (shape will be (None, 1, 50))
book_embedding = Embedding(name = 'book_embedding',
input_dim = len(book_index),
output_dim = embedding_size)(book)
# Embedding the link (shape will be (None, 1, 50))
# Threshold for removing correlated variables
threshold = 0.9
# Absolute value correlation matrix
corr_matrix = app.corr().abs()
# Upper triangle of correlations
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
# Select columns with correlations above threshold
import cufflinks as cf
@interact
def scatter_plot(x=list(df.select_dtypes('number').columns),
y=list(df.select_dtypes('number').columns)[1:],
theme=list(cf.themes.THEMES.keys()),
colorscale=list(cf.colors._scales_names.keys())):
df.iplot(kind='scatter', x=x, y=y, mode='markers',
xTitle=x.title(), yTitle=y.title(),
from torchvision import datasets
from torch.utils.data import DataLoader
# Datasets from folders
data = {
'train':
datasets.ImageFolder(root=traindir, transform=image_transforms['train']),
'valid':
datasets.ImageFolder(root=validdir, transform=image_transforms['valid']),
}
# Function to calculate correlation coefficient between two arrays
def corr(x, y, **kwargs):
# Calculate the value
coef = np.corrcoef(x, y)[0][1]
# Make the label
label = r'$\rho$ = ' + str(round(coef, 2))
# Add the label to the plot
ax = plt.gca()