Skip to content

Instantly share code, notes, and snippets.

Avatar
🌆
Improving

Will Koehrsen WillKoehrsen

🌆
Improving
View GitHub Profile
@WillKoehrsen
WillKoehrsen / submit_assignment.py
Created Mar 10, 2018
Automation of Assignment Submission
View submit_assignment.py
# selenium for web driving
import selenium
from selenium import webdriver
# time for pausing between navigation
import time
# Datetime for recording time of submission
import datetime
View random_forest.py
from sklearn.ensemble import RandomForestClassifier
# Create the model with 100 trees
model = RandomForestClassifier(n_estimators=100,
bootstrap = True,
max_features = 'sqrt')
# Fit on training data
model.fit(train, train_labels)
View histograms_matplotlib_seaborn.py
# Import the libraries
import matplotlib.pyplot as plt
import seaborn as sns
# matplotlib histogram
plt.hist(flights['arr_delay'], color = 'blue', edgecolor = 'black',
bins = int(180/5))
# seaborn histogram
sns.distplot(flights['arr_delay'], hist=True, kde=False,
@WillKoehrsen
WillKoehrsen / visualize_decision_tree.py
Last active Feb 11, 2021
How to visualize a single decision tree in Python
View visualize_decision_tree.py
from sklearn.datasets import load_iris
iris = load_iris()
# Model (can also use single decision tree)
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=10)
# Train
model.fit(iris.data, iris.target)
# Extract single tree
View more_data_sample.py
# Observations from multiple trips
c = np.array([[3, 2, 1],
[2, 3, 1],
[3, 2, 1],
[2, 3, 1]])
with pm.Model() as model:
# Parameters follow a Dirichlet distribution
parameters = pm.Dirichlet('parameters', a=alphas, shape=3)
# Observed data is a multinomial distribution
View scatterplot_interact.py
import cufflinks as cf
@interact
def scatter_plot(x=list(df.select_dtypes('number').columns),
y=list(df.select_dtypes('number').columns)[1:],
theme=list(cf.themes.THEMES.keys()),
colorscale=list(cf.colors._scales_names.keys())):
df.iplot(kind='scatter', x=x, y=y, mode='markers',
xTitle=x.title(), yTitle=y.title(),
View normal_model.py
import pymc3 as pm
# Context for the model
with pm.Model() as normal_model:
# The data likelihood is a Normal distribution (the GLM family)
family = pm.glm.families.Normal()
# Creating the model requires a formula and data (and optionally a family)
pm.GLM.from_formula(formula, data = X_train, family = family)
View bokeh_delay_hovertool.py
# Create the blank plot
p = figure(plot_height = 600, plot_width = 600,
title = 'Histogram of Arrival Delays',
x_axis_label = 'Delay (min)]',
y_axis_label = 'Number of Flights')
# Add a quad glyph with source this time
p.quad(bottom=0, top='flights', left='left', right='right', source=src,
fill_color='red', line_color='black', fill_alpha = 0.75,
hover_fill_alpha = 1.0, hover_fill_color = 'navy')
View flights_main.py
# Pandas for data management
import pandas as pd
# os methods for manipulating paths
from os.path import dirname, join
# Bokeh basics
from bokeh.io import curdoc
from bokeh.models.widgets import Tabs
View sleep_model.py
with pm.Model() as sleep_model:
# Create the alpha and beta parameters
# Assume a normal distribution
alpha = pm.Normal('alpha', mu=0.0, tau=0.05, testval=0.0)
beta = pm.Normal('beta', mu=0.0, tau=0.05, testval=0.0)
# The sleep probability is modeled as a logistic function
p = pm.Deterministic('p', 1. / (1. + tt.exp(beta * time + alpha)))