Skip to content

Instantly share code, notes, and snippets.


Will Koehrsen WillKoehrsen

View GitHub Profile
WillKoehrsen /
Created Mar 10, 2018
Automation of Assignment Submission
# selenium for web driving
import selenium
from selenium import webdriver
# time for pausing between navigation
import time
# Datetime for recording time of submission
import datetime
from sklearn.ensemble import RandomForestClassifier
# Create the model with 100 trees
model = RandomForestClassifier(n_estimators=100,
bootstrap = True,
max_features = 'sqrt')
# Fit on training data: model.fit(<training features>, train_labels)
# Import the libraries
import matplotlib.pyplot as plt
import seaborn as sns
# matplotlib histogram
plt.hist(flights['arr_delay'], color = 'blue', edgecolor = 'black',
bins = int(180/5))
# seaborn histogram
sns.distplot(flights['arr_delay'], hist=True, kde=False,
WillKoehrsen /
Last active Feb 11, 2021
How to visualize a single decision tree in Python
from sklearn.datasets import load_iris
iris = load_iris()
# Model (can also use single decision tree)
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=10)
# Train
model.fit(iris.data, iris.target)
# Extract single tree
# Observations from multiple trips
c = np.array([[3, 2, 1],
[2, 3, 1],
[3, 2, 1],
[2, 3, 1]])
with pm.Model() as model:
# Parameters are a dirichlet distribution
parameters = pm.Dirichlet('parameters', a=alphas, shape=3)
# Observed data is a multinomial distribution
import cufflinks as cf
def scatter_plot(x=list(df.select_dtypes('number').columns),
df.iplot(kind='scatter', x=x, y=y, mode='markers',
xTitle=x.title(), yTitle=y.title(),
import pymc3 as pm
# Context for the model
with pm.Model() as normal_model:
# The prior for the data likelihood is a Normal Distribution
family = pm.glm.families.Normal()
# Creating the model requires a formula and data (and optionally a family)
pm.GLM.from_formula(formula, data = X_train, family = family)
# Create the blank plot
p = figure(plot_height = 600, plot_width = 600,
title = 'Histogram of Arrival Delays',
x_axis_label = 'Delay (min)',
y_axis_label = 'Number of Flights')
# Add a quad glyph with source this time
p.quad(bottom=0, top='flights', left='left', right='right', source=src,
fill_color='red', line_color='black', fill_alpha = 0.75,
hover_fill_alpha = 1.0, hover_fill_color = 'navy')
# Pandas for data management
import pandas as pd
# os methods for manipulating paths
from os.path import dirname, join
# Bokeh basics
from bokeh.io import curdoc
from bokeh.models.widgets import Tabs
with pm.Model() as sleep_model:
# Create the alpha and beta parameters
# Assume a normal distribution
alpha = pm.Normal('alpha', mu=0.0, tau=0.05, testval=0.0)
beta = pm.Normal('beta', mu=0.0, tau=0.05, testval=0.0)
# The sleep probability is modeled as a logistic function
p = pm.Deterministic('p', 1. / (1. + tt.exp(beta * time + alpha)))