Skip to content

Instantly share code, notes, and snippets.

View WillKoehrsen's full-sized avatar
All watched over by machines of loving grace

Will Koehrsen WillKoehrsen

All watched over by machines of loving grace
View GitHub Profile
import pandas as pd
import numpy as np
# Pandas display limits: show at most 30 columns and 20 rows when
# printing a DataFrame.
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 20)
from IPython import get_ipython
# Handle to the currently running IPython shell.
# NOTE(review): per the IPython docs get_ipython() returns None when no
# interactive shell is active, so guard before using `ipython` in scripts.
ipython = get_ipython()
from sklearn.ensemble import RandomForestClassifier

# Create the model with 100 trees. bootstrap=True and max_features='sqrt'
# are passed explicitly so the forest's sampling settings are visible here.
model = RandomForestClassifier(
    n_estimators=100,
    bootstrap=True,
    max_features='sqrt',
)

# Fit on training data
# NOTE(review): this call was fused into the comment above in the source
# ("# Fit on training data, train_labels)"); the feature-matrix name `train`
# is reconstructed - confirm against the caller.
model.fit(train, train_labels)
# Observations from multiple trips: four rows of three counts each.
c = np.array(
    [
        [3, 2, 1],
        [2, 3, 1],
        [3, 2, 1],
        [2, 3, 1],
    ]
)
# Build the model inside a PyMC3 model context so the random variables
# declared below are registered on `model`.
with pm.Model() as model:
    # Parameters are a dirichlet distribution
    # (`alphas` holds the concentration values and is defined outside this
    # excerpt; shape=3 gives one parameter per category.)
    parameters = pm.Dirichlet('parameters', a=alphas, shape=3)
    # Observed data is a multinomial distribution
    # NOTE(review): the multinomial likelihood announced by the comment above
    # is not present in this excerpt - restore it from the original source.
import pymc3 as pm
# Context for the model
with pm.Model() as normal_model:
    # The prior for the data likelihood is a Normal Distribution
    family = pm.glm.families.Normal()

    # Creating the model requires a formula and data (and optionally a family)
    # `formula` and `X_train` are defined outside this excerpt.
    pm.GLM.from_formula(formula, data=X_train, family=family)
# Create the blank plot
# (axis label fixed: the original read 'Delay (min)]' with a stray bracket)
p = figure(
    plot_height=600,
    plot_width=600,
    title='Histogram of Arrival Delays',
    x_axis_label='Delay (min)',
    y_axis_label='Number of Flights',
)

# Add a quad glyph with source this time.
# Each bar runs from `left` to `right` on x and from 0 to `flights` on y;
# the string arguments name columns looked up in `src`
# (presumably a ColumnDataSource defined outside this excerpt - confirm).
p.quad(
    bottom=0,
    top='flights',
    left='left',
    right='right',
    source=src,
    fill_color='red',
    line_color='black',
    fill_alpha=0.75,
    # Bars turn opaque navy while hovered
    hover_fill_alpha=1.0,
    hover_fill_color='navy',
)
# Pandas for data management
import pandas as pd

# os methods for manipulating paths
from os.path import dirname, join

# Bokeh basics
# (module name restored: the original line read `from import curdoc`,
# which is a syntax error; curdoc is exported by bokeh.io)
from bokeh.io import curdoc
from bokeh.models.widgets import Tabs
# Logistic model of sleep probability as a function of `time`.
# `tt` and `time` are not defined in this excerpt
# (presumably theano.tensor and the observed time values - confirm).
with pm.Model() as sleep_model:
    # Create the alpha and beta parameters
    # Assume a normal distribution
    alpha = pm.Normal('alpha', mu=0.0, tau=0.05, testval=0.0)
    beta = pm.Normal('beta', mu=0.0, tau=0.05, testval=0.0)

    # The sleep probability is modeled as a logistic function
    p = pm.Deterministic('p', 1. / (1. + tt.exp(beta * time + alpha)))
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Masking, Embedding
# Start from an empty Sequential container.
model = Sequential()
# Embedding layer
# NOTE(review): the next line looks like a keyword argument left over from a
# truncated layer-construction call. As written, the trailing comma makes it
# bind a one-element tuple `(training_length,)` to `input_length`, and no
# layer is added to `model`. Restore the full call from the original source.
input_length = training_length,
from sklearn.tree import DecisionTreeClassifier

# Make a decision tree and train
# RSEED (defined outside this excerpt) fixes the random state.
tree = DecisionTreeClassifier(random_state=RSEED)
# NOTE(review): the source line read
# `tree = DecisionTreeClassifier(random_state=RSEED), y)`, which has an
# unmatched parenthesis; the fit call below is reconstructed and the feature
# name `X` is assumed - confirm against the original.
tree.fit(X, y)
import lightgbm as lgb
def identify_zero_importance_features(train, train_labels, iterations = 2):
Identify zero importance features in a training dataset based on the
feature importances from a gradient boosting model.
train : dataframe