GitHub Gists: Will Koehrsen (WillKoehrsen)
import pandas as pd
import numpy as np

# Pandas display options
pd.options.display.max_columns = 30
pd.options.display.max_rows = 20

# Handle to the running IPython session, used for invoking notebook magics
from IPython import get_ipython
ipython = get_ipython()
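A minimal sketch of how the ipython handle above is typically used; the specific magics here are assumptions, not part of the original gist.

# Enable inline Matplotlib output and autoreload of edited modules (assumed usage)
if ipython is not None:
    ipython.run_line_magic('matplotlib', 'inline')
    ipython.run_line_magic('load_ext', 'autoreload')
    ipython.run_line_magic('autoreload', '2')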
from sklearn.ensemble import RandomForestClassifier

# Create the model with 100 trees
model = RandomForestClassifier(n_estimators=100,
                               bootstrap=True,
                               max_features='sqrt')

# Fit on training data
model.fit(train, train_labels)
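A short follow-up sketch for evaluating the fitted forest, assuming held-out arrays named test and test_labels exist alongside train and train_labels (hypothetical names, not from the original gist).

from sklearn.metrics import accuracy_score

# Predicted classes and positive-class probabilities on the held-out set (assumed names)
predictions = model.predict(test)
probabilities = model.predict_proba(test)[:, 1]
print(f'Test accuracy: {accuracy_score(test_labels, predictions):.3f}')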
import numpy as np
import pymc3 as pm

# Observations from multiple trips (counts of each of 3 outcomes per trip)
c = np.array([[3, 2, 1],
              [2, 3, 1],
              [3, 2, 1],
              [2, 3, 1]])

# Dirichlet concentration parameters (a uniform prior is assumed here)
alphas = np.ones(3)

with pm.Model() as model:
    # Parameters are a Dirichlet distribution
    parameters = pm.Dirichlet('parameters', a=alphas, shape=3)
    # Observed data is a multinomial distribution (each row of c sums to 6 observations)
    observed = pm.Multinomial('observed', n=6, p=parameters, shape=3, observed=c)
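A hedged sketch of drawing posterior samples from this model; the draw and tune counts are illustrative, not taken from the original gist.

with model:
    # Sample from the posterior over the outcome probabilities
    trace = pm.sample(draws=1000, tune=500)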
import pymc3 as pm

# Context for the model
with pm.Model() as normal_model:
    # The prior for the data likelihood is a Normal distribution
    family = pm.glm.families.Normal()

    # Creating the model requires a formula and data (and optionally a family)
    pm.GLM.from_formula(formula, data=X_train, family=family)
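A sketch of how such a GLM is usually fit, assuming the formula string and X_train dataframe above are defined elsewhere; the sampler settings are assumptions.

with normal_model:
    # Draw posterior samples of the GLM coefficients (draw counts are illustrative)
    normal_trace = pm.sample(draws=2000, tune=500)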
from bokeh.plotting import figure

# Create the blank plot
p = figure(plot_height=600, plot_width=600,
           title='Histogram of Arrival Delays',
           x_axis_label='Delay (min)',
           y_axis_label='Number of Flights')

# Add a quad glyph with source this time
p.quad(bottom=0, top='flights', left='left', right='right', source=src,
       fill_color='red', line_color='black', fill_alpha=0.75,
       hover_fill_alpha=1.0, hover_fill_color='navy')
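A sketch of wiring up the hover interaction the quad glyph above is styled for; the tooltip column names are assumptions about what the ColumnDataSource src contains.

from bokeh.models import HoverTool
from bokeh.plotting import show

# Tooltip fields ('f_interval', 'f_flights') are assumed column names in src
hover = HoverTool(tooltips=[('Delay interval', '@f_interval'),
                            ('Flights', '@f_flights')])
p.add_tools(hover)
show(p)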
# Pandas for data management
import pandas as pd
# os methods for manipulating paths
from os.path import dirname, join
# Bokeh basics
from bokeh.io import curdoc
from bokeh.models.widgets import Tabs
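These imports suggest a Bokeh server app assembled from tabs; a minimal sketch under that assumption (the placeholder figures and tab titles below are hypothetical).

from bokeh.plotting import figure
from bokeh.models.widgets import Panel

# Two placeholder figures standing in for the app's real layouts (assumption)
fig1 = figure(title='Histogram')
fig2 = figure(title='Density Plot')

tab1 = Panel(child=fig1, title='Histogram')
tab2 = Panel(child=fig2, title='Density Plot')

# Register the tabbed layout with the document served by `bokeh serve`
curdoc().add_root(Tabs(tabs=[tab1, tab2]))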
import pymc3 as pm
import theano.tensor as tt

with pm.Model() as sleep_model:
    # Create the alpha and beta parameters
    # Assume a normal distribution
    alpha = pm.Normal('alpha', mu=0.0, tau=0.05, testval=0.0)
    beta = pm.Normal('beta', mu=0.0, tau=0.05, testval=0.0)

    # The sleep probability is modeled as a logistic function of time
    p = pm.Deterministic('p', 1. / (1. + tt.exp(beta * time + alpha)))
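A sketch of how such a model is usually completed, assuming an observed 0/1 array of asleep/awake indicators; sleep_obs is a hypothetical name and the sampler settings are illustrative.

with sleep_model:
    # Bernoulli likelihood over the observed sleep indicators (assumed variable name)
    observed = pm.Bernoulli('obs', p=p, observed=sleep_obs)
    # Draw posterior samples of alpha and beta
    sleep_trace = pm.sample(draws=2000, tune=500)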
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Masking, Embedding

model = Sequential()

# Embedding layer mapping word indices to 100-dimensional vectors
model.add(
    Embedding(input_dim=num_words,
              input_length=training_length,
              output_dim=100))
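A hedged sketch of how such a model is often finished; the layer sizes, dropout rates, and compile settings below are assumptions, not taken from the original gist.

# Recurrent layer, a dense hidden layer, and a softmax over the vocabulary (sizes assumed)
model.add(LSTM(64, dropout=0.1, recurrent_dropout=0.1))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_words, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])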
from sklearn.tree import DecisionTreeClassifier
# Make a decision tree and train
tree = DecisionTreeClassifier(random_state=RSEED)
tree.fit(X, y)
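A quick sanity check on the fitted tree, as a sketch; X and y are the training arrays used above, and a perfect training score usually signals overfitting.

# Training accuracy plus tree size
print(f'Training accuracy: {tree.score(X, y):.3f}')
print(f'Tree depth: {tree.get_depth()}, leaves: {tree.get_n_leaves()}')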
import lightgbm as lgb

def identify_zero_importance_features(train, train_labels, iterations=2):
    """
    Identify zero importance features in a training dataset based on the
    feature importances from a gradient boosting model.

    Parameters
    --------
    train : dataframe