GitHub Gists: Will Koehrsen (WillKoehrsen)
import pandas as pd
import numpy as np

# Pandas display options
pd.options.display.max_columns = 30
pd.options.display.max_rows = 20

# Handle to the running IPython session, used for invoking notebook magics
from IPython import get_ipython
ipython = get_ipython()
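A minimal sketch of how the ipython handle above is typically used; the specific magics here are assumptions, not part of the original gist.

# Enable inline Matplotlib output and autoreload of edited modules (assumed usage)
if ipython is not None:
    ipython.run_line_magic('matplotlib', 'inline')
    ipython.run_line_magic('load_ext', 'autoreload')
    ipython.run_line_magic('autoreload', '2')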
from sklearn.ensemble import RandomForestClassifier

# Create the model with 100 trees
model = RandomForestClassifier(n_estimators=100,
                               bootstrap=True,
                               max_features='sqrt')

# Fit on training data
model.fit(train, train_labels)
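A short follow-up sketch for evaluating the fitted forest, assuming held-out arrays named test and test_labels exist alongside train and train_labels (hypothetical names, not from the original gist).

from sklearn.metrics import accuracy_score

# Predicted classes and positive-class probabilities on the held-out set (assumed names)
predictions = model.predict(test)
probabilities = model.predict_proba(test)[:, 1]
print(f'Test accuracy: {accuracy_score(test_labels, predictions):.3f}')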
import numpy as np
import pymc3 as pm

# Observations from multiple trips (counts of each of 3 outcomes per trip)
c = np.array([[3, 2, 1],
              [2, 3, 1],
              [3, 2, 1],
              [2, 3, 1]])

# Dirichlet concentration parameters (a uniform prior is assumed here)
alphas = np.ones(3)

with pm.Model() as model:
    # Parameters are a Dirichlet distribution
    parameters = pm.Dirichlet('parameters', a=alphas, shape=3)
    # Observed data is a multinomial distribution (each row of c sums to 6 observations)
    observed = pm.Multinomial('observed', n=6, p=parameters, shape=3, observed=c)
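A hedged sketch of drawing posterior samples from this model; the draw and tune counts are illustrative, not taken from the original gist.

with model:
    # Sample from the posterior over the outcome probabilities
    trace = pm.sample(draws=1000, tune=500)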
import pymc3 as pm

# Context for the model
with pm.Model() as normal_model:
    # The prior for the data likelihood is a Normal distribution
    family = pm.glm.families.Normal()

    # Creating the model requires a formula and data (and optionally a family)
    pm.GLM.from_formula(formula, data=X_train, family=family)
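A sketch of how such a GLM is usually fit, assuming the formula string and X_train dataframe above are defined elsewhere; the sampler settings are assumptions.

with normal_model:
    # Draw posterior samples of the GLM coefficients (draw counts are illustrative)
    normal_trace = pm.sample(draws=2000, tune=500)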
from bokeh.plotting import figure

# Create the blank plot
p = figure(plot_height=600, plot_width=600,
           title='Histogram of Arrival Delays',
           x_axis_label='Delay (min)',
           y_axis_label='Number of Flights')

# Add a quad glyph with source this time
p.quad(bottom=0, top='flights', left='left', right='right', source=src,
       fill_color='red', line_color='black', fill_alpha=0.75,
       hover_fill_alpha=1.0, hover_fill_color='navy')
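A sketch of wiring up the hover interaction the quad glyph above is styled for; the tooltip column names are assumptions about what the ColumnDataSource src contains.

from bokeh.models import HoverTool
from bokeh.plotting import show

# Tooltip fields ('f_interval', 'f_flights') are assumed column names in src
hover = HoverTool(tooltips=[('Delay interval', '@f_interval'),
                            ('Flights', '@f_flights')])
p.add_tools(hover)
show(p)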
# Pandas for data management
import pandas as pd
# os methods for manipulating paths
from os.path import dirname, join
# Bokeh basics
from bokeh.io import curdoc
from bokeh.models.widgets import Tabs
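These imports suggest a Bokeh server app assembled from tabs; a minimal sketch under that assumption (the placeholder figures and tab titles below are hypothetical).

from bokeh.plotting import figure
from bokeh.models.widgets import Panel

# Two placeholder figures standing in for the app's real layouts (assumption)
fig1 = figure(title='Histogram')
fig2 = figure(title='Density Plot')

tab1 = Panel(child=fig1, title='Histogram')
tab2 = Panel(child=fig2, title='Density Plot')

# Register the tabbed layout with the document served by `bokeh serve`
curdoc().add_root(Tabs(tabs=[tab1, tab2]))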
import pymc3 as pm
import theano.tensor as tt

with pm.Model() as sleep_model:
    # Create the alpha and beta parameters
    # Assume a normal distribution
    alpha = pm.Normal('alpha', mu=0.0, tau=0.05, testval=0.0)
    beta = pm.Normal('beta', mu=0.0, tau=0.05, testval=0.0)

    # The sleep probability is modeled as a logistic function of time
    p = pm.Deterministic('p', 1. / (1. + tt.exp(beta * time + alpha)))
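A sketch of how such a model is usually completed, assuming an observed 0/1 array of asleep/awake indicators; sleep_obs is a hypothetical name and the sampler settings are illustrative.

with sleep_model:
    # Bernoulli likelihood over the observed sleep indicators (assumed variable name)
    observed = pm.Bernoulli('obs', p=p, observed=sleep_obs)
    # Draw posterior samples of alpha and beta
    sleep_trace = pm.sample(draws=2000, tune=500)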
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Masking, Embedding

model = Sequential()

# Embedding layer mapping word indices to 100-dimensional vectors
model.add(
    Embedding(input_dim=num_words,
              input_length=training_length,
              output_dim=100))
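A hedged sketch of how such a model is often finished; the layer sizes, dropout rates, and compile settings below are assumptions, not taken from the original gist.

# Recurrent layer, a dense hidden layer, and a softmax over the vocabulary (sizes assumed)
model.add(LSTM(64, dropout=0.1, recurrent_dropout=0.1))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_words, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])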
from sklearn.tree import DecisionTreeClassifier
# Make a decision tree and train
tree = DecisionTreeClassifier(random_state=RSEED)
tree.fit(X, y)
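A quick sanity check on the fitted tree, as a sketch; X and y are the training arrays used above, and a perfect training score usually signals overfitting.

# Training accuracy plus tree size
print(f'Training accuracy: {tree.score(X, y):.3f}')
print(f'Tree depth: {tree.get_depth()}, leaves: {tree.get_n_leaves()}')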
import lightgbm as lgb

def identify_zero_importance_features(train, train_labels, iterations=2):
    """
    Identify zero importance features in a training dataset based on the
    feature importances from a gradient boosting model.

    Parameters
    --------
    train : dataframe