Will Koehrsen WillKoehrsen

## different_binwidths.py
# Show 4 different binwidths
# Draws one histogram of flights['arr_delay'] per bin width, each in its own
# cell of a 2 x 2 subplot grid.
for i, binwidth in enumerate([1, 5, 10, 15]):

    # Set up the plot
    # Subplot positions are 1-based, while enumerate counts from 0
    ax = plt.subplot(2, 2, i + 1)

    # Draw the plot
    # bins is a count, not a width: 180 / binwidth, truncated to an integer.
    # NOTE(review): 180 is presumably the span of delays being plotted, but no
    # range is passed to hist here -- confirm against the full script.
    ax.hist(flights['arr_delay'], bins = int(180/binwidth),
             color = 'blue', edgecolor = 'black')


## side_by_side_histogram.py
# Make a separate list for each airline
# Each list holds the 'arr_delay' values of the rows of flights whose 'name'
# column equals the given carrier (boolean-mask selection, then list()).
x1 = list(flights[flights['name'] == 'United Air Lines Inc.']['arr_delay'])
x2 = list(flights[flights['name'] == 'JetBlue Airways']['arr_delay'])
x3 = list(flights[flights['name'] == 'ExpressJet Airlines Inc.']['arr_delay'])
x4 = list(flights[flights['name'] == 'Delta Air Lines Inc.']['arr_delay'])
x5 = list(flights[flights['name'] == 'American Airlines Inc.']['arr_delay'])

# Assign colors for each airline and the names
# One hex color per airline, listed in the same order as the names below
colors = ['#E69F00', '#56B4E9', '#F0E442', '#009E73', '#D55E00']
# Display names in the same order as the x1..x5 delay lists, so that the
# i-th name and i-th color describe the i-th list
names = ['United Air Lines Inc.', 'JetBlue Airways', 'ExpressJet Airlines Inc.',
         'Delta Air Lines Inc.', 'American Airlines Inc.']

## density_airlines.py
# List of five airlines to plot
# Each entry is compared for equality against flights['name'], so the
# spelling must match the values in that column exactly.
airlines = ['United Air Lines Inc.', 'JetBlue Airways', 'ExpressJet Airlines Inc.',
            'Delta Air Lines Inc.', 'American Airlines Inc.']

# Iterate through the five airlines
for airline in airlines:
    # Subset to the airline
    # Keep only the rows of flights whose 'name' column equals this airline
    subset = flights[flights['name'] == airline]

    # Draw the density plot
    # NOTE(review): no plotting call follows in this excerpt, so as shown the
    # loop only rebinds subset on each pass -- confirm against the full script.

## density_rugplot_alaska.py
# Subset to Alaska Airlines
# Keep only the rows of flights whose 'name' column is 'Alaska Airlines Inc.'
subset = flights[flights['name'] == 'Alaska Airlines Inc.']

# Density Plot with Rug Plot
# hist = False turns the histogram bars off, kde = True draws the density
# curve, and rug = True adds the rug marks; kde_kws / rug_kws style the curve
# and the rug marks respectively.
# NOTE(review): distplot is deprecated in seaborn >= 0.11 (kdeplot / rugplot
# or displot are the replacements) -- confirm the seaborn version in use.
sns.distplot(subset['arr_delay'], hist = False, kde = True, rug = True,
             color = 'darkblue',
             kde_kws={'linewidth': 3},
             rug_kws={'color': 'black'})

# Plot formatting

## histograms_matplotlib_seaborn.py
# Import the libraries
import matplotlib.pyplot as plt
import seaborn as sns

# matplotlib histogram
# int(180/5) evaluates to 36, so the delays are split into 36 bins
plt.hist(flights['arr_delay'], color = 'blue', edgecolor = 'black',
         bins = int(180/5))

# seaborn histogram
# hist=True with kde=False draws the bars only, without a density curve.
# NOTE(review): the call below is cut off after "kde=False," -- its remaining
# arguments and closing parenthesis are missing from this excerpt.
sns.distplot(flights['arr_delay'], hist=True, kde=False,

## flights_main.py
# Pandas for data management
import pandas as pd

# os methods for manipulating paths
from os.path import dirname, join

# Bokeh basics
from bokeh.io import curdoc
from bokeh.models.widgets import Tabs

## custom_pairgrid.py
# Function to calculate correlation coefficient between two arrays
def corr(x, y, **kwargs):
    """Build a correlation label for x and y and fetch the current axes.

    x, y: the two arrays handed to np.corrcoef.
    **kwargs: accepted but not used in the lines shown here.

    NOTE(review): this excerpt ends right after plt.gca(); the label is
    built but never drawn and nothing is returned -- confirm against the
    full function.
    """

    # Calculate the value
    # For two 1-D inputs corrcoef returns a 2 x 2 matrix; [0][1] is the
    # x-versus-y entry
    coef = np.corrcoef(x, y)[0][1]
    # Make the label
    # Raw string so that \rho reaches matplotlib's math-text renderer intact
    label = r'$\rho$ = ' + str(round(coef, 2))

    # Add the label to the plot
    ax = plt.gca()

## data_preparation.py
def format_data(df):
    """Separate the G3 target from df and one-hot encode the other columns.

    df: DataFrame that contains 'school', 'G1', 'G2' and 'G3' columns.

    NOTE(review): no return statement is visible in this excerpt, so as
    shown the function returns None -- confirm against the full function.
    """
    # Targets are final grade of student
    labels = df['G3']
    # Drop the school and the grades from features
    # drop returns a new frame and rebinds the local name; the caller's
    # frame is not modified by this line
    df = df.drop(columns=['school', 'G1', 'G2', 'G3'])

    # One-Hot Encoding of Categorical Variables
    df = pd.get_dummies(df)

    # Re-attach the target as column 'y'; list() discards the index, so the
    # values are assigned by position rather than aligned by label
    df['y'] = list(labels)

## bayesian_linear_model.py
# Context for the model
with pm.Model() as normal_model:

    # The family passed to the GLM describes the likelihood of the observed
    # data (Normal here); it is not a prior on the model parameters
    family = pm.glm.families.Normal()

    # Making the model only requires specifying the formula and the data
    # formula and X_train_math are defined outside this excerpt
    pm.GLM.from_formula(formula, X_train_math, family = family)

    # Perform Markov Chain Monte Carlo sampling
    # NOTE(review): no sampling call follows in this excerpt -- confirm
    # against the full script.

## normal_model.py
import pymc3 as pm

# Context for the model
with pm.Model() as normal_model:

    # The data likelihood is a Normal distribution
    family = pm.glm.families.Normal()

    # Creating the model requires a formula and data (and optionally a family)
    # formula and X_train are defined outside this excerpt
    pm.GLM.from_formula(formula, data = X_train, family = family)
	# Show 4 different binwidths
	for i, binwidth in enumerate([1, 5, 10, 15]):

	# Set up the plot
	ax = plt.subplot(2, 2, i + 1)

	# Draw the plot
	ax.hist(flights['arr_delay'], bins = int(180/binwidth),
	color = 'blue', edgecolor = 'black')
	# Make a separate list for each airline
	x1 = list(flights[flights['name'] == 'United Air Lines Inc.']['arr_delay'])
	x2 = list(flights[flights['name'] == 'JetBlue Airways']['arr_delay'])
	x3 = list(flights[flights['name'] == 'ExpressJet Airlines Inc.']['arr_delay'])
	x4 = list(flights[flights['name'] == 'Delta Air Lines Inc.']['arr_delay'])
	x5 = list(flights[flights['name'] == 'American Airlines Inc.']['arr_delay'])

	# Assign colors for each airline and the names
	colors = ['#E69F00', '#56B4E9', '#F0E442', '#009E73', '#D55E00']
	names = ['United Air Lines Inc.', 'JetBlue Airways', 'ExpressJet Airlines Inc.'',
	# List of five airlines to plot
	airlines = ['United Air Lines Inc.', 'JetBlue Airways', 'ExpressJet Airlines Inc.'',
	'Delta Air Lines Inc.', 'American Airlines Inc.']

	# Iterate through the five airlines
	for airline in airlines:
	# Subset to the airline
	subset = flights[flights['name'] == airline]

	# Draw the density plot
	# Subset to Alaska Airlines
	subset = flights[flights['name'] == 'Alaska Airlines Inc.']

	# Density Plot with Rug Plot
	sns.distplot(subset['arr_delay'], hist = False, kde = True, rug = True,
	color = 'darkblue',
	kde_kws={'linewidth': 3},
	rug_kws={'color': 'black'})

	# Plot formatting
	# Import the libraries
	import matplotlib.pyplot as plt
	import seaborn as sns

	# matplotlib histogram
	plt.hist(flights['arr_delay'], color = 'blue', edgecolor = 'black',
	bins = int(180/5))

	# seaborn histogram
	sns.distplot(flights['arr_delay'], hist=True, kde=False,
	# Pandas for data management
	import pandas as pd

	# os methods for manipulating paths
	from os.path import dirname, join

	# Bokeh basics
	from bokeh.io import curdoc
	from bokeh.models.widgets import Tabs
	# Function to calculate correlation coefficient between two arrays
	def corr(x, y, **kwargs):

	# Calculate the value
	coef = np.corrcoef(x, y)[0][1]
	# Make the label
	label = r'$\rho$ = ' + str(round(coef, 2))

	# Add the label to the plot
	ax = plt.gca()
	def format_data(df):
	# Targets are final grade of student
	labels = df['G3']
	# Drop the school and the grades from features
	df = df.drop(columns=['school', 'G1', 'G2', 'G3'])

	# One-Hot Encoding of Categorical Variables
	df = pd.get_dummies(df)

	df['y'] = list(labels)
	# Context for the model
	with pm.Model() as normal_model:

	# The prior for the model parameters will be a normal distribution
	family = pm.glm.families.Normal()

	# Making the model only requires specifying the formula and the data
	pm.GLM.from_formula(formula, X_train_math, family = family)

	# Perform Markov Chain Monte Carlo sampling
	import pymc3 as pm

	# Context for the model
	with pm.Model() as normal_model:

	# The prior for the data likelihood is a Normal Distribution
	family = pm.glm.families.Normal()

	# Creating the model requires a formula and data (and optionally a family)
	pm.GLM.from_formula(formula, data = X_train, family = family)