This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Show 4 different binwidths, one histogram per panel of a 2x2 grid
for i, binwidth in enumerate([1, 5, 10, 15]):
    # Set up the plot (subplot positions are 1-based, hence i + 1)
    ax = plt.subplot(2, 2, i + 1)
    # Draw the plot; the bin count is 180 / binwidth truncated to an int
    ax.hist(flights['arr_delay'], bins=int(180 / binwidth),
            color='blue', edgecolor='black')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make a separate list of 'arr_delay' values for each airline,
# selecting rows of `flights` by an exact match on the 'name' column
x1 = list(flights[flights['name'] == 'United Air Lines Inc.']['arr_delay'])
x2 = list(flights[flights['name'] == 'JetBlue Airways']['arr_delay'])
x3 = list(flights[flights['name'] == 'ExpressJet Airlines Inc.']['arr_delay'])
x4 = list(flights[flights['name'] == 'Delta Air Lines Inc.']['arr_delay'])
x5 = list(flights[flights['name'] == 'American Airlines Inc.']['arr_delay'])

# Assign colors for each airline and the names
# (five hex colors, one per list above, in the same order)
colors = ['#E69F00', '#56B4E9', '#F0E442', '#009E73', '#D55E00']
names = ['United Air Lines Inc.', 'JetBlue Airways', 'ExpressJet Airlines Inc.'', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# List of five airlines to plot (values matched against flights['name'])
airlines = ['United Air Lines Inc.', 'JetBlue Airways',
            'ExpressJet Airlines Inc.', 'Delta Air Lines Inc.',
            'American Airlines Inc.']
# Iterate through the five airlines | |
for airline in airlines: | |
# Subset to the airline | |
subset = flights[flights['name'] == airline] | |
# Draw the density plot |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Subset to Alaska Airlines (rows whose 'name' equals the airline exactly)
subset = flights[flights['name'] == 'Alaska Airlines Inc.']

# Density Plot with Rug Plot: histogram bars off, KDE curve and rug on
# NOTE(review): sns.distplot is deprecated in recent seaborn releases;
# confirm the installed version still provides it before relying on this.
sns.distplot(subset['arr_delay'], hist=False, kde=True, rug=True,
             color='darkblue',
             kde_kws={'linewidth': 3},
             rug_kws={'color': 'black'})
# Plot formatting |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the libraries | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# matplotlib histogram of arrival delays; bins = int(180 / 5) = 36
plt.hist(flights['arr_delay'], color='blue', edgecolor='black',
         bins=int(180 / 5))
# seaborn histogram | |
sns.distplot(flights['arr_delay'], hist=True, kde=False, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pandas for data management | |
import pandas as pd | |
# os methods for manipulating paths | |
from os.path import dirname, join | |
# Bokeh basics | |
from bokeh.io import curdoc | |
from bokeh.models.widgets import Tabs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function to calculate correlation coefficient between two arrays | |
def corr(x, y, **kwargs): | |
# Calculate the value | |
coef = np.corrcoef(x, y)[0][1] | |
# Make the label | |
label = r'$\rho$ = ' + str(round(coef, 2)) | |
# Add the label to the plot | |
ax = plt.gca() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def format_data(df): | |
# Targets are final grade of student | |
labels = df['G3'] | |
# Drop the school and the grades from features | |
df = df.drop(columns=['school', 'G1', 'G2', 'G3']) | |
# One-Hot Encoding of Categorical Variables | |
df = pd.get_dummies(df) | |
df['y'] = list(labels) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Context for the model | |
with pm.Model() as normal_model: | |
# The prior for the model parameters will be a normal distribution | |
family = pm.glm.families.Normal() | |
# Making the model only requires specifying the formula and the data | |
pm.GLM.from_formula(formula, X_train_math, family = family) | |
# Perform Markov Chain Monte Carlo sampling |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymc3 as pm | |
# Context for the model | |
with pm.Model() as normal_model: | |
# The prior for the data likelihood is a Normal Distribution | |
family = pm.glm.families.Normal() | |
# Creating the model requires a formula and data (and optionally a family) | |
pm.GLM.from_formula(formula, data = X_train, family = family) |