{{ message }}

Instantly share code, notes, and snippets.

# Will Koehrsen WillKoehrsen

🌆
Improving
Last active Mar 23, 2018
View different_binwidths.py
 # Show 4 different binwidths for i, binwidth in enumerate([1, 5, 10, 15]): # Set up the plot ax = plt.subplot(2, 2, i + 1) # Draw the plot ax.hist(flights['arr_delay'], bins = int(180/binwidth), color = 'blue', edgecolor = 'black')
Created Mar 23, 2018
View side_by_side_histogram.py
 # Make a separate list for each airline x1 = list(flights[flights['name'] == 'United Air Lines Inc.']['arr_delay']) x2 = list(flights[flights['name'] == 'JetBlue Airways']['arr_delay']) x3 = list(flights[flights['name'] == 'ExpressJet Airlines Inc.']['arr_delay']) x4 = list(flights[flights['name'] == 'Delta Air Lines Inc.']['arr_delay']) x5 = list(flights[flights['name'] == 'American Airlines Inc.']['arr_delay']) # Assign colors for each airline and the names colors = ['#E69F00', '#56B4E9', '#F0E442', '#009E73', '#D55E00'] names = ['United Air Lines Inc.', 'JetBlue Airways', 'ExpressJet Airlines Inc.',
Created Mar 23, 2018
View density_airlines.py
 # List of five airlines to plot airlines = ['United Air Lines Inc.', 'JetBlue Airways', 'ExpressJet Airlines Inc.', 'Delta Air Lines Inc.', 'American Airlines Inc.'] # Iterate through the five airlines for airline in airlines: # Subset to the airline subset = flights[flights['name'] == airline] # Draw the density plot
Created Mar 23, 2018
 # Subset to Alaska Airlines subset = flights[flights['name'] == 'Alaska Airlines Inc.'] # Density Plot with Rug Plot sns.distplot(subset['arr_delay'], hist = False, kde = True, rug = True, color = 'darkblue', kde_kws={'linewidth': 3}, rug_kws={'color': 'black'}) # Plot formatting
Created Mar 23, 2018
View histograms_matplotlib_seaborn.py
 # Import the libraries import matplotlib.pyplot as plt import seaborn as sns # matplotlib histogram plt.hist(flights['arr_delay'], color = 'blue', edgecolor = 'black', bins = int(180/5)) # seaborn histogram sns.distplot(flights['arr_delay'], hist=True, kde=False,
Created Mar 30, 2018
View flights_main.py
 # Pandas for data management import pandas as pd # os methods for manipulating paths from os.path import dirname, join # Bokeh basics from bokeh.io import curdoc from bokeh.models.widgets import Tabs
Last active Dec 29, 2018
View custom_pairgrid.py
 # Function to calculate correlation coefficient between two arrays def corr(x, y, **kwargs): # Calculate the value coef = np.corrcoef(x, y)[0][1] # Make the label label = r'$\rho$ = ' + str(round(coef, 2)) # Add the label to the plot ax = plt.gca()
Created Apr 11, 2018
View data_preparation.py
 def format_data(df): # Targets are final grade of student labels = df['G3'] # Drop the school and the grades from features df = df.drop(columns=['school', 'G1', 'G2', 'G3']) # One-Hot Encoding of Categorical Variables df = pd.get_dummies(df) df['y'] = list(labels)
Last active Apr 11, 2018
View bayesian_linear_model.py
 # Context for the model with pm.Model() as normal_model: # The likelihood family for the data is a normal distribution family = pm.glm.families.Normal() # Making the model only requires specifying the formula and the data pm.GLM.from_formula(formula, X_train_math, family = family) # Perform Markov Chain Monte Carlo sampling
Last active Aug 21, 2020
View normal_model.py
 import pymc3 as pm # Context for the model with pm.Model() as normal_model: # The data likelihood is a Normal distribution family = pm.glm.families.Normal() # Creating the model requires a formula and data (and optionally a family) pm.GLM.from_formula(formula, data = X_train, family = family)