This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a small random forest on the iris dataset
# (a single decision tree could be used instead).
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

iris = load_iris()

# Build a 10-tree ensemble and train it on the iris features and labels.
# fit() returns the estimator itself, so `model` is the trained forest.
model = RandomForestClassifier(n_estimators=10).fit(iris.data, iris.target)
# Extract single tree |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the libraries | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# matplotlib histogram of the 'arr_delay' column:
# int(180/5) = 36 bins, blue bars with black outlines
plt.hist(
    flights['arr_delay'],
    bins=int(180/5),
    color='blue',
    edgecolor='black',
)
# seaborn histogram | |
sns.distplot(flights['arr_delay'], hist=True, kde=False, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# selenium for web driving | |
import selenium | |
from selenium import webdriver | |
# time for pausing between navigation | |
import time | |
# Datetime for recording time of submission | |
import datetime |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare four bin widths, one histogram per panel of a 2 x 2 grid
for i, binwidth in enumerate([1, 5, 10, 15]):

    # Subplot positions are numbered from 1, hence i + 1
    ax = plt.subplot(2, 2, i + 1)

    # Draw the histogram; the bin count shrinks as the bin width grows
    ax.hist(
        flights['arr_delay'],
        bins=int(180/binwidth),
        color='blue',
        edgecolor='black',
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Examines the effect of changing a single variable | |
# Takes in the name of the variable, the trace, and the data | |
def model_effect(query_var, trace, X): | |
# Variables that do not change | |
steady_vars = list(X.columns) | |
steady_vars.remove(query_var) | |
# Linear Model that estimates a grade based on the value of the query variable | |
# and one sample from the trace |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Two scalar inputs: one value per sample for the book, one for the link
book = Input(shape=[1], name='book')
link = Input(shape=[1], name='link')

# Embed the book input. The vocabulary size is the number of entries in
# book_index; the output shape will be (None, 1, embedding_size)
# (the original note says 50 -- presumably embedding_size = 50, confirm).
book_embedding = Embedding(
    input_dim=len(book_index),
    output_dim=embedding_size,
    name='book_embedding',
)(book)
# Embedding the link (shape will be (None, 1, 50)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Threshold for removing correlated variables
threshold = 0.9

# Absolute value correlation matrix
corr_matrix = app.corr().abs()

# Upper triangle of correlations: keep only the entries strictly above the
# diagonal (k=1) so each pair of columns appears once and the
# self-correlations are excluded; every other cell becomes NaN.
# The mask uses the builtin `bool` dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
upper_mask = np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1)
upper = corr_matrix.where(upper_mask)
# Select columns with correlations above threshold |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cufflinks as cf | |
@interact | |
def scatter_plot(x=list(df.select_dtypes('number').columns), | |
y=list(df.select_dtypes('number').columns)[1:], | |
theme=list(cf.themes.THEMES.keys()), | |
colorscale=list(cf.colors._scales_names.keys())): | |
df.iplot(kind='scatter', x=x, y=y, mode='markers', | |
xTitle=x.title(), yTitle=y.title(), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from torchvision import datasets | |
from torch.utils.data import DataLoader | |
# Datasets from folders: one ImageFolder per split, each using the
# transform stored under the same key in image_transforms
data = {
    split: datasets.ImageFolder(root=root, transform=image_transforms[split])
    for split, root in (('train', traindir), ('valid', validdir))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function to calculate correlation coefficient between two arrays | |
def corr(x, y, **kwargs): | |
# Calculate the value | |
coef = np.corrcoef(x, y)[0][1] | |
# Make the label | |
label = r'$\rho$ = ' + str(round(coef, 2)) | |
# Add the label to the plot | |
ax = plt.gca() |
NewerOlder