Skip to content

Instantly share code, notes, and snippets.


Michel Kana michelkana

View GitHub Profile
# Pick the (n_iterations, tree_depth) combination with the best test accuracy.
# ab_test_scores / ab_train_scores are assumed to be (iterations, depths) score
# matrices and tree_depths the list of candidate depths -- TODO confirm caller.

# Best iteration index per depth: vectorized equivalent of the original
# per-column argmax list comprehensions.
best_nb_iterations = ab_test_scores.argmax(axis=0)
# Best test score achieved within each depth column.
best_test_scores = ab_test_scores[best_nb_iterations, np.arange(len(tree_depths))]
# Depth whose per-depth best is the overall maximum.
optimal_tree_depth_idx = best_test_scores.argmax()
optimal_nb_iterations = best_nb_iterations[optimal_tree_depth_idx]
optimal_tree_depth = tree_depths[optimal_tree_depth_idx]
optimal_test_score = ab_test_scores[optimal_nb_iterations, optimal_tree_depth_idx]
optimal_train_score = ab_train_scores[optimal_nb_iterations, optimal_tree_depth_idx]
# The original paste truncated this call after the third argument; the fourth
# placeholder is the training-set accuracy.
print('The combination of base learner depth {} and {} iterations achieves the best accuracy {}% on test set \
and {}% on training set.'.format(optimal_tree_depth, optimal_nb_iterations,
                                 round(optimal_test_score*100, 5),
                                 round(optimal_train_score*100, 5)))
from sklearn.ensemble import AdaBoostClassifier
# Function to run AdaBoost (the original comment said "gradient descent", but the
# code below fits sklearn AdaBoostClassifier ensembles, not gradient boosting).
# NOTE(review): this paste lost its indentation and is truncated -- the lines
# after the def belong inside the function body, L25 is missing a ".fit(X_train"
# fragment before ", y_train)", and the rest of the loop body (score recording,
# plotting, return) is not visible. Restore from the original gist before running.
def run_adaboosting(X_train, y_train, X_test, y_test, depths=[3], iterations=800, lr=0.05):
# Two side-by-side axes, presumably train vs. test score curves -- verify.
fig, ax = plt.subplots(1,2,figsize=(20,5))
# Per-iteration score matrices: rows = boosting iterations, cols = tree depths.
ab_train_scores = np.zeros((iterations, len(depths)))
ab_test_scores = np.zeros((iterations, len(depths)))
# One AdaBoost ensemble per candidate base-learner depth.
for i, depth in enumerate(depths):
# NOTE(review): ".fit(X_train" appears to have been dropped by the scrape here.
ab_model = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=depth), n_estimators=iterations, learning_rate=lr), y_train);
# Fit a single depth-3 decision tree on the unmodified training data.
tree1 = DecisionTreeClassifier(max_depth=3)
tree1.fit(X_train, y_train)

# Predictions on both splits.
y_train_predicted_tree1 = tree1.predict(X_train)
y_test_predicted_tree1 = tree1.predict(X_test)

# Build per-sample weights for a "modified" dataset: samples the tree got
# wrong are up-weighted to 2, correctly classified samples keep weight 1.
y_train_predicted_tree1_bool = y_train_predicted_tree1 == y_train
sample_weights = np.where(y_train_predicted_tree1_bool, 1.0, 2.0)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
# Load data
# Higgs dataset train/test splits; paths are relative to the working directory.
data_train = pd.read_csv('data/Higgs_train.csv')
data_test = pd.read_csv('data/Higgs_test.csv')
# Akaike information criterion: AIC = 2k - 2*ln(L), where k is the number of
# fitted parameters and L the maximized likelihood. Lower AIC = better model.
def aic(max_likelihood, n_params=5):
    """Return the AIC for a model with `n_params` parameters and maximized
    likelihood `max_likelihood` (defaults to the k=5 the original hard-coded)."""
    return 2 * n_params - 2 * math.log(max_likelihood)

# ml_A / ml_B are the maximized likelihoods of the two models -- defined upstream.
aic_A = aic(ml_A)
aic_B = aic(ml_B)
print("AIC for Product A: {:.2f}".format(aic_A))
print("AIC for Product B: {:.2f}".format(aic_B))
# Measure how well the truecasing pipeline restores original capitalization on a
# small slice of the Yelp reviews dataset (corpus-average BLEU, rounded to 2 dp).
# JSON is UTF-8 by definition; pin the encoding so the read does not depend on
# the platform's default locale (the original relied on it implicitly).
with open("path/to/yelp_academic_dataset_review_small.json", encoding="utf-8") as f:
    reviews = f.readlines()
# One JSON object per line; keep only the review text with newlines stripped.
orig_reviews = [json.loads(r)['text'].replace('\n','') for r in reviews]
# Lowercase everything, then try to restore the casing with truecasing().
lowercase_reviews = [r.lower() for r in orig_reviews]
truecase_reviews = [truecasing(r) for r in lowercase_reviews]
# BLEU of each truecased review against its original; report the mean
# (bare expression: this is a notebook cell's displayed output).
bleu_scores = [get_bleu([ro], [rt]) for ro, rt in zip(orig_reviews, truecase_reviews)]
round(sum(bleu_scores)/len(bleu_scores), 2)
# packages needed
# !pip install nltk
# !pip install stanfordnlp
# !pip install --upgrade bleu
import nltk
from nltk.tokenize import sent_tokenize
import re
import stanfordnlp
from bleu import list_bleu
# pip install scipy==1.1.0
from vis.visualization import visualize_saliency
# Plot a saliency map for a CIFAR-10 test image (32x32 RGB, per the reshape below).
# NOTE(review): this paste is truncated and indentation-stripped -- the
# visualize_saliency(...) call is missing its closing parenthesis and the rest of
# the function (the actual plotting on ax) is not visible. Restore from the gist.
def plot_saliency(img_idx=None):
# If no index is given, plot_features_map presumably selects one and returns it -- verify.
img_idx = plot_features_map(img_idx)
# Saliency of the last layer (-1) w.r.t. the true class of the selected test image.
grads = visualize_saliency(cnn_saliency, -1, filter_indices=ytest[img_idx][0],
seed_input=x_test[img_idx], backprop_modifier=None,
# Label predicted by the CNN for this image (batch of one).
predicted_label = labels[np.argmax(cnn.predict(x_test[img_idx].reshape(1,32,32,3)),1)[0]]
fig, ax = plt.subplots(1,2, figsize=(10,5))
from random import randint
import matplotlib.pylab as plt
import numpy as np
# Build a sub-model that exposes an intermediate layer's activations and run one
# image through it. NOTE(review): truncated paste -- the Model(...) `outputs=`
# argument (presumably model.layers[layer_id].output) and the tail of the
# predict/expand_dims call are missing. Restore from the original gist.
def get_feature_maps(model, layer_id, input_image):
model_ = Model(inputs=[model.input],
return model_.predict(np.expand_dims(input_image,
from keras.datasets import cifar10
from keras.utils import np_utils
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras import regularizers
from keras.layers import BatchNormalization
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator