This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import GradientBoostingClassifier | |
from sklearn.model_selection import cross_val_score | |
from skopt.plots import plot_convergence | |
from skopt.utils import use_named_args | |
from skopt.space import Real, Integer | |
from skopt import gp_minimize | |
import matplotlib.pyplot as plt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from skopt import BayesSearchCV | |
from sklearn.ensemble import GradientBoostingClassifier | |
from sklearn.model_selection import GridSearchCV | |
import matplotlib.pyplot as plt | |
plt.rcParams["figure.dpi"] = 100 | |
plt.rcParams["figure.figsize"] = [10,4] | |
def bayes_search_cv_example(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dataprep | |
from sklearn.model_selection import train_test_split | |
from sklearn.datasets import load_wine | |
import numpy as np | |
import pandas as pd | |
import warnings | |
# silence warnings for parameter options we have already tried, useful when iterating | |
warnings.filterwarnings("ignore") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import GridSearchCV | |
from sklearn.ensemble import GradientBoostingClassifier | |
def grid_search_example(): | |
model = GradientBoostingClassifier(random_state=0, n_estimators=50) | |
pg = { | |
'learning_rate':list(np.arange(0.1,0.4,0.1)), | |
'n_estimators': list(range(50,200,50)), | |
'min_samples_split': list(range(2,10,2)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tokenizer = nltk.tokenize.casual.TweetTokenizer() | |
def jaccard_similarity(list_x, list_y): | |
# Convert to set, capturing only unique values | |
set_x = set(list_x) | |
set_y = set(list_y) | |
intersection = set_x.intersection(set_y) | |
union = set_x.union(set_y) | |
# Prevent divide by 0 when there is an empty set, and return jaccard |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def edit_distance(string_x: str, string_y: str) -> list(list()): | |
# Left-pad a blank character to both strings | |
string_x = ' ' + string_x | |
string_y = ' ' + string_y | |
# Obtain the length of the padded string | |
len_x = len(string_x) | |
len_y = len(string_y) | |
# Initializing the distance matrix, we know that the first row and column |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Host notebook_name | |
HostName xx.xxx.xxx.x | |
IdentityFile ~/.ssh/google_compute_engine | |
User jupyter | |
# If you plan on using ONLY your primary account (not recommended, | |
# since the UI always uses the jupyter user) | |
Host notebook_name | |
HostName xx.xx.xx.xx |