Kyle Ziegler kyleziegler

## bayes_loop.py
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score

from skopt.plots import plot_convergence
from skopt.utils import use_named_args
from skopt.space import Real, Integer
from skopt import gp_minimize

import matplotlib.pyplot as plt

## bayes_search_cv.py
from skopt import BayesSearchCV
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import GridSearchCV

import matplotlib.pyplot as plt
plt.rcParams["figure.dpi"] = 100
plt.rcParams["figure.figsize"] = [10,4]

def bayes_search_cv_example():

## data_prep_hyperparameter_blog.py
# Dataprep
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine
import numpy as np
import pandas as pd

import warnings
# Silence warnings about parameter options we have already tried; useful when iterating
warnings.filterwarnings("ignore")

## grid_search.py
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier

def grid_search_example():
    model = GradientBoostingClassifier(random_state=0, n_estimators=50)

    pg = {
        'learning_rate':list(np.arange(0.1,0.4,0.1)),
        'n_estimators': list(range(50,200,50)),
        'min_samples_split': list(range(2,10,2))

## shingling_jaccard.py
tokenizer = nltk.tokenize.casual.TweetTokenizer()

def jaccard_similarity(list_x, list_y):
    # Convert to set, capturing only unique values
    set_x = set(list_x)
    set_y = set(list_y)
    intersection = set_x.intersection(set_y)
    union = set_x.union(set_y)

    # Prevent divide by 0 when there is an empty set, and return jaccard

## edit_distance.py
def edit_distance(string_x: str, string_y: str) -> list(list()):
    # Left-pad a blank character to both strings
    string_x = ' ' + string_x
    string_y = ' ' + string_y

    # Obtain the length of the padded string
    len_x = len(string_x)
    len_y = len(string_y)

    # Initializing the distance matrix, we know that the first row and column

## config
Host notebook_name
  HostName xx.xxx.xxx.x
  IdentityFile ~/.ssh/google_compute_engine
  User jupyter

# If you plan on using ONLY your primary account (not recommended,
# since the UI always uses the jupyter user)

Host notebook_name
  HostName xx.xx.xx.xx
	from sklearn.ensemble import GradientBoostingClassifier
	from sklearn.model_selection import cross_val_score

	from skopt.plots import plot_convergence
	from skopt.utils import use_named_args
	from skopt.space import Real, Integer
	from skopt import gp_minimize

	import matplotlib.pyplot as plt
	from skopt import BayesSearchCV
	from sklearn.ensemble import GradientBoostingClassifier

	from sklearn.model_selection import GridSearchCV

	import matplotlib.pyplot as plt
	plt.rcParams["figure.dpi"] = 100
	plt.rcParams["figure.figsize"] = [10,4]

	def bayes_search_cv_example():
	# Dataprep
	from sklearn.model_selection import train_test_split
	from sklearn.datasets import load_wine
	import numpy as np
	import pandas as pd

	import warnings
	# Silence warnings about parameter options we have already tried; useful when iterating
	warnings.filterwarnings("ignore")
	from sklearn.model_selection import GridSearchCV
	from sklearn.ensemble import GradientBoostingClassifier

	def grid_search_example():
	model = GradientBoostingClassifier(random_state=0, n_estimators=50)

	pg = {
	'learning_rate':list(np.arange(0.1,0.4,0.1)),
	'n_estimators': list(range(50,200,50)),
	'min_samples_split': list(range(2,10,2))
	tokenizer = nltk.tokenize.casual.TweetTokenizer()

	def jaccard_similarity(list_x, list_y):
	# Convert to set, capturing only unique values
	set_x = set(list_x)
	set_y = set(list_y)
	intersection = set_x.intersection(set_y)
	union = set_x.union(set_y)

	# Prevent divide by 0 when there is an empty set, and return jaccard
	def edit_distance(string_x: str, string_y: str) -> list(list()):
	# Left-pad a blank character to both strings
	string_x = ' ' + string_x
	string_y = ' ' + string_y

	# Obtain the length of the padded string
	len_x = len(string_x)
	len_y = len(string_y)

	# Initializing the distance matrix, we know that the first row and column
	Host notebook_name
	HostName xx.xxx.xxx.x
	IdentityFile ~/.ssh/google_compute_engine
	User jupyter

	# If you plan on using ONLY your primary account (not recommended,
	# since the UI always uses the jupyter user)

	Host notebook_name
	HostName xx.xx.xx.xx