Skip to content

Instantly share code, notes, and snippets.

@rayheberer
Created June 7, 2018 01:09
Show Gist options
  • Save rayheberer/16cbbfead8aef036c6d0b9e3b980d405 to your computer and use it in GitHub Desktop.
Save rayheberer/16cbbfead8aef036c6d0b9e3b980d405 to your computer and use it in GitHub Desktop.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
# Compare how an exhaustive grid search and a randomized search cover the
# (C, gamma) hyperparameter plane of an SVM fitted to the iris data.
X, y = load_iris(return_X_y=True)
np.random.seed(41)

svc = SVC()

# Grid search: 10 values per axis, i.e. a 10 x 10 lattice of candidates.
params1 = {
    'C': np.logspace(0, 3, 10),
    'gamma': np.linspace(0.0001, 0.001, 10),
}
clf1 = GridSearchCV(svc, params1)

# Randomized search: same ranges, but with logspace/linspace left at their
# default number of samples; 100 candidates are drawn from that finer lattice.
params2 = {
    'C': np.logspace(0, 3),
    'gamma': np.linspace(0.0001, 0.001),
}
clf2 = RandomizedSearchCV(svc, params2, n_iter=100)

# One panel per search strategy; the axis captions are shared and therefore
# written on the figure rather than on each subplot.
fig, axs = plt.subplots(1, 2, figsize=(10, 6))
fig.subplots_adjust(wspace=0.3)
fig.suptitle('Exploring SVM Hyperparameter Values: Grid vs Randomized Search')
fig.text(0.5, 0.04, 'C', ha='center')
fig.text(0.04, 0.5, 'gamma', va='center', rotation='vertical')

for clf, ax in zip((clf1, clf2), axs.ravel()):
    clf.fit(X, y)

    # Every evaluated candidate becomes one dot, coloured by its mean CV score.
    results = clf.cv_results_
    scores = results['mean_test_score']
    C = results['param_C']
    gamma = results['param_gamma']
    plot = ax.scatter(C, gamma, c=scores, cmap='coolwarm')

    ax.set_ylim(0, 0.0011)
    ax.set_xscale('log')  # C was sampled on a logarithmic scale

    cbar = fig.colorbar(plot, ax=ax)
    cbar.set_label(
        'Validation Accuracy',
        rotation=270,
        rotation_mode='default',
        verticalalignment='center',
    )
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier
# Show how the decision regions of a random forest change as the number of
# trees grows, using a noisy two-moons dataset.
np.random.seed(41)
X, y = make_moons(noise=0.1)

# Bounding box of the data, padded by 0.5 on every side.
x_min, y_min = X.min(axis=0) - 0.5
x_max, y_max = X.max(axis=0) + 0.5

# Evaluation lattice covering the padded box (linspace default resolution).
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max),
    np.linspace(y_min, y_max),
)
# The lattice flattened to one (x, y) row per point; it is the same for
# every forest, so build it once.
grid_points = np.column_stack((xx.ravel(), yy.ravel()))

fig, axs = plt.subplots(2, 2, figsize=(10, 6))

for n, ax in zip([1, 2, 5, 10], axs.ravel()):
    clf = RandomForestClassifier(n_estimators=n).fit(X, y)

    # Predicted class at every lattice point, folded back to the grid shape.
    Z = clf.predict(grid_points).reshape(xx.shape)

    ax.scatter(X[:, 0], X[:, 1], c=y, cmap='RdBu')
    ax.contourf(xx, yy, Z, cmap='RdBu', alpha=0.7, corner_mask=False)
    ax.set_title('n_estimators={}'.format(n))
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Seed NumPy's global RNG before any data is generated; presumably this is
# what makes the synthetic dataset and the later random splits repeatable
# (no explicit random_state is passed anywhere below) -- TODO confirm.
np.random.seed(13)
# Synthetic classification problem with 4000 samples. X and y are read as
# module-level globals by repeated_train_test_scores.
X, y = make_classification(n_samples=4000)
def repeated_train_test_scores(test_size, trials=30):
    """Score a logistic regression on several independent random splits.

    Each trial re-splits the module-level ``X`` and ``y``, fits a new
    ``LogisticRegression`` on the training part and scores it on the
    held-out part.

    Args:
        test_size: Forwarded unchanged to ``train_test_split`` as its
            ``test_size`` argument.
        trials: Number of split/fit/score repetitions.

    Returns:
        A list with one held-out score per trial, in trial order.
    """

    def _score_one_split():
        # Fresh random partition and a fresh, unfitted model for every trial.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size
        )
        model = LogisticRegression().fit(X_train, y_train)
        return model.score(X_test, y_test)

    return [_score_one_split() for _ in range(trials)]
# How much does the validation score fluctuate between random splits, as a
# function of the validation-set size?  Sizes run from 20 to 1000 in steps
# of 20.
sizes = np.arange(20, 1001, 20)

# One standard deviation per size, each taken over the repeated trials.
spreads = [
    np.std(repeated_train_test_scores(test_size))
    for test_size in sizes
]

plt.plot(sizes, spreads, 'o')
plt.title('Standard Deviations of Validation Scores of Trained Models')
plt.xlabel('Validation Set Samples')
plt.ylabel('Standard Deviation of Validation Accuracy')
plt.show()
# Distribution of validation scores for four validation-set sizes, one
# panel each.
fig, axs = plt.subplots(2, 2)
fig.subplots_adjust(hspace=0.2)

sizes = [100, 500, 1000, 2000]
for ax, size in zip(axs.flat, sizes):
    scores = repeated_train_test_scores(size, trials=50)

    # A fixed histogram range keeps the bin edges identical across panels.
    # NOTE(review): distplot is deprecated in recent seaborn releases;
    # confirm the installed version before relying on it.
    sns.distplot(
        scores,
        bins=20,
        ax=ax,
        hist_kws={'range': (0.83, 0.97)},
    )
    ax.set_title('Validation Set: {} samples'.format(size))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment