@amueller
Created April 1, 2012 14:16
Testing influence of dataset size on C
import numpy as np
from sklearn import datasets
from sklearn.cross_validation import ShuffleSplit
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import Scaler

# Load the USPS digits (the smaller built-in digits set can be used instead).
#data = datasets.load_digits()
data = datasets.fetch_mldata("usps")
X, y = data.data, data.target

# Standardize the features to zero mean and unit variance.
X = Scaler().fit_transform(X)
n_samples, n_features = X.shape

# Grid of C values: 2^0, 2^1, ..., 2^19.
C_grid = dict(C=2. ** np.arange(0, 20))

# 10 random splits using 70% of the samples for training and 20% for testing.
cv = ShuffleSplit(n=n_samples, train_fraction=.7, test_fraction=.2, n_iterations=10)

# Cross-validated grid search over C for an RBF-kernel SVM.
grid_search = GridSearchCV(SVC(kernel='rbf'), param_grid=C_grid, cv=cv, n_jobs=12)
grid_search.fit(X, y)
print(grid_search.grid_scores_)
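
The snippet above targets the 2012-era scikit-learn API (sklearn.cross_validation, sklearn.grid_search, Scaler, grid_scores_), which has since been removed. Below is a minimal sketch of the same experiment against a current scikit-learn release, assuming ShuffleSplit and GridSearchCV from sklearn.model_selection, StandardScaler in place of Scaler, and cv_results_ in place of grid_scores_; load_digits stands in for the USPS data because fetch_mldata no longer exists.

# Sketch of the same C grid search with a modern scikit-learn API.
import numpy as np
from sklearn import datasets
from sklearn.model_selection import ShuffleSplit, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# load_digits used as a stand-in dataset; fetch_mldata has been removed.
data = datasets.load_digits()
X, y = data.data, data.target
X = StandardScaler().fit_transform(X)

# Same grid of C values: 2^0, 2^1, ..., 2^19.
param_grid = dict(C=2.0 ** np.arange(0, 20))

# 10 random 70%/20% train/test splits, as in the original.
cv = ShuffleSplit(n_splits=10, train_size=0.7, test_size=0.2, random_state=0)

grid_search = GridSearchCV(SVC(kernel="rbf"), param_grid=param_grid, cv=cv, n_jobs=-1)
grid_search.fit(X, y)

# cv_results_ replaces the old grid_scores_ attribute.
print(grid_search.cv_results_["mean_test_score"])
print(grid_search.best_params_)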