Skip to content

Instantly share code, notes, and snippets.

@fannix
Created December 10, 2011 15:08
Show Gist options
  • Save fannix/1455367 to your computer and use it in GitHub Desktop.
Save fannix/1455367 to your computer and use it in GitHub Desktop.
cross validation
from sklearn.datasets import load_svmlight_file
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm.sparse import LinearSVC
from sklearn.cross_validation import StratifiedKFold
from sklearn import metrics
import numpy as np
# Stratified 10-fold cross-validation of a linear SVM on the svmlight-format
# file "fr.vec". For every fold the confusion matrix and classification
# report are printed; the mean accuracy over all folds is printed at the end.
#
# NOTE(review): StratifiedKFold(y, k=..., indices=True) is the call form of
# the legacy sklearn.cross_validation module imported by this file. Newer
# scikit-learn releases expose a different API in sklearn.model_selection;
# migrating would also require changing the imports.
X, y = load_svmlight_file("fr.vec")

# Relabel the -1 class as 0 (in place) before splitting and training.
y[y == -1] = 0

kf = StratifiedKFold(y, k=10, indices=True)

# Alternative classifier that can be swapped in here: clf = MultinomialNB()
clf = LinearSVC()

# Per-fold accuracy scores, averaged after the loop.
mean_li = []
for train_index, test_index in kf:
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Refit the same estimator on this fold's training split.
    clf.fit(X_train, y_train)
    y_predicted = clf.predict(X_test)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(metrics.confusion_matrix(y_test, y_predicted))
    print(metrics.classification_report(y_test, y_predicted))

    # Fraction of correctly predicted test samples in this fold.
    mean_li.append(np.mean(y_predicted == y_test))

# Mean accuracy across all folds.
print(np.mean(mean_li))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment