Skip to content

Instantly share code, notes, and snippets.

@amn41
Created August 28, 2016 16:31
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amn41/75e0a7cd97694fd03867721df59f3a85 to your computer and use it in GitHub Desktop.
Save amn41/75e0a7cd97694fd03867721df59f3a85 to your computer and use it in GitHub Desktop.
import numpy as np
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.cross_validation import train_test_split
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import pickle
# Train a linear SVM on pre-computed sentence vectors, tune `C` with a
# cross-validated grid search, report held-out metrics, and finally print the
# predictions for a handful of randomly chosen sentences.
#
# Expected input files (one row / line per sample, all in the same order):
#   vectors.dat   - numeric feature matrix readable by `np.loadtxt`
#   labels.dat    - one text label per line
#   sentences.dat - the raw sentence for each sample

samples = np.loadtxt('vectors.dat')
X = samples

# Use a context manager so the file handle is closed deterministically.
with open('labels.dat') as labels_file:
    labels_text = [line.strip() for line in labels_file]

# Sort the distinct labels so the label -> integer mapping is stable across
# runs (plain `set` iteration order depends on string hash randomisation).
label_index = sorted(set(labels_text))
print(label_index)
n = len(labels_text)

# Dictionary lookup instead of `list.index` avoids a linear scan per sample.
label_to_id = {label: i for i, label in enumerate(label_index)}
labels = np.array([label_to_id[label] for label in labels_text]).astype('int64')

tuned_parameters = [
    {'C': [1, 2, 5, 10, 20, 100], 'kernel': ['linear']},
    # Alternative RBF grid (disabled):
    # {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

# Hold out 10% of the data for the final evaluation; fixed seed for
# reproducible splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.1, random_state=0)

scores = ['precision']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=3,
                       scoring='%s_weighted' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    # NOTE(review): `grid_scores_` is the legacy results attribute that matches
    # the `sklearn.grid_search` import used by this file; newer scikit-learn
    # releases replace it with `cv_results_`.
    # The per-fold array is named `cv_scores` so it no longer shadows the
    # outer `scores` list being iterated.
    for params, mean_score, cv_scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r"
              % (mean_score, cv_scores.std() * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

with open('sentences.dat') as sentences_file:
    sentences = sentences_file.readlines()

# Show predictions for up to 10 distinct, randomly chosen samples. The
# original drew `idx` but then looped over the first 10 rows instead; it also
# failed when fewer than 10 samples were available.
idx = np.random.choice(n, min(10, n), replace=False)
for i in idx:
    label_pred = clf.predict(samples[i, :].reshape(1, -1))
    # `predict` returns a 1-element array; convert it to a plain int before
    # using it as a list index.
    predicted_label = label_index[int(label_pred[0])]
    print("ut : {0} label {1} predict {2}".format(
        sentences[i].strip(), labels_text[i], predicted_label))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment