Skip to content

Instantly share code, notes, and snippets.

@oyvindse
Created February 25, 2013 12:47
Show Gist options
  • Save oyvindse/5029603 to your computer and use it in GitHub Desktop.
Save oyvindse/5029603 to your computer and use it in GitHub Desktop.
SVM
#!/usr/bin/python
import sys
from numpy import loadtxt
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm.sparse import LinearSVC
# 0-based columns of the tab-separated input files: column 3 is used as the
# classification target and column 4 as the raw text fed to the vectorizer.
LABEL_COLUMN = 3
TEXT_COLUMN = 4


def load_rows(path):
    """Load a tab-separated file as a 2-D array of byte strings.

    Parameters
    ----------
    path : str
        Path of the tab-separated file to read.

    Returns
    -------
    numpy.ndarray
        Array of shape (rows, columns) with a byte-string dtype.
    """
    # comments=None: loadtxt treats '#' as a comment marker by default, which
    # would silently cut every field (and row) at the first '#' in the text.
    # ndmin=2: keep a single-row file two-dimensional so column indexing
    # with [:, k] keeps working.
    return loadtxt(path, delimiter='\t', dtype='S', comments=None, ndmin=2)


def build_classifier():
    """Return the untrained pipeline: token counts -> TF-IDF -> LinearSVC."""
    return Pipeline([
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', LinearSVC()),
    ])


def main(argv):
    """Train on argv[1], classify argv[2] and print the accuracy.

    Parameters
    ----------
    argv : list of str
        Command line in ``sys.argv`` form: program name, training file,
        test file.

    Returns
    -------
    int
        Process exit status: 0 on success, 2 when the arguments are wrong.
    """
    if len(argv) != 3:
        prog = argv[0] if argv else 'svm.py'
        sys.stderr.write("usage: %s TRAIN_TSV TEST_TSV\n" % prog)
        return 2

    train_rows = load_rows(argv[1])
    test_rows = load_rows(argv[2])

    print("Training...")
    classifier = build_classifier().fit(
        train_rows[:, TEXT_COLUMN], train_rows[:, LABEL_COLUMN])

    print("Done! \nClassifying test set...")
    predicted = classifier.predict(test_rows[:, TEXT_COLUMN])

    # Fraction of test rows whose predicted label equals the label column.
    print(np.mean(predicted == test_rows[:, LABEL_COLUMN]))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment