Skip to content

Instantly share code, notes, and snippets.

@vene
Created August 12, 2014 11:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vene/b8a6e199ee764289c65e to your computer and use it in GitHub Desktop.
Save vene/b8a6e199ee764289c65e to your computer and use it in GitHub Desktop.
from __future__ import print_function
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.dummy import DummyClassifier
from sklearn.cross_validation import LeaveOneOut
# Toy corpus of three short documents shared by every step below.
docs = [
    "the cat lives in the hat",
    "the quick brown fox jumps over a dog",
    "a clockwork orange",
]

# One vectorizer with default settings and one given an explicit two-word
# vocabulary.
default_vect = CountVectorizer()
vocab_vect = CountVectorizer(vocabulary=["the", "a"])

# Fit each vectorizer directly on the corpus and print its vocabulary_.
default_vect.fit(docs)
print("Vectorizer with default setting: ", default_vect.vocabulary_)
vocab_vect.fit(docs)
print("Vectorizer with fixed vocab: ", vocab_vect.vocabulary_)

# Put the fixed-vocabulary vectorizer in front of a DummyClassifier and run a
# grid search with a single candidate setting. LeaveOneOut(3) uses the legacy
# sklearn.cross_validation signature, which takes the number of samples.
pipeline = make_pipeline(vocab_vect, DummyClassifier())
param_grid = {"dummyclassifier__strategy": ["uniform"]}
grid = GridSearchCV(pipeline, param_grid, cv=LeaveOneOut(3))
labels = [1, 0, 1]
grid.fit(docs, labels)

# The vectorizer is the first pipeline step; each step is a (name, estimator)
# pair, so take the estimator out of that pair and print its vocabulary_.
first_step = grid.best_estimator_.steps[0]
searched_vect = first_step[1]
print("Vectorizer with fixed vocab after grid search: ",
      searched_vect.vocabulary_)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment