# vasinkd/updateable_cv.py

## updateable_cv.py
from sklearn.feature_extraction.text import CountVectorizer
import six

class UpdateableCountVectorizer(CountVectorizer):
    """CountVectorizer whose ``vocabulary_`` can be extended in place."""

    def update(self, text, stop_words=None):
        """Add the unseen words of ``text`` to ``vocabulary_`` and re-index it.

        ``text`` is split on whitespace only. Every resulting token that is
        neither already a key of ``self.vocabulary_`` nor contained in
        ``stop_words`` becomes a new vocabulary term. When at least one term
        is added, every term (old and new) is re-assigned its position in
        the sorted list of terms, so previously assigned indices may change.

        Args:
            text: String whose whitespace-separated tokens are candidates.
            stop_words: Optional container of tokens that must never be
                added. ``None`` (the default) excludes nothing.

        Returns:
            None. ``self.vocabulary_`` is modified in place.
        """
        # NOTE(review): tokens are taken verbatim from ``text.split()``; no
        # other preprocessing is applied here -- confirm this matches how
        # the vocabulary was originally built.
        # NOTE(review): assumes ``self.vocabulary_`` already exists (this
        # class never creates it) -- verify against callers.
        excluded = () if stop_words is None else stop_words
        vocabulary = self.vocabulary_
        additions = {
            word
            for word in text.split()
            if word not in vocabulary and word not in excluded
        }
        if not additions:
            # Nothing new: keep the existing indices untouched.
            return

        # Indices must follow the sorted order of all terms, so the whole
        # mapping is renumbered. The same dict object is updated so that
        # outside references to ``vocabulary_`` stay valid.
        for index, term in enumerate(sorted(set(vocabulary) | additions)):
            vocabulary[term] = index
	from sklearn.feature_extraction.text import CountVectorizer
	import six

	class UpdateableCountVectorizer(CountVectorizer):
		"""CountVectorizer whose ``vocabulary_`` can be extended in place."""

		def update(self, text, stop_words=None):
			"""Add the unseen words of ``text`` to ``vocabulary_`` and re-index it.

			``text`` is split on whitespace only. Every resulting token that
			is neither already a key of ``self.vocabulary_`` nor contained in
			``stop_words`` becomes a new vocabulary term. When at least one
			term is added, every term (old and new) is re-assigned its
			position in the sorted list of terms, so previously assigned
			indices may change.

			Args:
				text: String whose whitespace-separated tokens are candidates.
				stop_words: Optional container of tokens that must never be
					added. ``None`` (the default) excludes nothing.

			Returns:
				None. ``self.vocabulary_`` is modified in place.
			"""
			# NOTE(review): tokens are taken verbatim from ``text.split()``;
			# no other preprocessing is applied here -- confirm this matches
			# how the vocabulary was originally built.
			# NOTE(review): assumes ``self.vocabulary_`` already exists (this
			# class never creates it) -- verify against callers.
			excluded = () if stop_words is None else stop_words
			vocabulary = self.vocabulary_
			additions = {
				word
				for word in text.split()
				if word not in vocabulary and word not in excluded
			}
			if not additions:
				# Nothing new: keep the existing indices untouched.
				return

			# Indices must follow the sorted order of all terms, so the whole
			# mapping is renumbered. The same dict object is updated so that
			# outside references to ``vocabulary_`` stay valid.
			for index, term in enumerate(sorted(set(vocabulary) | additions)):
				vocabulary[term] = index