Last active
August 17, 2017 01:15
-
-
Save jnothman/019d594d197c98a3d6192fa0cb19c850 to your computer and use it in GitHub Desktop.
Using a mixin to cache a transform method call in scikit-learn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time

from joblib import Memory
from sklearn.base import clone
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
class CachedTransformMixin:
    """Mixin that memoizes ``transform`` calls on disk via joblib.

    Mix in ahead of any scikit-learn transformer: the MRO routes
    ``transform`` through this class, which delegates to the parent
    transformer's ``transform`` wrapped in a joblib disk cache, so
    repeated calls with the same arguments are served from disk.
    """

    # Shared class-level cache; every subclass instance writes to the
    # same directory, so identical inputs hit across instances too.
    # NOTE(review): '/tmp/cache' is POSIX-specific — consider
    # tempfile.gettempdir() for portability.
    memory = Memory('/tmp/cache')

    def transform(self, *args, **kwargs):
        """Delegate to the parent transformer's ``transform``, cached on disk."""
        # Zero-argument super() (Python 3) replaces the redundant
        # super(CachedTransformMixin, self) spelling.
        return self.memory.cache(super().transform)(*args, **kwargs)
class CachedCountVectorizer(CachedTransformMixin, CountVectorizer):
    """A ``CountVectorizer`` whose ``transform`` results are disk-cached."""
def _timed_transform(est, X):
    """Run one transform call and print its wall-clock duration."""
    start = time.perf_counter()
    Xt = est.transform(X)
    print('transform took {:.3f}s'.format(time.perf_counter() - start))
    return Xt


# Demo: the second call (and the call after cloning) should be served
# from the joblib disk cache and run much faster than the first.
# The original gist used IPython `%time` magics, which are not valid
# Python in a plain module; time.perf_counter reproduces the intent.
X = fetch_20newsgroups().data
est = CachedCountVectorizer().fit(X)
Xt = _timed_transform(est, X)  # cold cache: computes and stores
Xt = _timed_transform(est, X)  # warm cache: loaded from disk
est = clone(est).fit(X)        # the class-level Memory survives cloning
Xt = _timed_transform(est, X)  # still a cache hit on the cloned estimator
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment