Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/usr/bin/env python
from __future__ import division, print_function
import os
import sys
import time
import psutil
from sklearn.externals import joblib
from sklearn import datasets
# The following imports are not used directly, but if we don't import them here,
# the memory usage numbers will include the memory required to load these modules.
import array
import cPickle
import numpy
import scipy.sparse
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, HashingVectorizer
from collections import defaultdict
def _transform(vec, data):
    """Run ``vec.transform`` on *data* and return the result's column count.

    Only ``X.shape[1]`` is returned, so the transformed matrix itself is no
    longer referenced once this function returns; the caller relies on that
    when it compares memory usage before and after the call.

    :param vec: object exposing ``transform(data)`` (the caller passes a
        vectorizer loaded with joblib -- presumably a fitted scikit-learn
        text vectorizer).
    :param data: collection of documents handed to ``vec.transform`` as-is.
    :returns: second dimension of the transformed matrix (feature count).
    """
    # XXX: this code leaks memory:
    X = vec.transform(data)

    # XXX: and this code doesn't leak memory - why?
    # for doc in data:
    #     X = vec.transform([doc])

    return X.shape[1]
def _rss_mb(process):
    """Return the resident set size of *process* in MiB.

    psutil 2.0 renamed ``Process.get_memory_info()`` to ``memory_info()`` and
    later releases dropped the old name, so prefer the new method and fall
    back to the legacy one for old installations.
    """
    get_info = getattr(process, 'memory_info', None)
    if get_info is None:
        get_info = process.get_memory_info
    return get_info().rss / 2**20


if __name__ == '__main__':
    # The vectorizer name is mandatory; fail with a usage message instead of
    # an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s VECTORIZER_NAME" % sys.argv[0])

    vecname = sys.argv[1]
    fname = os.path.join('vec', vecname+'.joblib')

    # Fetch the dataset before taking the baseline so that its memory is not
    # attributed to loading the vectorizer.
    newsgroups_test = datasets.fetch_20newsgroups(subset='test')

    p = psutil.Process(os.getpid())
    before = _rss_mb(p)

    start_load = time.time()
    vec = joblib.load(fname)
    end_load = time.time()
    after_load = _rss_mb(p)

    start_transform = time.time()
    n_features = _transform(vec, newsgroups_test.data)
    end_transform = time.time()
    # Sample RSS right after the transform returns: the transformed matrix is
    # no longer referenced here, so any growth is memory that stayed allocated.
    after_transform = _rss_mb(p)

    print("transform features: %d" % n_features)

    print("load time: %0.1fs" % (end_load-start_load))
    print("load memory usage: %0.1fMB" % (after_load-before))
    print("transform time: %0.1fs" % (end_transform-start_transform))
    print("transform memory leak: %0.1fMB" % (after_transform-after_load))
    # print("total memory usage: %0.1fMB" % (after_transform-before))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.