Created
July 10, 2012 15:36
-
-
Save ogrisel/3084146 to your computer and use it in GitHub Desktop.
memmapping for random forests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/Users/oliviergrisel/coding/scikit-learn/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=2), iterable=<generator object <genexpr> at 0x10467b3c0>)
470 self.n_dispatched = 0
471 try:
472 for function, args, kwargs in iterable:
473 self.dispatch(function, args, kwargs)
474
--> 475 self.retrieve()
self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=2)>
476 # Make sure that we get a last message telling us we are done
477 elapsed_time = time.time() - self._start_time
478 self._print('Done %3i out of %3i | elapsed: %s finished',
479 (len(self._output),
---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
AttributeError Tue Jul 10 17:50:08 2012
PID: 46155 Python 2.6.1: /usr/bin/python
...........................................................................
/Users/oliviergrisel/coding/scikit-learn/sklearn/ensemble/forest.pyc in _parallel_build_trees(n_trees=50, forest=RandomForestClassifier(bootstrap=True, compute_i...te object at 0x1004b30f0>,
verbose=0), X=<class 'numpy.core.memmap.memmap'> instance, y=array([[ 1.],
[ 0.],
[ 1.],
...,
[ 0.],
[ 1.],
[ 1.]]), sample_mask=None, X_argsorted=None, seed=445760040, verbose=0)
73 tree.set_params(random_state=check_random_state(seed))
74
75 if forest.bootstrap:
76 n_samples = X.shape[0]
77 indices = random_state.randint(0, n_samples, n_samples)
---> 78 tree.fit(X[indices], y[indices],
i = 0
79 sample_mask=sample_mask, X_argsorted=X_argsorted)
80 tree.indices_ = indices
81
82 else:
...........................................................................
/Library/Python/2.6/site-packages/numpy/core/memmap.pyc in __array_finalize__(self=<class 'numpy.core.memmap.memmap'> instance, obj=<class 'numpy.core.memmap.memmap'> instance)
252 return self
253
254 def __array_finalize__(self, obj):
255 if hasattr(obj, '_mmap'):
256 self._mmap = obj._mmap
--> 257 self.filename = obj.filename
258 self.offset = obj.offset
259 self.mode = obj.mode
260 else:
261 self._mmap = None
AttributeError: 'memmap' object has no attribute 'filename'
___________________________________________________________________________
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Fit a random forest on a memory-mapped dataset using two parallel jobs.

The script generates a synthetic classification dataset, dumps the feature
matrix to disk with joblib, reloads it with ``mmap_mode='c'`` and fits a
``RandomForestClassifier`` with ``n_jobs=2`` on the reloaded array, printing
the training-set score.

NOTE(review): this appears to be the reproduction script for the
``AttributeError: 'memmap' object has no attribute 'filename'`` traceback
recorded earlier in this file -- confirm before changing the data-loading
steps, since the memmap input is presumably the point of the experiment.
"""
import numpy as np
from sklearn.datasets.samples_generator import make_classification
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestClassifier

# Single-argument parenthesised prints emit the same text under the Python 2
# print statement and the Python 3 print function.
print("generating dataset")
X, y = make_classification(n_samples=100000, n_features=500)

filename = '/tmp/dataset.joblib'
print("memory mapping to " + filename)

# Persist the features as a Fortran-ordered float32 array, then reload them
# with mmap_mode='c' (numpy's copy-on-write memmap mode) instead of reading
# the array into memory.
joblib.dump(np.asarray(X, dtype=np.float32, order='F'), filename)
X = joblib.load(filename, mmap_mode='c')

print("fitting random forest:")
clf = RandomForestClassifier(n_estimators=100, n_jobs=2)
# Score on the training data itself; the run is about whether fitting on the
# reloaded array completes, not about generalisation.
print(clf.fit(X, y).score(X, y))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment