Skip to content

Instantly share code, notes, and snippets.

@arjoly
arjoly / gist:3665731
Created September 7, 2012 12:19
Truncation issue in _get_memory
import psutil
import os
import memory_profiler
pid = os.getpid()
a = memory_profiler._get_memory(pid)
process = psutil.Process(pid)
b = float(process.get_memory_info()[0]) / (1024 ** 2)
@arjoly
arjoly / bench_gradient_boosting.py
Created November 21, 2012 09:39
scikit-learn/scikit-learn #1388 - Bench 1 for issue #1047
import numpy as np
import gc
from datetime import datetime
# to store the results
scikit_classifier_results = []
scikit_regressor_results = []
mu_second = 0.0 + 10 ** 6 # number of microseconds in a second
@arjoly
arjoly / bench_gradient_boosting_2.py
Created November 21, 2012 14:54
scikit-learn/scikit-learn #1388 - Bench 2 for gbt on hastie for issue #1047
import numpy as np
import gc
from datetime import datetime
from sklearn.datasets import make_hastie_10_2
# to store the results
scikit_classifier_results = []
scikit_regressor_results = []
mu_second = 0.0 + 10 ** 6 # number of microseconds in a second
@arjoly
arjoly / bench_gradient_boosting.py
Created November 27, 2012 14:00
scikit-learn/scikit-learn #1388 - Bench 3 for issue #1047
import numpy as np
import gc
from datetime import datetime
from sklearn.utils import check_random_state
import pprint as pp
# to store the results
scikit_classifier_results = []
scikit_regressor_results = []
@arjoly
arjoly / gbt_prof
Created November 29, 2012 09:30
scikit-learn/scikit-learn #1388 - Line profiling for gradient tree boosting
Timer unit: 1e-06 s
File: /home/ajoly/git/scikit-learn/sklearn/ensemble/gradient_boosting.py
Function: fit_stage at line 453
Total time: 226.054 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
453 @profile
454 def fit_stage(self, i, X, X_argsorted, y, y_pred, sample_mask):
@arjoly
arjoly / gist:4170766
Created November 29, 2012 17:55
scikit-learn/scikit-learn #???? error on test_common
======================================================================
ERROR: sklearn.tests.test_common.test_transformers_sparse_data
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/ajoly/opt/python/lib/python2.7/site-packages/nose/case.py", line 197, in runTest
self.test(*self.arg)
File "/home/ajoly/git/scikit-learn/sklearn/tests/test_common.py", line 254, in test_transformers_sparse_data
raise exc
ValueError: eps=0.500000 and n_samples=40 lead to a target dimension of 177 which is larger than the original space with n_features=10
@arjoly
arjoly / gist:4193621
Created December 3, 2012 08:22
scikit-learn/scikit-learn #1388 - Line profiling for gradient tree boosting (friedman#2 n_samples=1000, n_estimators=2500, max_depth=3)
Timer unit: 1e-06 s
File: /home/ajoly/git/scikit-learn/sklearn/ensemble/gradient_boosting.py
Function: fit_stage at line 453
Total time: 52.8549 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
453 @profile
454 def fit_stage(self, i, X, X_argsorted, y, y_pred, sample_mask):
@arjoly
arjoly / gist:4194105
Created December 3, 2012 10:30
scikit-learn/scikit-learn #1438 random_dot features
def random_dot(A, n_components, density='auto', random_state=None,
dense_output=False, out=None):
"""Implicit dot product by a random sparse matrix
Calling this function is equivalent (up to a random seed shift) to::
safe_sparse_dot(A, sparse_random_matrix(n_features, n_components))
The difference is that random matrix is never fully allocated in
memory but instead generated on the fly using a hash function.
@arjoly
arjoly / gist:4225143
Created December 6, 2012 15:09
scikit-learn/scikit-learn #1438 benchmarking report for bernouill_random_matrix
ajoly at ajoly-MacBook in ~/git/scikit-learn on random_projection!
(sklearn) [1] $ kernprof.py -l benchmarks/bench_random_projections.py --sparse --transformer Bernouilli --n-times 10 --n-features 100000
Dataset statics
===========================
n_samples = 1000
n_features = 100000
n_components = 5920 (auto)
n_elements = 100000000
n_nonzeros = 100 per feature
ratio_nonzeros = 0.001
@arjoly
arjoly / bench_bernouill_random_matrix
Created December 20, 2012 11:01
scikit-learn/scikit-learn #1438 benchmarking report for bernouill_random_matrix
Dataset statics
===========================
n_samples = 500
n_features = 10000
n_components = 298 (auto)
n_elements = 5000000
n_nonzeros = 10 per feature
ratio_nonzeros = 0.001
Benchmarks