Arnaud Joly arjoly

## gist:3665731
import psutil
import os
import memory_profiler

# Measure the current process's memory two ways so the readings can be
# compared side by side.
pid = os.getpid()

# Reading 1: memory_profiler's private helper.
# NOTE(review): presumably reported in MiB -- confirm against memory_profiler.
a = memory_profiler._get_memory(pid)

# Reading 2: psutil directly.  `Process.get_memory_info()` was renamed to
# `Process.memory_info()` in psutil 2.0 and the old name was removed in 3.0,
# so try the current name first and fall back only on very old installs.
process = psutil.Process(pid)
try:
    mem_info = process.memory_info()
except AttributeError:  # psutil < 2.0
    mem_info = process.get_memory_info()
# Field 0 is the resident set size in bytes; scale it to MiB.
b = float(mem_info[0]) / (1024 ** 2)

## bench_gradient_boosting.py
import numpy as np
import gc
from datetime import datetime

# Accumulators for the benchmark results, one list per estimator family.
scikit_classifier_results, scikit_regressor_results = [], []

# Microseconds per second, kept as a float.
mu_second = float(10 ** 6)

## bench_gradient_boosting_2.py
import numpy as np
import gc
from datetime import datetime
from sklearn.datasets import make_hastie_10_2

# Accumulators for the benchmark results, one list per estimator family.
scikit_classifier_results, scikit_regressor_results = [], []

# Microseconds per second, kept as a float.
mu_second = float(10 ** 6)

## bench_gradient_boosting.py
import numpy as np
import gc
from datetime import datetime
from sklearn.utils import check_random_state
import pprint as pp

# Accumulators for the benchmark results, one list per estimator family.
scikit_classifier_results, scikit_regressor_results = [], []

## gbt_prof
Timer unit: 1e-06 s

File: /home/ajoly/git/scikit-learn/sklearn/ensemble/gradient_boosting.py
Function: fit_stage at line 453
Total time: 226.054 s

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   453                                               @profile
   454                                               def fit_stage(self, i, X, X_argsorted, y, y_pred, sample_mask):

## gist:4170766
======================================================================
ERROR: sklearn.tests.test_common.test_transformers_sparse_data
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/ajoly/opt/python/lib/python2.7/site-packages/nose/case.py", line 197, in runTest
    self.test(*self.arg)
  File "/home/ajoly/git/scikit-learn/sklearn/tests/test_common.py", line 254, in test_transformers_sparse_data
    raise exc
ValueError: eps=0.500000 and n_samples=40 lead to a target dimension of 177 which is larger than the original space with n_features=10

## gist:4193621
Timer unit: 1e-06 s

File: /home/ajoly/git/scikit-learn/sklearn/ensemble/gradient_boosting.py
Function: fit_stage at line 453
Total time: 52.8549 s

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   453                                               @profile
   454                                               def fit_stage(self, i, X, X_argsorted, y, y_pred, sample_mask):

## gist:4194105
def random_dot(A, n_components, density='auto', random_state=None,
               dense_output=False, out=None):
    """Implicit dot product by a random sparse matrix

    Calling this function is equivalent (up to a random seed shift) to::

        safe_sparse_dot(A, sparse_random_matrix(n_features, n_components))

    The difference is that random matrix is never fully allocated in
    memory but instead generated on the fly using a hash function.

## gist:4225143
ajoly at ajoly-MacBook in ~/git/scikit-learn on random_projection!
(sklearn) [1] $ kernprof.py -l benchmarks/bench_random_projections.py --sparse --transformer Bernouilli  --n-times 10 --n-features 100000
Dataset statics
===========================
n_samples 	= 1000
n_features 	= 100000
n_components 	= 5920 (auto)
n_elements 	= 100000000
n_nonzeros 	= 100 per feature
ratio_nonzeros 	= 0.001

## bench_bernouill_random_matrix
Dataset statics
===========================
n_samples   = 500
n_features 	= 10000
n_components 	= 298 (auto)
n_elements 	= 5000000
n_nonzeros 	= 10 per feature
ratio_nonzeros 	= 0.001

Benchmarks
	import psutil
	import os
	import memory_profiler

	pid = os.getpid()
	a = memory_profiler._get_memory(pid)

	process = psutil.Process(pid)
	b = float(process.get_memory_info()[0]) / (1024 ** 2)
	import numpy as np
	import gc
	from datetime import datetime

	# to store the results
	scikit_classifier_results = []
	scikit_regressor_results = []

	mu_second = 0.0 + 10 ** 6 # number of microseconds in a second
	Timer unit: 1e-06 s

	File: /home/ajoly/git/scikit-learn/sklearn/ensemble/gradient_boosting.py
	Function: fit_stage at line 453
	Total time: 226.054 s

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	453 @profile
	454 def fit_stage(self, i, X, X_argsorted, y, y_pred, sample_mask):
	======================================================================
	ERROR: sklearn.tests.test_common.test_transformers_sparse_data
	----------------------------------------------------------------------
	Traceback (most recent call last):
	File "/home/ajoly/opt/python/lib/python2.7/site-packages/nose/case.py", line 197, in runTest
	self.test(*self.arg)
	File "/home/ajoly/git/scikit-learn/sklearn/tests/test_common.py", line 254, in test_transformers_sparse_data
	raise exc
	ValueError: eps=0.500000 and n_samples=40 lead to a target dimension of 177 which is larger than the original space with n_features=10
	def random_dot(A, n_components, density='auto', random_state=None,
	dense_output=False, out=None):
	"""Implicit dot product by a random sparse matrix

	Calling this function is equivalent (up to a random seed shift) to::

	safe_sparse_dot(A, sparse_random_matrix(n_features, n_components))

	The difference is that random matrix is never fully allocated in
	memory but instead generated on the fly using a hash function.
	ajoly at ajoly-MacBook in ~/git/scikit-learn on random_projection!
	(sklearn) [1] $ kernprof.py -l benchmarks/bench_random_projections.py --sparse --transformer Bernouilli --n-times 10 --n-features 100000
	Dataset statics
	===========================
	n_samples = 1000
	n_features = 100000
	n_components = 5920 (auto)
	n_elements = 100000000
	n_nonzeros = 100 per feature
	ratio_nonzeros = 0.001
	Dataset statics
	===========================
	n_samples = 500
	n_features = 10000
	n_components = 298 (auto)
	n_elements = 5000000
	n_nonzeros = 10 per feature
	ratio_nonzeros = 0.001

	Benchmarks