@kemaleren · Last active December 20, 2015
Line profiler dumps for SpectralBiclustering and SpectralCoclustering on both dense and sparse matrices.
# IPython session: the profiles below were collected with the %lprun magic
# from the line_profiler extension.
%load_ext line_profiler

from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.datasets import make_checkerboard
from sklearn.datasets import make_biclusters
from sklearn.cluster.bicluster import SpectralBiclustering
from sklearn.cluster.bicluster import SpectralCoclustering

newsgroups = fetch_20newsgroups_vectorized()

# Spectral Co-Clustering

# dense test: 40000 x 300 matrix
a, rows, columns = make_biclusters((40000, 300), 10, noise=2)
model = SpectralCoclustering(10, mini_batch=False)
%lprun -f model._fit model.fit(a)

# sparse test: 20newsgroups_vectorized, mini_batch=False
model = SpectralCoclustering(20, mini_batch=False)
%lprun -f model._fit model.fit(newsgroups.data)

# sparse test: 20newsgroups_vectorized, mini_batch=True
model = SpectralCoclustering(20, mini_batch=True)
%lprun -f model._fit model.fit(newsgroups.data)

# Spectral Biclustering

# dense test: 40000 x 300 checkerboard matrix
a, rows, columns = make_checkerboard((40000, 300), 10, noise=2)
model = SpectralBiclustering(10)
%lprun -f model._fit model.fit(a)

# sparse test: 20newsgroups_vectorized, mini_batch=False
model = SpectralBiclustering(20, mini_batch=False)
%lprun -f model._fit model.fit(newsgroups.data)

# sparse test: 20newsgroups_vectorized, mini_batch=True
model = SpectralBiclustering(20, mini_batch=True)
%lprun -f model._fit model.fit(newsgroups.data)
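The dumps below come from the %lprun calls above. For reference, a minimal sketch of collecting the same per-line stats without IPython, using the line_profiler API directly (assumes line_profiler is installed, with model and newsgroups as defined above):

from line_profiler import LineProfiler

profiler = LineProfiler(model._fit)           # register the method to time
profiler.runcall(model.fit, newsgroups.data)  # run fit under the profiler
profiler.print_stats()                        # prints a table like those below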
* SpectralCoclustering dense test: 40000 x 300 matrix
File: sklearn/cluster/bicluster/spectral.py
Function: _fit at line 271
Total time: 2.62557 s
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   271                                           def _fit(self, X):
   272         1       124484 124484.0      4.7      normalized_data, row_diag, col_diag = _scale_normalize(X)
   273         1           48     48.0      0.0      n_sv = 1 + int(np.ceil(np.log2(self.n_clusters)))
   274         1      1737672 1737672.0    66.2      u, v = self._svd(normalized_data, n_sv, n_discard=1)
   275         1          830    830.0      0.0      z = np.vstack((row_diag[:, np.newaxis] * u,
   276         1          243    243.0      0.0                     col_diag[:, np.newaxis] * v))
   277
   278         1       761520 761520.0     29.0      _, labels = self._k_means(z, self.n_clusters)
   279
   280         1            4      4.0      0.0      n_rows = X.shape[0]
   281         1            4      4.0      0.0      self.row_labels_ = labels[:n_rows]
   282         1            2      2.0      0.0      self.column_labels_ = labels[n_rows:]
   283
   284         1            3      3.0      0.0      self.rows_ = np.vstack(self.row_labels_ == c
   285         1          615    615.0      0.0                             for c in range(self.n_clusters))
   286         1            2      2.0      0.0      self.columns_ = np.vstack(self.column_labels_ == c
   287         1          143    143.0      0.0                                for c in range(self.n_clusters))
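The dense co-clustering run spends most of its time in the truncated SVD (66.2% of the 2.6 s total). A hedged sketch of a comparable truncated decomposition with scikit-learn's randomized_svd helper; the matrix and seed are illustrative stand-ins, not the profiled data:

import numpy as np
from sklearn.utils.extmath import randomized_svd

rng = np.random.RandomState(0)
A = rng.randn(40000, 300)             # stand-in for normalized_data
n_sv = 1 + int(np.ceil(np.log2(10)))  # same formula as line 273 above
U, S, Vt = randomized_svd(A, n_components=n_sv, random_state=0)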
* SpectralCoclustering sparse test: 20newsgroups_vectorized, with mini_batch=False
File: sklearn/cluster/bicluster/spectral.py
Function: _fit at line 271
Total time: 32.9558 s
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   271                                           def _fit(self, X):
   272         1       138058 138058.0      0.4      normalized_data, row_diag, col_diag = _scale_normalize(X)
   273         1           17     17.0      0.0      n_sv = 1 + int(np.ceil(np.log2(self.n_clusters)))
   274         1       380753 380753.0      1.2      u, v = self._svd(normalized_data, n_sv, n_discard=1)
   275         1          259    259.0      0.0      z = np.vstack((row_diag[:, np.newaxis] * u,
   276         1         3901   3901.0      0.0                     col_diag[:, np.newaxis] * v))
   277
   278         1     32428991 32428991.0   98.4      _, labels = self._k_means(z, self.n_clusters)
   279
   280         1            6      6.0      0.0      n_rows = X.shape[0]
   281         1            4      4.0      0.0      self.row_labels_ = labels[:n_rows]
   282         1            3      3.0      0.0      self.column_labels_ = labels[n_rows:]
   283
   284         1            3      3.0      0.0      self.rows_ = np.vstack(self.row_labels_ == c
   285         1          590    590.0      0.0                             for c in range(self.n_clusters))
   286         1            2      2.0      0.0      self.columns_ = np.vstack(self.column_labels_ == c
   287         1         3263   3263.0      0.0                                for c in range(self.n_clusters))
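With full-batch k-means, clustering the stacked embedding takes 98.4% of the 32.96 s, which is exactly the step the mini_batch option targets. A hedged sketch of that idea using MiniBatchKMeans; the embedding z here is a random stand-in for the one built inside _fit:

import numpy as np
from sklearn.cluster import MiniBatchKMeans

rng = np.random.RandomState(0)
z = rng.randn(100000, 5)              # stand-in stacked row/column embedding
mbk = MiniBatchKMeans(n_clusters=20, random_state=0)
labels = mbk.fit_predict(z)           # mini-batch updates: far cheaper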
* SpectralCoclustering sparse test: 20newsgroups_vectorized, with mini_batch=True
File: sklearn/cluster/bicluster/spectral.py
Function: _fit at line 271
Total time: 1.92757 s
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   271                                           def _fit(self, X):
   272         1       140340 140340.0      7.3      normalized_data, row_diag, col_diag = _scale_normalize(X)
   273         1           17     17.0      0.0      n_sv = 1 + int(np.ceil(np.log2(self.n_clusters)))
   274         1       401109 401109.0     20.8      u, v = self._svd(normalized_data, n_sv, n_discard=1)
   275         1          262    262.0      0.0      z = np.vstack((row_diag[:, np.newaxis] * u,
   276         1         3825   3825.0      0.2                     col_diag[:, np.newaxis] * v))
   277
   278         1      1377933 1377933.0    71.5      _, labels = self._k_means(z, self.n_clusters)
   279
   280         1            9      9.0      0.0      n_rows = X.shape[0]
   281         1            5      5.0      0.0      self.row_labels_ = labels[:n_rows]
   282         1            2      2.0      0.0      self.column_labels_ = labels[n_rows:]
   283
   284         1            4      4.0      0.0      self.rows_ = np.vstack(self.row_labels_ == c
   285         1          562    562.0      0.0                             for c in range(self.n_clusters))
   286         1            2      2.0      0.0      self.columns_ = np.vstack(self.column_labels_ == c
   287         1         3501   3501.0      0.2                                for c in range(self.n_clusters))
* SpectralBiclustering dense test: 40000 x 300 checkerboard matrix
File: sklearn/cluster/bicluster/spectral.py
Function: _fit at line 411
Total time: 9.88391 s
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   411                                           def _fit(self, X):
   412         1            4      4.0      0.0      n_sv = self.n_components
   413         1            3      3.0      0.0      if self.method == 'bistochastic':
   414         1      3718143 3718143.0    37.6          normalized_data = _bistochastic_normalize(X)
   415         1            3      3.0      0.0          n_sv += 1
   416                                               elif self.method == 'scale':
   417                                                   normalized_data, _, _ = _scale_normalize(X)
   418                                                   n_sv += 1
   419                                               elif self.method == 'log':
   420                                                   normalized_data = _log_normalize(X)
   421         1            3      3.0      0.0      n_discard = 0 if self.method == 'log' else 1
   422         1      2105260 2105260.0    21.3      u, v = self._svd(normalized_data, n_sv, n_discard)
   423         1            3      3.0      0.0      ut = u.T
   424         1            2      2.0      0.0      vt = v.T
   425
   426         1            2      2.0      0.0      try:
   427         1           12     12.0      0.0          n_row_clusters, n_col_clusters = self.n_clusters
   428         1            3      3.0      0.0      except TypeError:
   429         1            2      2.0      0.0          n_row_clusters = n_col_clusters = self.n_clusters
   430
   431         1            2      2.0      0.0      best_ut = self._fit_best_piecewise(ut, self.n_best,
   432         1      3033660 3033660.0    30.7                                         n_row_clusters)
   433
   434         1            5      5.0      0.0      best_vt = self._fit_best_piecewise(vt, self.n_best,
   435         1       296716 296716.0      3.0                                         n_col_clusters)
   436
   437         1            3      3.0      0.0      self.row_labels_ = self._project_and_cluster(X, best_vt.T,
   438         1       650654 650654.0      6.6                                                   n_row_clusters)
   439
   440         1            9      9.0      0.0      self.column_labels_ = self._project_and_cluster(X.T, best_ut.T,
   441         1        71899  71899.0      0.7                                                      n_col_clusters)
* SpectralBiclustering sparse test: 20newsgroups_vectorized, with mini_batch=False
File: sklearn/cluster/bicluster/spectral.py
Function: _fit at line 411
Total time: 130.362 s
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   411                                           def _fit(self, X):
   412         1            5      5.0      0.0      n_sv = self.n_components
   413         1            3      3.0      0.0      if self.method == 'bistochastic':
   414         1       168287 168287.0      0.1          normalized_data = _bistochastic_normalize(X)
   415         1            3      3.0      0.0          n_sv += 1
   416                                               elif self.method == 'scale':
   417                                                   normalized_data, _, _ = _scale_normalize(X)
   418                                                   n_sv += 1
   419                                               elif self.method == 'log':
   420                                                   normalized_data = _log_normalize(X)
   421         1            3      3.0      0.0      n_discard = 0 if self.method == 'log' else 1
   422         1       421802 421802.0      0.3      u, v = self._svd(normalized_data, n_sv, n_discard)
   423         1            3      3.0      0.0      ut = u.T
   424         1            2      2.0      0.0      vt = v.T
   425
   426         1            2      2.0      0.0      try:
   427         1           13     13.0      0.0          n_row_clusters, n_col_clusters = self.n_clusters
   428         1            5      5.0      0.0      except TypeError:
   429         1            1      1.0      0.0          n_row_clusters = n_col_clusters = self.n_clusters
   430
   431         1            2      2.0      0.0      best_ut = self._fit_best_piecewise(ut, self.n_best,
   432         1      5315974 5315974.0     4.1                                         n_row_clusters)
   433
   434         1            4      4.0      0.0      best_vt = self._fit_best_piecewise(vt, self.n_best,
   435         1    104340257 104340257.0  80.0                                         n_col_clusters)
   436
   437         1           10     10.0      0.0      self.row_labels_ = self._project_and_cluster(X, best_vt.T,
   438         1      3851173 3851173.0     3.0                                                   n_row_clusters)
   439
   440         1          149    149.0      0.0      self.column_labels_ = self._project_and_cluster(X.T, best_ut.T,
   441         1     16182799 16182799.0   12.4                                                      n_col_clusters)
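Here _fit_best_piecewise on the column singular vectors dominates (80.0%, roughly 104 s); those vectors have one entry per column of the term matrix, so they are far longer than the row vectors. A hedged sketch of the piecewise-constant fit performed per vector (an illustration of the technique, not the library's exact code; piecewise_constant is a hypothetical helper name):

import numpy as np
from sklearn.cluster import KMeans

def piecewise_constant(v, n_clusters, seed=0):
    # 1-D k-means, then replace each entry with its cluster centroid
    km = KMeans(n_clusters=n_clusters, random_state=seed)
    labels = km.fit_predict(v.reshape(-1, 1))
    return km.cluster_centers_[labels].ravel()

rng = np.random.RandomState(0)
vector = rng.randn(1000)              # stand-in singular vector
approx = piecewise_constant(vector, n_clusters=5)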
* SpectralBiclustering sparse test: 20newsgroups_vectorized, with mini_batch=True
File: sklearn/cluster/bicluster/spectral.py
Function: _fit at line 411
Total time: 3.26277 s
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   411                                           def _fit(self, X):
   412         1            3      3.0      0.0      n_sv = self.n_components
   413         1            2      2.0      0.0      if self.method == 'bistochastic':
   414         1       151776 151776.0      4.7          normalized_data = _bistochastic_normalize(X)
   415         1            3      3.0      0.0          n_sv += 1
   416                                               elif self.method == 'scale':
   417                                                   normalized_data, _, _ = _scale_normalize(X)
   418                                                   n_sv += 1
   419                                               elif self.method == 'log':
   420                                                   normalized_data = _log_normalize(X)
   421         1            3      3.0      0.0      n_discard = 0 if self.method == 'log' else 1
   422         1       402177 402177.0     12.3      u, v = self._svd(normalized_data, n_sv, n_discard)
   423         1            3      3.0      0.0      ut = u.T
   424         1            2      2.0      0.0      vt = v.T
   425
   426         1            1      1.0      0.0      try:
   427         1           13     13.0      0.0          n_row_clusters, n_col_clusters = self.n_clusters
   428         1            4      4.0      0.0      except TypeError:
   429         1            1      1.0      0.0          n_row_clusters = n_col_clusters = self.n_clusters
   430
   431         1            3      3.0      0.0      best_ut = self._fit_best_piecewise(ut, self.n_best,
   432         1       760194 760194.0     23.3                                         n_row_clusters)
   433
   434         1            4      4.0      0.0      best_vt = self._fit_best_piecewise(vt, self.n_best,
   435         1      1570605 1570605.0    48.1                                         n_col_clusters)
   436
   437         1           10     10.0      0.0      self.row_labels_ = self._project_and_cluster(X, best_vt.T,
   438         1       120669 120669.0      3.7                                                   n_row_clusters)
   439
   440         1          159    159.0      0.0      self.column_labels_ = self._project_and_cluster(X.T, best_ut.T,
   441         1       174007 174007.0      5.3                                                      n_col_clusters)
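For completeness, a hedged sketch of the projection step timed at lines 437-441 above (an illustration, not the exact library code): project the data onto the best piecewise vectors, then cluster the projection with k-means. Shapes are illustrative stand-ins:

import numpy as np
from sklearn.cluster import KMeans

rng = np.random.RandomState(0)
X = rng.randn(500, 300)               # stand-in data matrix
best_vt = rng.randn(3, 300)           # stand-in selected column vectors

projection = X.dot(best_vt.T)         # shape (n_samples, n_best)
row_labels = KMeans(n_clusters=5, random_state=0).fit_predict(projection)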