Skip to content

Instantly share code, notes, and snippets.

@ivirshup
Created May 28, 2018 05:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivirshup/a6facfa1ace5b356ea2d18ff3ffe0cb9 to your computer and use it in GitHub Desktop.
Save ivirshup/a6facfa1ace5b356ea2d18ff3ffe0cb9 to your computer and use it in GitHub Desktop.
Profiling memory usage of `scanpy`'s `_get_mean_var`.
import scanpy
import scanpy.api as sc
import numpy as np
from scipy import random, sparse
from time import sleep
import sklearn.utils.sparsefuncs as sparsefuncs
@profile
def current_dense(X):
    """Return scanpy's current ``_get_mean_var`` result for ``X``.

    ``main`` calls this with the dense array, so the dense run is wrapped
    in a function of its own, separate from ``current_sparse``.

    NOTE(review): ``profile`` is not imported anywhere in this file; it is
    presumably injected by the profiler that runs the script -- confirm.
    """
    get_mean_var = scanpy.preprocessing.simple._get_mean_var
    return get_mean_var(X)
@profile
def current_sparse(X):
    """Return scanpy's current ``_get_mean_var`` result for ``X``.

    Same call as ``current_dense``; ``main`` passes the CSR matrix to this
    one, so the sparse run is wrapped in a function of its own.

    NOTE(review): ``profile`` is not imported anywhere in this file; it is
    presumably injected by the profiler that runs the script -- confirm.
    """
    baseline = scanpy.preprocessing.simple._get_mean_var
    return baseline(X)
@profile
def lessalloc_dense(X):
    """Return ``(mean, var)`` of ``X`` taken along axis 0.

    The variance is computed as ``E[x^2] - E[x]^2`` and then scaled by
    ``n / (n - 1)``, where ``n = X.shape[0]``. The mean of squares is
    evaluated one axis-0 slice at a time through ``np.apply_along_axis``
    rather than by squaring all of ``X`` in a single expression.

    NOTE(review): judging by the name, the slice-wise evaluation is meant
    to allocate less than the current implementation -- the profile run
    is what verifies that; confirm.
    """
    def _mean_of_squares(column):
        return np.square(column).mean()

    col_mean = X.mean(axis=0)
    col_mean_sq = np.apply_along_axis(_mean_of_squares, 0, X)
    n_rows = X.shape[0]
    # n / (n - 1) rescaling of the E[x^2] - E[x]^2 variance.
    correction = n_rows / (n_rows - 1)
    col_var = (col_mean_sq - col_mean ** 2) * correction
    return col_mean, col_var
@profile
def unbiased_estimator(X):
    """Return ``(mean, var)`` of ``X`` along axis 0 via sklearn's sparsefuncs.

    ``sparsefuncs.mean_variance_axis`` supplies both statistics; the
    variance it returns is then multiplied in place by ``n / (n - 1)``
    with ``n = X.shape[0]``.

    ``main`` calls this with a ``scipy.sparse.csr_matrix``.
    """
    col_mean, col_var = sparsefuncs.mean_variance_axis(X, 0)
    n_rows = X.shape[0]
    # Enforce the R convention (unbiased estimator) for the variance.
    correction = n_rows / (n_rows - 1)
    col_var *= correction
    return col_mean, col_var
def main():
    """Run every mean/variance variant once and check that they agree.

    Draws a 10000 x 10000 array from ``random.negative_binomial(10, .95)``,
    builds a CSR copy of it, then calls the two current wrappers followed
    by the two alternative implementations, sleeping between calls.
    Finally asserts, with ``np.allclose``, that each alternative matches
    the corresponding current result for both the means and the variances.
    """
    dense = random.negative_binomial(10, .95, (10000, 10000))
    dense_as_csr = sparse.csr_matrix(dense)

    # Current implementation: dense input first, then sparse.
    sleep(1)  # To space out usage.
    mean_cur, var_cur = current_dense(dense)
    sleep(0.1)
    sp_mean_cur, sp_var_cur = current_sparse(dense_as_csr)

    # Alternative implementations, in the same dense-then-sparse order.
    sleep(1)
    mean_alt, var_alt = lessalloc_dense(dense)
    sleep(0.1)
    sp_mean_alt, sp_var_alt = unbiased_estimator(dense_as_csr)

    comparisons = (
        (mean_cur, mean_alt),
        (var_cur, var_alt),
        (sp_mean_cur, sp_mean_alt),
        (sp_var_cur, sp_var_alt),
    )
    for current, alternative in comparisons:
        assert np.allclose(current, alternative)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment