Created
May 15, 2018 14:22
-
-
Save denis-bz/5a5ec41e495580d9b4773373e72fdaf6 to your computer and use it in GitHub Desktop.
noisyUSV: what is the effect of noise on the covariance of n x d rank r ?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
""" A = noisyUSV( n, d, r, noise ): U S V + noise, n x d, rank r """ | |
from __future__ import division | |
import numpy as np | |
from numpy.linalg import norm | |
from etc import znumpyutil as nu | |
__version__ = "2018-05-15 May denis-bz-py t-online de" # scale noise * S.max | |
#............................................................................... | |
def noisyUSV( n=1000, d=100, r=10, noise=0.1, diag=0, | |
distrib="normal", dtype=np.float64, seed=3, verbose=1 ): | |
""" A ... = noisyUSV( n= d= r= noise= diag= ) | |
A = U S V + noise: n x d, rank r | |
U: n x r random normal | |
S: spectrum, diagonal 1 .. diag ) -- sensitive, scale noise * diag ? | |
or a vector of length r | |
default: 1 .. r, not 1 .. 1/r | |
V: r x d random orthonormal, U U' = I | |
noise: n x d scale=noise * S.max() | |
-> A, USV, U, S, V, descrip | |
after Ghashami, Liberty, Phillips and Woodruff, Frequent Directions 2015 p. 21-22 | |
https://arxiv.org/pdf/1501.01711 . | |
""" | |
# SNR = var( D ) / var( noise ) ? | |
random = seed if isinstance(seed, np.random.RandomState) \ | |
else np.random.RandomState( seed=seed ) | |
U = random.normal( size=(n, r) ).astype(dtype) | |
if np.isscalar( diag ): | |
S = np.linspace( 1, diag or r, r, dtype=dtype ) | |
else: | |
S = np.asarray( diag, dtype=dtype ) | |
assert len(S) == r, [len(S), r] | |
S = np.sort( S )[::-1] | |
q, _ = np.linalg.qr( random.normal( size=(d, r) )) # random ortho cols | |
V = q.T | |
# I = np.eye( r, dtype=dtype ) | |
# V = np.tile( I, d // r ) # ~ same | |
# V /= np.linalg.norm( V, axis=1 )[:,np.newaxis] # V V' = I | |
USV = (U * S) .dot( V ) | |
gen = getattr( random, distrib ) # "normal" "laplace" ... | |
A = USV + gen( scale=noise * S.max(), size=(n, d) ).astype(dtype) | |
descrip = "noisyUSV: n %d d %d r %d diag %.3g +- %.2g seed %s noise %s %.3g " % ( | |
n, d, r, S.mean(), S.std(), seed, distrib, noise ) | |
if verbose: | |
print "\n", descrip | |
print " spectrum:", S | |
print " nonoise: %s %s" % (nu.norms( USV ), nu.quantiles( USV, q=7 )) | |
print " noisy: %s %s \n" % (nu.norms( A ), nu.quantiles( A, q=7 )) | |
return A, USV, U, S, V, descrip | |
#............................................................................... | |
if __name__ == "__main__": # run various dims ranks noises | |
import sys | |
np.set_printoptions( threshold=20, edgeitems=10, linewidth=140, | |
formatter = dict( float=nu.numstr )) | |
# formatter = dict( float = lambda x: "%.2g" % x )) # float arrays %.2g | |
print "\n", 80 * "=" | |
#........................................................................... | |
n = 5000 # paper: n 10k d 1k | |
dims = [500] | |
ranks = [10] | |
noises = [.1, .2] # * S.max() | |
plot = 0 | |
# sh run-noisyUSV: for ... > logs | |
# to change these params, run this.py a=1 b=None 'c = "..."' in sh or ipython | |
for arg in sys.argv[1:]: | |
exec( arg ) | |
# Asmall, USV, U, S, V, descrip = noisyUSV( n=10, d=4, r=2 ) | |
# print nu.ints( Asmall * 100 ) | |
def sum_to( x, percents=[80, 90] ): | |
""" print x, cumsum to 100 %, nr terms to reach __ % """ | |
sum = x.cumsum() | |
sum *= 100 /sum[-1] | |
print "values cumsum %%: %s ..." % nu.ints( sum[:20] ) | |
for pc in percents: | |
j = np.searchsorted( sum, pc ) | |
print "%d of %d values add up to %.0f %% " % ( | |
j+1, len(x), sum[j] ) | |
#........................................................................... | |
print "How many eigenvalues of A'A reach __ % of the total, A = noisyUSV() ?" | |
# see eig-noisy.py plot=1 | |
for r in ranks: | |
for d in dims: | |
for noise in noises: | |
print "\n", 80 * "-" | |
print "-- noise %.0f %% --" % (noise * 100) | |
A, USV, U, S, V, descrip = noisyUSV( n=n, d=d, r=r, diag=r, noise=noise ) | |
sing = np.linalg.svd( A, compute_uv=False ) | |
eigAA = sing**2 | |
sum_to( eigAA ) | |
print "" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment