Skip to content

Instantly share code, notes, and snippets.

@denis-bz
Created May 15, 2018 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save denis-bz/5a5ec41e495580d9b4773373e72fdaf6 to your computer and use it in GitHub Desktop.
Save denis-bz/5a5ec41e495580d9b4773373e72fdaf6 to your computer and use it in GitHub Desktop.
noisyUSV: what is the effect of noise on the covariance of n x d rank r ?
#!/usr/bin/env python2
""" A = noisyUSV( n, d, r, noise ): U S V + noise, n x d, rank r """
from __future__ import division
import numpy as np
from numpy.linalg import norm
from etc import znumpyutil as nu
__version__ = "2018-05-15 May denis-bz-py t-online de" # scale noise * S.max
#...............................................................................
def noisyUSV( n=1000, d=100, r=10, noise=0.1, diag=0,
distrib="normal", dtype=np.float64, seed=3, verbose=1 ):
""" A ... = noisyUSV( n= d= r= noise= diag= )
A = U S V + noise: n x d, rank r
U: n x r random normal
S: spectrum, diagonal 1 .. diag ) -- sensitive, scale noise * diag ?
or a vector of length r
default: 1 .. r, not 1 .. 1/r
V: r x d random orthonormal, U U' = I
noise: n x d scale=noise * S.max()
-> A, USV, U, S, V, descrip
after Ghashami, Liberty, Phillips and Woodruff, Frequent Directions 2015 p. 21-22
https://arxiv.org/pdf/1501.01711 .
"""
# SNR = var( D ) / var( noise ) ?
random = seed if isinstance(seed, np.random.RandomState) \
else np.random.RandomState( seed=seed )
U = random.normal( size=(n, r) ).astype(dtype)
if np.isscalar( diag ):
S = np.linspace( 1, diag or r, r, dtype=dtype )
else:
S = np.asarray( diag, dtype=dtype )
assert len(S) == r, [len(S), r]
S = np.sort( S )[::-1]
q, _ = np.linalg.qr( random.normal( size=(d, r) )) # random ortho cols
V = q.T
# I = np.eye( r, dtype=dtype )
# V = np.tile( I, d // r ) # ~ same
# V /= np.linalg.norm( V, axis=1 )[:,np.newaxis] # V V' = I
USV = (U * S) .dot( V )
gen = getattr( random, distrib ) # "normal" "laplace" ...
A = USV + gen( scale=noise * S.max(), size=(n, d) ).astype(dtype)
descrip = "noisyUSV: n %d d %d r %d diag %.3g +- %.2g seed %s noise %s %.3g " % (
n, d, r, S.mean(), S.std(), seed, distrib, noise )
if verbose:
print "\n", descrip
print " spectrum:", S
print " nonoise: %s %s" % (nu.norms( USV ), nu.quantiles( USV, q=7 ))
print " noisy: %s %s \n" % (nu.norms( A ), nu.quantiles( A, q=7 ))
return A, USV, U, S, V, descrip
#...............................................................................
if __name__ == "__main__": # run various dims ranks noises
import sys
np.set_printoptions( threshold=20, edgeitems=10, linewidth=140,
formatter = dict( float=nu.numstr ))
# formatter = dict( float = lambda x: "%.2g" % x )) # float arrays %.2g
print "\n", 80 * "="
#...........................................................................
n = 5000 # paper: n 10k d 1k
dims = [500]
ranks = [10]
noises = [.1, .2] # * S.max()
plot = 0
# sh run-noisyUSV: for ... > logs
# to change these params, run this.py a=1 b=None 'c = "..."' in sh or ipython
for arg in sys.argv[1:]:
exec( arg )
# Asmall, USV, U, S, V, descrip = noisyUSV( n=10, d=4, r=2 )
# print nu.ints( Asmall * 100 )
def sum_to( x, percents=[80, 90] ):
""" print x, cumsum to 100 %, nr terms to reach __ % """
sum = x.cumsum()
sum *= 100 /sum[-1]
print "values cumsum %%: %s ..." % nu.ints( sum[:20] )
for pc in percents:
j = np.searchsorted( sum, pc )
print "%d of %d values add up to %.0f %% " % (
j+1, len(x), sum[j] )
#...........................................................................
print "How many eigenvalues of A'A reach __ % of the total, A = noisyUSV() ?"
# see eig-noisy.py plot=1
for r in ranks:
for d in dims:
for noise in noises:
print "\n", 80 * "-"
print "-- noise %.0f %% --" % (noise * 100)
A, USV, U, S, V, descrip = noisyUSV( n=n, d=d, r=r, diag=r, noise=noise )
sing = np.linalg.svd( A, compute_uv=False )
eigAA = sing**2
sum_to( eigAA )
print ""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment