Philipp Singer psinger

## huge_matrices_7.py
#let's get the third row
a = h5.root
b = csr_matrix((a.data[a.indptr[3]:a.indptr[3+1]], a.indices[a.indptr[3]:a.indptr[3+1]], np.array([0,len(a.indices[a.indptr[3]:a.indptr[3+1]])])), shape=(1,n))

## huge_matrices_6.py
h5 = tb.open_file('dot.h5', 'r')
a = csr_matrix((h5.root.data[:], h5.root.indices[:], h5.root.indptr[:]), shape=(l,n))

## huge_matrices_5.py
import numpy as np
f = tb.open_file('dot.h5', 'w')
filters = tb.Filters(complevel=5, complib='blosc')
out_data = f.create_earray(f.root, 'data', tb.Float32Atom(), shape=(0,), filters=filters)
out_indices = f.create_earray(f.root, 'indices', tb.Int32Atom(),shape=(0,), filters=filters)
out_indptr = f.create_earray(f.root, 'indptr', tb.Int32Atom(), shape=(0,), filters=filters)
out_indptr.append(np.array([0])) #this is needed as a first indptr
max_indptr = 0
for i in range(0, l, bl):
 res = a[i:min(i+bl, l),:].dot(b)

## gist:9827b22d7c46e7624613941bb8c07f00
h5 = tb.open_file('dot.h5', 'r')
a = csr_matrix((h5.root.data[:], (h5.root.ri[:], h5.root.ci[:])), shape=(l,n))

## huge_matrices_3.py
f = tb.open_file('dot.h5', 'w')
filters = tb.Filters(complevel=5, complib='blosc')
out_data = f.create_earray(f.root, 'data', tb.Float32Atom(), shape=(0,), filters=filters)
out_ri = f.create_earray(f.root, 'ri', tb.Float32Atom(),shape=(0,), filters=filters)
out_ci = f.create_earray(f.root, 'ci', tb.Float32Atom(), shape=(0,), filters=filters)
for i in range(0, l, bl):
  res = a.dot(b[:,i:min(i+bl, l)])
  vals = res.data
  ri, ci = res.nonzero()
  out_data.append(vals)

## huge_matrices_2.py
h5 = tb.open_file('dot.h5', 'r')
a = h5.root.data
row = a[0,:] #only one row gets loaded into memory
print row

## huge_matrices_1.py
from scipy.sparse import csr_matrix, rand
import tables as tb
a = rand(2000,2000, format='csr') #imagine that many values are stored in this matrix and that sparsity is low
b = a.T
l, m, n = a.shape[0], a.shape[1], b.shape[1]

f = tb.open_file('dot.h5', 'w')
filters = tb.Filters(complevel=5, complib='blosc')
out = f.create_carray(f.root, 'data', tb.Float32Atom(), shape=(l, n), filters=filters)


## pl_pymc_analyze.py
def analyze(data, discrete=True, xmin=1.):
    model = mc.MCMC(_model(data,discrete,xmin))
    model.sample(5000)

    print
    print(model.stats()['alpha']['mean'])

    mc.Matplot.plot(model)

## pl_pymc_model.py
import pymc as mc
from scipy.special import zeta

def _model(data, discrete=True, xmin=1.):
    alpha = mc.Exponential('alpha', 1. / 1.5)

    @mc.stochastic(observed=True)
    def custom_stochastic(value=data, alpha=alpha, xmin=xmin, discrete=discrete):
        value = value[value &gt;= xmin]
        if discrete == True:

## plot_dict.py
import matplotlib.pyplot as plt
import numpy as np
def plot_dict(d, xlabel, ylabel, filename):
  ''' Plotting the keys and values given in dictionary
  d = dictionary
  xlabel = label for x-axis
  ylabel = label for y-axis
  filename = output filename
  '''
  fig = plt.figure()
	#let's get the third row
	a = h5.root
	b = csr_matrix((a.data[a.indptr[3]:a.indptr[3+1]], a.indices[a.indptr[3]:a.indptr[3+1]], np.array([0,len(a.indices[a.indptr[3]:a.indptr[3+1]])])), shape=(1,n))
	h5 = tb.open_file('dot.h5', 'r')
	a = csr_matrix((h5.root.data[:], h5.root.indices[:], h5.root.indptr[:]), shape=(l,n))
	import numpy as np
	f = tb.open_file('dot.h5', 'w')
	filters = tb.Filters(complevel=5, complib='blosc')
	out_data = f.create_earray(f.root, 'data', tb.Float32Atom(), shape=(0,), filters=filters)
	out_indices = f.create_earray(f.root, 'indices', tb.Int32Atom(),shape=(0,), filters=filters)
	out_indptr = f.create_earray(f.root, 'indptr', tb.Int32Atom(), shape=(0,), filters=filters)
	out_indptr.append(np.array([0])) #this is needed as a first indptr
	max_indptr = 0
	for i in range(0, l, bl):
	res = a[i:min(i+bl, l),:].dot(b)
	h5 = tb.open_file('dot.h5', 'r')
	a = csr_matrix((h5.root.data[:], (h5.root.ri[:], h5.root.ci[:])), shape=(l,n))
	h5 = tb.open_file('dot.h5', 'r')
	a = h5.root.data
	row = a[0,:] #only one row gets loaded into memory
	print row
	from scipy.sparse import csr_matrix, rand
	import tables as tb
	a = rand(2000,2000, format='csr') #imagine that many values are stored in this matrix and that sparsity is low
	b = a.T
	l, m, n = a.shape[0], a.shape[1], b.shape[1]

	f = tb.open_file('dot.h5', 'w')
	filters = tb.Filters(complevel=5, complib='blosc')
	out = f.create_carray(f.root, 'data', tb.Float32Atom(), shape=(l, n), filters=filters)
	def analyze(data, discrete=True, xmin=1.):
	model = mc.MCMC(_model(data,discrete,xmin))
	model.sample(5000)

	print
	print(model.stats()['alpha']['mean'])

	mc.Matplot.plot(model)
	import pymc as mc
	from scipy.special import zeta

	def _model(data, discrete=True, xmin=1.):
	alpha = mc.Exponential('alpha', 1. / 1.5)

	@mc.stochastic(observed=True)
	def custom_stochastic(value=data, alpha=alpha, xmin=xmin, discrete=discrete):
	value = value[value >= xmin]
	if discrete == True:
	import matplotlib.pyplot as plt
	import numpy as np
	def plot_dict(d, xlabel, ylabel, filename):
	''' Plotting the keys and values given in dictionary
	d = dictionary
	xlabel = label for x-axis
	ylabel = label for y-axis
	filename = output filename
	'''
	fig = plt.figure()