Skip to content

Instantly share code, notes, and snippets.

@mrgloom
Created May 8, 2014 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrgloom/91f494e0fbe3ce595d2a to your computer and use it in GitHub Desktop.
Save mrgloom/91f494e0fbe3ce595d2a to your computer and use it in GitHub Desktop.
# Test saving and loading a NumPy matrix.
# Test matrix multiplication both in memory and through memmap.
# With memmap there is no need for batch processing.
# HDF5 and PyTables could also be tested for matrix multiplication.
# Matrix multiplication can be used for PCA (though randomized PCA is the smarter choice).
# This still needs to be tested on an x64 machine.
import numpy as np
import time
import struct
# Matrix dimensions shared by the benchmarks below:
# A is created as rows x cols and B as cols x rows.
rows, cols = 1000, 200
def resize_memmap(fm, sz, tp=None):
    """Re-map the file behind ``fm`` with a new shape, keeping its contents.

    Parameters:
        fm: an existing ``np.memmap``; pending writes are flushed first.
        sz: shape of the new mapping.
        tp: dtype of the new mapping; defaults to ``fm.dtype`` when omitted.

    Returns:
        A new ``np.memmap`` over ``fm.filename``. ``fm`` itself is left
        untouched and keeps its old shape.
    """
    fm.flush()
    print(fm.filename)
    if tp is None:
        # The dtype can be read straight from the existing mapping.
        tp = fm.dtype
    # Open with 'r+' rather than 'w+': 'w+' creates/overwrites the file and
    # would wipe the data already stored, while 'r+' keeps it (np.memmap
    # extends the file when the requested shape needs more bytes).
    new_fm = np.memmap(fm.filename, mode='r+', dtype=tp, shape=sz)
    return new_fm
def test_resize_inplace():
    """Demo: grow a memory-mapped array by re-mapping its file with a larger shape.

    Creates/overwrites 'A_r.npy' in the working directory, writes one value,
    then prints the original mapping and the enlarged one.
    """
    fA = np.memmap('A_r.npy', dtype='uint8', mode='w+', shape=(3, 12))
    # fA.resize(120, 12) does not work, hence the re-mapping below.
    print("fA")
    print(fA)
    fA[2][0] = 42
    sz = (20, 12)
    tp = 'uint8'
    # Pass the dtype held in `tp` (not the builtin `type`) and keep the
    # returned mapping so it can be inspected below.
    new_fA = resize_memmap(fA, sz, tp)
    print('fA')
    print(fA)
    print('new_fA')
    print(new_fA)
def create_matrix(rows, cols):
    """Return a random ``rows`` x ``cols`` uint8 matrix with values in 0..99."""
    # rand() yields floats in [0, 1); scaling by 100 and truncating to uint8
    # gives integers from 0 to 99.
    scaled = np.random.rand(rows, cols) * 100
    return scaled.astype(np.uint8)
def save_matrix(filename, data):
    """Write ``data`` to ``filename`` in NumPy's .npy format."""
    np.save(file=filename, arr=data)
def load_matrix(filename):
    """Read the array stored in the .npy file ``filename`` and return it."""
    return np.load(filename)
def load_npy_to_memmap(filename, dtype, shape):
    """Memory-map the array payload of a .npy file, skipping its header.

    The .npy layout is: 6-byte magic string ``\\x93NUMPY``, 1-byte major and
    1-byte minor version, a little-endian header length (unsigned 2 bytes for
    format version 1.x, unsigned 4 bytes for 2.x/3.x), the header itself, and
    then the raw array data.

    Parameters:
        filename: path of the .npy file.
        dtype: dtype to interpret the payload as (not read from the header).
        shape: shape of the mapped array (not read from the header).

    Returns:
        An ``np.memmap`` starting at the first payload byte.

    Raises:
        ValueError: if the file does not start with the .npy magic string or
            declares an unknown major format version.
    """
    # Binary mode is required: the preamble is raw bytes, not text.
    with open(filename, 'rb') as f:
        preamble = f.read(8)
        if len(preamble) != 8 or preamble[:6] != b'\x93NUMPY':
            raise ValueError("%r is not a .npy file" % (filename,))
        major = ord(preamble[6:7])
        if major == 1:
            len_fmt = '<H'
        elif major in (2, 3):
            len_fmt = '<I'
        else:
            raise ValueError("unsupported .npy format version %d" % major)
        len_size = struct.calcsize(len_fmt)
        (header_len,) = struct.unpack(len_fmt, f.read(len_size))
    # Payload starts after magic + version + length field + header.
    offset = 8 + len_size + header_len
    data = np.memmap(filename, dtype=dtype, shape=shape, offset=offset)
    return data
def create_matrixs():
    """Create, save and reload the two benchmark matrices.

    A (rows x cols) goes through 'A.npy' and B (cols x rows) through 'B.npy'
    in the working directory. The size of each reloaded matrix in whole
    megabytes is printed, followed by both shapes.

    Returns:
        The tuple ``(A, B)`` as loaded back from disk.
    """
    reloaded = []
    for path, dims in (("A.npy", (rows, cols)), ("B.npy", (cols, rows))):
        save_matrix(path, create_matrix(*dims))
        matrix = load_matrix(path)
        # Integer division: size in whole MiB.
        print(matrix.nbytes // 1024 // 1024)
        reloaded.append(matrix)
    A, B = reloaded
    print(A.shape)
    print(B.shape)
    return A, B
def test_mult_ram():
    """Time an in-memory np.dot of the two benchmark matrices and print the seconds taken."""
    left, right = create_matrixs()
    started = time.time()
    # The product itself is not used; only the elapsed time is reported.
    C = np.dot(left, right)
    elapsed = time.time() - started
    print(elapsed)
def test_memmap():
    """Time np.dot on memory-mapped inputs, once into a disk-backed result and once into RAM.

    Expects 'A.npy' and 'B.npy' in the working directory and creates or
    overwrites 'C.npy' there. For each run it prints the elapsed seconds, the
    file name behind the result (None when there is none) and the result type.
    """
    # np.lib.format.open_memmap parses the .npy header itself, so neither a
    # raw np.memmap over the file (the "wrong way": the header bytes would be
    # mapped as data) nor the custom load_npy_to_memmap helper is needed.
    a = np.lib.format.open_memmap('A.npy', dtype='uint8', mode='r+')
    b = np.lib.format.open_memmap('B.npy', dtype='uint8', mode='r+')

    # Pre-allocated, disk-backed result buffer sized from the inputs. This is
    # a raw mapping: despite its name the file carries no .npy header.
    fC = np.memmap('C.npy', dtype='uint16', mode='w+',
                   shape=(a.shape[0], b.shape[1]))

    # Run 1: store the product in the mapped file. Slice assignment is needed;
    # a plain `fC = np.dot(...)` would rebind the name and drop the mapping.
    # NOTE(review): the product is computed in the inputs' uint8 dtype before
    # being copied into fC, so large sums wrap around - confirm whether the
    # numeric values matter for this benchmark.
    t0 = time.time()
    fC[:] = np.dot(a, b)
    print(time.time() - t0)
    print(fC.filename)
    print(type(fC))
    fC.flush()

    # Run 2: let np.dot allocate the result itself.
    t0 = time.time()
    c = np.dot(a, b)
    print(time.time() - t0)
    # The result is not backed by a file; getattr keeps this working whether
    # or not the returned object exposes a `filename` attribute.
    print(getattr(c, 'filename', None))
    print(type(c))
# test_resize_inplace()  # resize demo, currently disabled
# Run the benchmarks in order: in-RAM multiplication first, then memmap.
for benchmark in (test_mult_ram, test_memmap):
    benchmark()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment