Created
May 8, 2014 14:26
-
-
Save mrgloom/91f494e0fbe3ce595d2a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Test saving and loading a NumPy matrix.
# Test matrix multiplication in memory and via memmap.
# With memmap there is no need for batch processing.
# HDF5 and PyTables could also be tested for matrix multiplication.
# Matrix multiplication could be used for PCA (randomized PCA would be smarter).
# This still needs to be tested on an x64 machine.
import numpy as np | |
import time | |
import struct | |
# Default matrix dimensions used by the benchmarks in this script:
# matrix A is created as rows x cols and matrix B as cols x rows.
rows, cols = 1000, 200
# Open questions from the author: which access mode does resizing need,
# and how can the dtype of an existing memmap be obtained (see `fm.dtype`)?
def resize_memmap(fm, sz, tp=None):
    """Reopen the file behind ``fm`` as a new memmap of shape ``sz``.

    The original map is flushed first so pending writes reach the file, and
    the file name is printed for diagnostics.  The file is reopened in
    ``'r+'`` mode: existing bytes are kept, and ``np.memmap`` extends the
    file when ``sz`` needs more bytes than it currently holds.  (Mode
    ``'w+'`` would create/overwrite the file and lose the stored data.)

    Parameters
    ----------
    fm : numpy.memmap
        An open, file-backed memory map.
    sz : tuple of int
        Shape of the new view.
    tp : dtype-like, optional
        Element type of the new view; defaults to ``fm.dtype``.

    Returns
    -------
    numpy.memmap
        A new map over the same file.  ``fm`` itself is left untouched and
        keeps its old shape.
    """
    fm.flush()
    print(fm.filename)
    if tp is None:
        # Answering the "how to get the type" question: reuse the source dtype.
        tp = fm.dtype
    return np.memmap(fm.filename, mode='r+', dtype=tp, shape=sz)
def test_resize_inplace():
    """Demonstrate resizing a memmap by reopening its backing file.

    Creates a (3, 12) uint8 memmap in ``A_r.npy``, writes one marker value,
    reopens the file with shape (20, 12) through ``resize_memmap`` and
    prints both the old and the new view.
    """
    fA = np.memmap('A_r.npy', dtype='uint8', mode='w+', shape=(3, 12))
    # Original note: calling fA.resize(120, 12) directly "doesn't work",
    # hence the reopen-based approach below.
    print("fA")
    print(fA)

    fA[2][0] = 42

    sz = (20, 12)
    tp = 'uint8'
    # Pass the dtype string `tp` (not the builtin `type`) and keep the
    # returned map so it can be printed below.
    new_fA = resize_memmap(fA, sz, tp)

    print('fA')
    print(fA)
    print('new_fA')
    print(new_fA)
def create_matrix(rows, cols):
    """Return a ``rows`` x ``cols`` uint8 array of random values.

    Samples from ``np.random.rand`` are multiplied by 100 and then cast to
    ``uint8``.  (Original author's open question: is uint8/int8 the right
    type for image-like data in [0, 255]?)
    """
    scaled = np.random.rand(rows, cols) * 100
    return scaled.astype('uint8')
def save_matrix(filename, data):
    """Store the array ``data`` at ``filename`` using ``np.save``."""
    np.save(file=filename, arr=data)
def load_matrix(filename):
    """Return the array that ``np.load`` reads from ``filename``."""
    return np.load(filename)
def load_npy_to_memmap(filename, dtype, shape):
    """Memory-map the array data of an existing ``.npy`` file.

    The ``.npy`` preamble is parsed by hand to find where the raw array
    bytes start (layout documented in ``numpy.lib.format``):

    * 6 bytes magic string ``\\x93NUMPY``
    * 1 byte major version, 1 byte minor version
    * header length, little-endian unsigned: 2 bytes for format version 1.x,
      4 bytes for later versions
    * the header text itself (``header_len`` bytes)

    Parameters
    ----------
    filename : str
        Path of the ``.npy`` file.
    dtype, shape
        Passed straight to ``np.memmap``; they are NOT read from the header,
        so the caller must supply values matching what was saved.

    Returns
    -------
    numpy.memmap
        A map over the array bytes, opened with ``np.memmap``'s default mode.

    Raises
    ------
    ValueError
        If the file does not start with the ``.npy`` magic string.
    struct.error
        If the file is too short to contain the preamble.
    """
    # Binary mode is required: the preamble is raw bytes, not text.
    with open(filename, 'rb') as f:
        magic, major, _minor = struct.unpack('<6sBB', f.read(8))
        if magic != b'\x93NUMPY':
            raise ValueError('%r is not a .npy file' % (filename,))
        # Version 1.x stores the header length as an unsigned short,
        # later versions as an unsigned 4-byte int.
        len_fmt = '<H' if major == 1 else '<I'
        len_size = struct.calcsize(len_fmt)
        (header_len,) = struct.unpack(len_fmt, f.read(len_size))
    data_offset = 8 + len_size + header_len
    return np.memmap(filename, dtype=dtype, shape=shape, offset=data_offset)
def create_matrixs():
    """Build, save and reload the two benchmark matrices.

    A (``rows`` x ``cols``) is written to ``A.npy`` and B (``cols`` x
    ``rows``) to ``B.npy``; each one is read back from disk right after it
    is saved.  The size of each reloaded matrix in whole megabytes and then
    both shapes are printed.

    Returns
    -------
    tuple
        ``(A, B)`` as loaded back from disk.
    """
    reloaded = []
    for path, n_rows, n_cols in (("A.npy", rows, cols), ("B.npy", cols, rows)):
        save_matrix(path, create_matrix(n_rows, n_cols))
        matrix = load_matrix(path)
        # Integer division: report whole megabytes only.
        print(matrix.nbytes // 1024 // 1024)
        reloaded.append(matrix)

    A, B = reloaded
    print(A.shape)
    print(B.shape)
    return A, B
def test_mult_ram():
    """Time one in-memory ``np.dot`` of the two benchmark matrices.

    The matrices come from ``create_matrixs``; the elapsed wall-clock time
    in seconds is printed and the product itself is discarded.
    """
    left, right = create_matrixs()
    started = time.time()
    np.dot(left, right)
    print(time.time() - started)
def test_memmap():
    """Time ``np.dot`` on memory-mapped inputs and show what it returns.

    ``A.npy`` and ``B.npy`` must already exist (``test_mult_ram`` creates
    them via ``create_matrixs``).  Two ways of mapping the same files are
    timed:

    1. ``fA``/``fB``: maps built by ``load_npy_to_memmap``, which skips the
       ``.npy`` header by hand.
    2. ``a``/``b``: maps opened with ``np.lib.format.open_memmap``, numpy's
       own ``.npy``-aware helper (no custom function needed).

    For each product the elapsed seconds, the result's ``filename`` (or
    ``None`` when it has none) and the result's type are printed.
    """
    # Wrong way, kept as a warning: np.memmap('A.npy', dtype='uint8', ...)
    # without an offset would treat the .npy header bytes as array data.
    fA = load_npy_to_memmap('A.npy', dtype='uint8', shape=(rows, cols))
    fB = load_npy_to_memmap('B.npy', dtype='uint8', shape=(cols, rows))

    a = np.lib.format.open_memmap('A.npy', dtype='uint8', mode='r+')
    b = np.lib.format.open_memmap('B.npy', dtype='uint8', mode='r+')

    # Pre-allocated, file-backed result buffer.
    # Author's open questions: must the result size be predefined?  Is a
    # header needed, or can the result skip the .npy format?  Where is the
    # data stored when no filename is given?
    fC = np.memmap('C.npy', dtype='uint16', mode='w+', shape=(rows, rows))

    # Operands may be mixed: some matrices in RAM, some memory-mapped.
    # Variants considered for getting the product into the mapped buffer:
    #   case 1: fC = np.dot(fA, fB)      (rebinds the name; exercised below)
    #   case 2: fC[:] = np.dot(fA, fB)
    #   case 3: np.dot(fA, fB, out=fC)   with fC allocated as 'float'
    t0 = time.time()
    fC = np.dot(fA, fB)
    print(time.time() - t0)
    # The product is not guaranteed to be a memmap, so `filename` may be
    # missing; report None instead of raising AttributeError.
    print(getattr(fC, 'filename', None))
    print(type(fC))

    t0 = time.time()
    c = np.dot(a, b)
    print(time.time() - t0)
    print(getattr(c, 'filename', None))
    print(type(c))
# Script driver: run the in-RAM benchmark first, then the memmap one.
# test_resize_inplace() is left disabled, as in the original.
for benchmark in (test_mult_ram, test_memmap):
    benchmark()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment