Created
April 18, 2014 14:20
-
-
Save mrgloom/11046631 to your computer and use it in GitHub Desktop.
numpy.memmap test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Test saving and loading of a NumPy matrix.
# Test matrix multiplication both in memory and using memmap.
# With memmap there should be no need for batch processing.
# HDF5 and PyTables could also be tested for matrix multiplication.
# Matrix multiplication could be used for PCA (randomized PCA would be smarter).
# Needs to be tested on an x64 machine.
import numpy as np | |
import time | |
import struct | |
# Matrix dimensions used by the benchmarks below: A is rows x cols and
# B is cols x rows.
rows, cols = 100000, 1000
def create_matrix(rows, cols):
    """Return a random ``rows`` x ``cols`` matrix of dtype ``uint8``.

    Uniform samples from [0, 1) are scaled by 100 and then converted to
    ``uint8``, so every entry ends up in the range 0..99.
    """
    uniform = np.random.rand(rows, cols)
    scaled = uniform * 100
    # Open question kept from the original author: uint8 was picked as an
    # image-like type (full range would be 0..255); int8 was also considered.
    return scaled.astype('uint8')
def save_matrix(filename, data):
    """Write the array ``data`` to ``filename`` using ``np.save``."""
    np.save(file=filename, arr=data)
def load_matrix(filename):
    """Read an array back from ``filename`` using ``np.load`` and return it."""
    return np.load(filename)
def load_npy_to_memmap(filename, dtype, shape):
    """Memory-map the data section of a .npy file, skipping its header.

    The .npy layout is: 6-byte magic string ``\\x93NUMPY``, 1 byte major
    version, 1 byte minor version, a little-endian header length (unsigned
    2 bytes for format version 1.x, unsigned 4 bytes for later versions),
    then the header itself, then the raw array data.

    Only the header *length* is read here; the header contents (dtype, shape,
    memory order) are not parsed, so the caller must pass a ``dtype`` and
    ``shape`` that match what was saved.

    Args:
        filename: Path of an existing .npy file.
        dtype: dtype passed through to ``np.memmap``.
        shape: shape passed through to ``np.memmap``.

    Returns:
        An ``np.memmap`` over the array data (opened with ``np.memmap``'s
        default mode).

    Raises:
        ValueError: If the file does not start with the .npy magic string or
            is too short to contain the header length.
    """
    magic = b'\x93NUMPY'
    preamble_size = len(magic) + 2  # magic + major/minor version bytes

    # The header is binary, so the file must be opened in binary mode.
    with open(filename, 'rb') as f:
        preamble = f.read(preamble_size)
        if len(preamble) != preamble_size or preamble[:len(magic)] != magic:
            raise ValueError('%r is not a .npy file' % (filename,))
        major = struct.unpack('<B', preamble[6:7])[0]

        # Version 1.x stores the header length as an unsigned short; later
        # versions widened it to an unsigned 4-byte int.
        length_format = '<H' if major == 1 else '<I'
        length_size = struct.calcsize(length_format)
        raw_length = f.read(length_size)
        if len(raw_length) != length_size:
            raise ValueError('%r has a truncated .npy header' % (filename,))
        header_len = struct.unpack(length_format, raw_length)[0]

    data_offset = preamble_size + length_size + header_len
    return np.memmap(filename, dtype=dtype, shape=shape, offset=data_offset)
def test_mult_ram():
    """Benchmark an in-memory matrix product of two random uint8 matrices.

    Creates A (rows x cols) and B (cols x rows), round-trips each one through
    "A.npy" / "B.npy" on disk, prints each matrix's size in whole MiB, and
    finally prints the wall-clock seconds taken by ``np.dot(A, B)``.

    The two .npy files are left on disk; test_memmap maps them afterwards.
    """
    A = create_matrix(rows, cols)
    print('a')  # progress marker: A created
    save_matrix("A.npy", A)
    print('aa')  # progress marker: A saved
    A = load_matrix("A.npy")
    # Floor division keeps this an integer MiB count on Python 2 and 3 alike.
    print(A.nbytes // 1024 // 1024)

    B = create_matrix(cols, rows)
    save_matrix("B.npy", B)
    B = load_matrix("B.npy")
    print(B.nbytes // 1024 // 1024)

    # A stray bare `pause` name used to sit here; it raised NameError and
    # aborted the run before the multiplication was ever timed.

    # NOTE(review): both operands are uint8, so the product presumably wraps
    # around on overflow -- only the elapsed time is used here; confirm before
    # relying on the values.
    t0 = time.time()
    np.dot(A, B)
    print(time.time() - t0)
def test_memmap():
    """Benchmark the matrix product using memory-mapped operands.

    Maps the data sections of "A.npy" and "B.npy" (which must already exist,
    e.g. as written by test_mult_ram), prints the wall-clock seconds taken by
    ``np.dot`` on the two maps, and then stores the product in the
    memory-mapped file "C.npy".
    """
    # A plain np.memmap over a .npy file would read the header bytes as array
    # data, so the inputs are mapped through load_npy_to_memmap instead.
    fA = load_npy_to_memmap('A.npy', dtype='uint8', shape=(rows, cols))
    fB = load_npy_to_memmap('B.npy', dtype='uint8', shape=(cols, rows))

    # The output map has to be sized up front. It is created directly with
    # np.memmap, so despite the extension no .npy header is written to it.
    fC = np.memmap('C.npy', dtype='uint16', mode='w+', shape=(rows, rows))

    t0 = time.time()
    product = np.dot(fA, fB)
    print(time.time() - t0)

    # Previously the result was assigned to the name fC, which discarded the
    # memmap and left C.npy unwritten. Copy into the map (outside the timed
    # region) and flush so the result actually reaches the file.
    # NOTE(review): the operands are uint8, so the product presumably wraps
    # around before being widened to uint16 here -- confirm if the values
    # matter.
    fC[:] = product
    fC.flush()
# Run both benchmarks in order: test_memmap maps the A.npy / B.npy files that
# test_mult_ram writes.
for benchmark in (test_mult_ram, test_memmap):
    benchmark()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment