Last active
October 20, 2023 10:03
-
-
Save aloisg/ac83160edf8a543b5ee6 to your computer and use it in GitHub Desktop.
generate_h5.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import h5py | |
from scipy import ndimage | |
import numpy as np | |
def load_image_sets(input_folder):
    """Read every image below ``input_folder`` and label it by sub-folder.

    Each sub-directory of ``input_folder`` is treated as one class; every
    file inside it is read as an image of that class.

    Args:
        input_folder: directory containing one sub-directory per class.

    Returns:
        A ``(set_x, set_y, list_classes)`` tuple: the stacked images, the
        integer class index of each image, and the class names ordered so
        that ``list_classes[k]`` is the name of class index ``k``.
    """
    set_x = []
    set_y = []
    list_classes = []
    # Sorted so that the class -> index mapping is reproducible between runs
    # (os.listdir returns entries in arbitrary order).
    for cls in sorted(os.listdir(input_folder)):
        # os.path.join instead of string concatenation: the input folder may
        # or may not be given with a trailing separator.
        cls_dir = os.path.join(input_folder, cls)
        if not os.path.isdir(cls_dir):
            # Stray files next to the class folders are not classes.
            continue
        k = len(list_classes)  # idx for this class
        list_classes.append(cls)
        for img in os.listdir(cls_dir):
            # NOTE(review): ndimage.imread was deprecated in SciPy 1.0 and is
            # missing from current releases -- confirm the installed SciPy
            # still provides it, or switch to another image reader.
            im = ndimage.imread(os.path.join(cls_dir, img))
            set_x.append(im)
            set_y.append(k)
    # NOTE(review): stacking assumes all images share one shape -- confirm.
    return np.array(set_x), np.array(set_y), list_classes


def shuffle_and_split(set_x, set_y):
    """Shuffle both sets with one permutation and split off a validation part.

    The first tenth (rounded down) of the shuffled samples becomes the
    validation set, the rest the training set.

    Args:
        set_x: array of samples, indexed by sample along the first axis.
        set_y: array of labels, same length as ``set_x``.

    Returns:
        ``((train_set_x, train_set_y), (valid_set_x, valid_set_y))``.
    """
    n_samples = set_x.shape[0]
    # One shared permutation keeps every sample aligned with its label.
    rp = np.random.permutation(n_samples)
    set_x = set_x[rp]
    set_y = set_y[rp]
    # Floor division: a plain "/" yields a float on Python 3, which is not a
    # valid slice bound.
    n_valid = n_samples // 10
    train = (set_x[n_valid:], set_y[n_valid:])
    valid = (set_x[:n_valid], set_y[:n_valid])
    return train, valid


def encode_class_names(list_classes):
    """Return the class names as a fixed-length byte-string numpy array.

    h5py cannot store numpy unicode arrays, which is what a list of ``str``
    turns into on Python 3, so text names are encoded as UTF-8 first.
    Names that already are byte strings are passed through unchanged.
    """
    encoded = [
        name if isinstance(name, bytes) else name.encode("utf-8")
        for name in list_classes
    ]
    return np.array(encoded, dtype="S")


def save_h5(path, train, valid, list_classes):
    """Write the train/valid sets and the class names to the HDF5 file ``path``."""
    train_set_x, train_set_y = train
    valid_set_x, valid_set_y = valid
    # The context manager closes the file even if a dataset fails to write.
    with h5py.File(path, 'w') as f:
        f.create_dataset('train_set_x', data=train_set_x)
        f.create_dataset('train_set_y', data=train_set_y)
        f.create_dataset('valid_set_x', data=valid_set_x)
        f.create_dataset('valid_set_y', data=valid_set_y)
        f.create_dataset('list_classes', data=encode_class_names(list_classes))


def main(argv=None):
    """Build ``data.h5`` from the image folder named on the command line."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # sys.exit with a message prints it to stderr and exits with status 1.
        sys.exit("Usage: python generate_h5.py input_folder")
    # input folder
    fi = argv[1]
    set_x, set_y, list_classes = load_image_sets(fi)
    train, valid = shuffle_and_split(set_x, set_y)
    save_h5('data.h5', train, valid, list_classes)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import h5py
import numpy as np
import theano

# Read the training samples written by generate_h5.py fully into memory.
# The context manager closes the file once the data has been copied out.
with h5py.File('data.h5', "r") as datasets:
    # [...] reads the whole dataset whatever its number of dimensions.
    train_set_x = datasets["train_set_x"][...]

# put it in a shared variable if you have enough gpu ram
train_set_x = theano.shared(np.asarray(train_set_x, dtype=theano.config.floatX), borrow=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Instead of ndimage.imread, could we also do cv2.imread?