Skip to content

Instantly share code, notes, and snippets.

@helxsz
Last active March 12, 2016 05:06
Show Gist options
  • Save helxsz/110280fe6e0f227a4ba1 to your computer and use it in GitHub Desktop.
Save helxsz/110280fe6e0f227a4ba1 to your computer and use it in GitHub Desktop.
HDF5
# caffe receives data in the range 0 - 255, laid out as [n, channels, height, width]
def compareImage(path, convert):
    """Load one image with OpenCV, PIL and caffe and print each result.

    This is a debugging aid: for every loader it prints the first row of the
    array and its three leading lengths, and finally the shape/size reported
    by each library.

    Parameters
    ----------
    path : str
        Path of the image file to load.
    convert : bool
        When true, each array is additionally transposed from
        (height, width, channels) to (channels, height, width) and printed
        again.

    Raises
    ------
    IOError
        If OpenCV cannot read ``path``.
    """
    chw = (2, 0, 1)  # (height, width, channels) -> (channels, height, width)

    print("============== opencv BGR ===================")
    cv2_img = cv2.imread(path)
    if cv2_img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("could not read image: %s" % path)
    print(cv2_img[0])
    print("%s %s %s" % (len(cv2_img), len(cv2_img[0]), len(cv2_img[0][0])))
    if convert:
        print("============== opencv convert ===============")
        cv2_img = np.transpose(cv2_img, chw)
        print(cv2_img[0])
        print("%s %s %s" % (len(cv2_img), len(cv2_img[0]), len(cv2_img[0][0])))

    print("============== pil RGB =================")
    pil_im_obj = Image.open(path)
    print(pil_im_obj)
    data = np.asarray(pil_im_obj)
    # NOTE(review): rolling the channel axis by one turns R,G,B into B,R,G,
    # which is not OpenCV's B,G,R order -- confirm this is what was intended.
    pil_im = Image.fromarray(np.roll(data, 1, axis=-1))
    print(pil_im)
    if convert:
        print("============== pil convert ===============")
        # Convert to an ndarray first: PIL images have their own .transpose()
        # (flip/rotate) which np.transpose would otherwise try to call.
        # Use the same channel-first axis order as the other two loaders.
        pil_im = np.transpose(np.asarray(pil_im), chw)
        print(pil_im[0])
        print("%s %s %s" % (len(pil_im), len(pil_im[0]), len(pil_im[0][0])))

    print("============== caffe RGB ================")
    caffe_img = caffe.io.load_image(path)
    # Scale up by 255 so the values are comparable with the other loaders.
    caffe_img = caffe_img * 255
    print(caffe_img[0])
    print("%s %s %s" % (len(caffe_img), len(caffe_img[0]), len(caffe_img[0][0])))
    if convert:
        # The banner used to say "opencv convert" (copy/paste slip).
        print("============== caffe convert ===============")
        caffe_img = np.transpose(caffe_img, chw)
        print(caffe_img[0])
        print("%s %s %s" % (len(caffe_img), len(caffe_img[0]), len(caffe_img[0][0])))

    print("============== dimension ================")
    print(cv2_img.shape)    # height, width (channels first when convert is set)
    print(pil_im_obj.size)  # width, height
    print(caffe_img.shape)  # height, width (channels first when convert is set)
def generateHDF5FromText2(label_num):
    """Build HDF5 files from ``train.txt`` / ``test.txt`` using caffe's loader.

    Each list file holds one ``<image path> <integer label>`` pair per line.
    Every image is loaded with ``caffe.io.load_image``, resized to 227x227x3,
    transposed to (channels, height, width) and written, together with its
    label, to ``/data2/hdf5_train.h5`` or ``/data2/hdf5_test1.h5``.

    NOTE: a second function with this same name is defined later in the file
    and replaces this one once the module has been loaded.

    Parameters
    ----------
    label_num
        Unused; kept so existing callers keep working.

    Raises
    ------
    ValueError
        If a label field is not an integer.
    """
    print('\nplease wait...')
    HDF5_FILE = ['hdf5_train.h5', 'hdf5_test1.h5']
    # list files holding the training and testing image paths and labels
    LIST_FILE = ['train.txt', 'test.txt']

    for list_file, hdf5_name in zip(LIST_FILE, HDF5_FILE):
        # read every "<path> <label>" pair of the list file
        path_list = []
        label_list = []
        with open(list_file, buffering=1) as hosts_file:
            for line in hosts_file:
                fields = line.split()
                if len(fields) < 2:
                    # blank or incomplete line (e.g. trailing newline at EOF)
                    continue
                path_list.append(fields[0])
                label_list.append(int(fields[1]))
        print("%s %s" % (len(path_list), len(label_list)))

        # in-memory buffers that are written to the HDF5 file in one go
        datas = np.zeros((len(path_list), 3, 227, 227), dtype='f4')
        labels = np.zeros((len(path_list), 1), dtype="f4")
        for ii, (_file, lab) in enumerate(zip(path_list, label_list)):
            img = caffe.io.load_image(_file)
            img = caffe.io.resize(img, (227, 227, 3))  # resize to fixed size
            datas[ii] = np.transpose(img, (2, 0, 1))   # HWC -> CHW
            labels[ii] = lab

        # the with-statement closes the file; no explicit close() is needed
        with h5py.File("/data2/" + hdf5_name, 'w') as f:
            f['data'] = datas.astype(np.float32)
            f['label'] = labels.astype(np.float32)
def generateHDF5FromOpencv(label_num):
    """Build HDF5 files from the train/test list files using OpenCV.

    Reads ``train1.txt``, ``train2.txt`` and ``test1.txt`` from ``script_dir``.
    Each line is ``<image path> <label>``; lines whose label is not made of
    digits only are skipped. Every image is loaded with ``cv2.imread``,
    resized to 227x227 and transposed to (channels, height, width), then
    written with its label to ``script_dir/data2/hdf5_<label_num>_*.h5``.

    Parameters
    ----------
    label_num
        Only used to build the output file names.

    Raises
    ------
    IOError
        If OpenCV cannot read one of the listed images.
    """
    print('\nplease wait...')
    prefix = 'hdf5_' + str(label_num)
    HDF5_FILE = [prefix + '_train1.h5',
                 prefix + '_train2.h5',
                 prefix + '_test1.h5']
    LIST_FILE = ['train1.txt', 'train2.txt', 'test1.txt']

    for list_file, hdf5_name in zip(LIST_FILE, HDF5_FILE):
        path_list = []
        label_list = []
        with open(script_dir + "/" + list_file, buffering=1) as hosts_file:
            for line in hosts_file:
                fields = line.split()
                # skip blank/incomplete lines and lines with a non-numeric label
                if len(fields) < 2 or not fields[1].isdigit():
                    continue
                path_list.append(fields[0])
                label_list.append(int(fields[1]))
        print("%s %s" % (len(path_list), len(label_list)))

        datas = np.zeros((len(path_list), 3, 227, 227), dtype='f4')
        labels = np.zeros((len(path_list), 1), dtype="f4")
        for ii, (_file, lab) in enumerate(zip(path_list, label_list)):
            cv2_img = cv2.imread(_file)
            if cv2_img is None:
                # cv2.imread returns None on failure; fail with a clear message
                # instead of an obscure error inside cv2.resize.
                raise IOError("could not read image: %s" % _file)
            cv2_img = cv2.resize(cv2_img, (227, 227))
            datas[ii] = np.transpose(cv2_img, (2, 0, 1))  # HWC -> CHW
            print("==================================")
            print(datas[ii])
            print("===================================")
            labels[ii] = float(lab)

        # the with-statement closes the file; no explicit close() is needed
        with h5py.File(script_dir + "/data2/" + hdf5_name, 'w') as f:
            f['data'] = datas.astype(np.float32)
            f['label'] = labels.astype(np.float32)
# Runs immediately when this line is executed (i.e. on import as well as when
# run as a script). The argument 2 only ends up in the output file names.
generateHDF5FromOpencv(2)
def generateHDF5(img_fns, jnt_fns, name, mean):
    """Write mean-subtracted images and their joint labels to one HDF5 file.

    Images and joint files are paired by sorting both lists. Each image is
    loaded, has ``mean`` subtracted and is stored in (channels, height, width)
    order; each joint file is loaded with ``np.load`` into a row of 14 values.
    The result is written to ``script_dir/data/<name>.h5`` as float32
    datasets ``data`` and ``label``.

    Parameters
    ----------
    img_fns : list of str
        Image paths. Images are not resized here, so they must already be
        227x227 with 3 channels to fit the output buffer.
    jnt_fns : list of str
        ``.npy`` paths, one per image.
    name : str
        Base name of the output file (without extension).
    mean : array-like
        Mean to subtract. May be given either in the layout of the loaded
        image (height, width, channels) or in any shape that broadcasts
        against the transposed (channels, height, width) image.

    Raises
    ------
    ValueError
        If the two lists differ in length.
    IOError
        If an image cannot be read.
    """
    if len(img_fns) != len(jnt_fns):
        # zip() would silently truncate and leave all-zero rows behind.
        raise ValueError("got %d images but %d joint files"
                         % (len(img_fns), len(jnt_fns)))

    datas = np.zeros((len(img_fns), 3, 227, 227))
    labels = np.zeros((len(jnt_fns), 14))
    mean = np.asarray(mean)

    pairs = zip(sorted(img_fns), sorted(jnt_fns))
    for i, (img_fn, jnt_fn) in enumerate(pairs):
        print("%s %s %s" % (len(jnt_fns), i, img_fn))
        cv2_img = cv.imread(img_fn)
        if cv2_img is None:
            raise IOError("could not read image: %s" % img_fn)
        if mean.shape == cv2_img.shape:
            # Mean is in the image's own (H, W, C) layout: subtract before
            # transposing, otherwise the shapes would not line up.
            datas[i] = np.transpose(cv2_img - mean, (2, 0, 1))
        else:
            datas[i] = np.transpose(cv2_img, (2, 0, 1)) - mean
        labels[i] = np.load(jnt_fn)

    # the with-statement closes the file; no explicit close() is needed
    with h5py.File(script_dir + "/data/" + name + ".h5", 'w') as f:
        f['data'] = datas.astype(np.float32)
        f['label'] = labels.astype(np.float32)
    print('\ndone...')
def create_dataset2():
    """Build ``test.h5`` from the first 1500 FLIC / LSPET crops.

    Collects the cropped images and joint files of both datasets, computes
    the mean image over all crops with ``createMean`` and passes the first
    1500 image/joint pairs to ``generateHDF5`` under the name ``'test'``.

    An earlier, disabled variant of this function split the whole list into
    consecutive 1500-item chunks and named chunks starting beyond 80% of the
    data ``test_<n>`` and the rest ``train_<n>``.
    """
    print('create_dataset222')
    chunk = 1500

    # Sort before slicing: glob order is arbitrary, and generateHDF5 pairs
    # files by sorted order, so both slices must cover the same samples.
    img_fns = sorted(glob.glob('data/FLIC-full/crop/*.jpg')
                     + glob.glob('data/lspet_dataset/crop/*.jpg'))
    jnt_fns = sorted(glob.glob('data/FLIC-full/joint/*.npy')
                     + glob.glob('data/lspet_dataset/joint/*.npy'))
    print("%s %s" % (len(img_fns), len(jnt_fns)))

    mean = createMean(img_fns)
    # [:chunk] rather than [0:1499]: the old slice dropped the 1500th sample.
    generateHDF5(img_fns[:chunk], jnt_fns[:chunk], 'test', mean)
def createMean(img_fns):
    """Return the element-wise mean of the images at the given paths.

    Images are read with ``cv.imread`` and accumulated as float32 in the
    layout the loader returns them in, so the result has the same shape as a
    loaded image. All images must therefore share one shape. The mean is
    also printed.

    Parameters
    ----------
    img_fns : iterable of str
        Image paths.

    Returns
    -------
    numpy.ndarray
        float32 mean image.

    Raises
    ------
    ValueError
        If ``img_fns`` is empty.
    IOError
        If an image cannot be read.
    """
    sum_image = None
    count = 0
    for img_fn in img_fns:
        image = cv.imread(img_fn)
        if image is None:
            # cv.imread returns None on failure rather than raising.
            raise IOError("could not read image: %s" % img_fn)
        if sum_image is None:
            # float32 accumulator so the uint8 pixel values cannot overflow
            sum_image = image.astype(np.float32)
        else:
            sum_image += image
        count += 1

    if count == 0:
        raise ValueError("createMean needs at least one image path")

    mean = sum_image / count
    print(mean)
    return mean
def generateHDF5FromText2(label_num):
    """Build HDF5 files from ``train.txt`` / ``test.txt`` using PIL.

    Reads the two list files from ``script_dir``; each line is
    ``<image path> <integer label>``. Every image is opened with PIL, resized
    to 227x227, transposed to (channels, height, width) and divided by 255.
    Images and labels are written as float32 datasets ``data`` and ``label``
    to ``script_dir/data2/hdf5_train.h5`` and ``script_dir/data2/hdf5_test.h5``.

    Parameters
    ----------
    label_num
        Unused; kept so existing callers keep working.

    Raises
    ------
    ValueError
        If a label field is not an integer.
    """
    print('\nplease wait...')
    HDF5_FILE = ['hdf5_train.h5',
                 'hdf5_test.h5']
    LIST_FILE = ['train.txt', 'test.txt']

    for list_file, hdf5_name in zip(LIST_FILE, HDF5_FILE):
        path_list = []
        label_list = []
        with open(script_dir + "/" + list_file, buffering=1) as hosts_file:
            for line in hosts_file:
                fields = line.split()
                if len(fields) < 2:
                    # blank or incomplete line (e.g. trailing newline at EOF)
                    continue
                path_list.append(fields[0])
                label_list.append(int(fields[1]))
        print("%s %s" % (len(path_list), len(label_list)))

        # allocate the buffers once, directly as float32
        datas = np.zeros((len(path_list), 3, 227, 227), dtype='f4')
        labels = np.zeros((len(path_list), 1), dtype="f4")

        for ii, (_file, lab) in enumerate(zip(path_list, label_list)):
            # feed image to HDF5
            im = Image.open(_file)
            # force three channels so grayscale / RGBA files fit the buffer
            im = im.convert('RGB').resize((227, 227))
            img = np.asarray(im, dtype=np.float32)          # (227, 227, 3)
            datas[ii, :, :, :] = np.transpose(img, (2, 0, 1)) / 255
            # feed label to HDF5 -- row ii, not row 0 as before, which left
            # every label except the last one overwritten or zero
            labels[ii, :] = int(lab)

        # the with-statement closes the file; no explicit close() is needed
        with h5py.File(script_dir + "/data2/" + hdf5_name, 'w') as f:
            f['data'] = datas.astype(np.float32)
            f['label'] = labels.astype(np.float32)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment