Skip to content

Instantly share code, notes, and snippets.

@Yuri-M-Dias
Created February 22, 2018 17:25
Show Gist options
  • Save Yuri-M-Dias/be44367d7fd02db80e84f6b8c3949c7c to your computer and use it in GitHub Desktop.
Save Yuri-M-Dias/be44367d7fd02db80e84f6b8c3949c7c to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
from scipy.io import wavfile
from glob import glob
import cv2
import pandas as pd
import numpy as np
from os.path import basename
import gc
import h5py
import os
import wave
import pylab
# --- Configuration -----------------------------------------------------------
# Side length (in pixels) of the square images stored in the HDF5 file.
max_edge = 600
# Glob pattern of the input images (alternative set: 'Dataset_5s_img_1/*.png').
data_img = 'Dataset_5s_img/*.png'
# CSV holding the per-id labels; it is indexed by its 'id' column below.
csv_file = 'train.csv'

# --- Inputs ------------------------------------------------------------------
img_file = glob(data_img)
csv_reader = pd.read_csv(csv_file, index_col='id')

# --- Per-batch accumulators (refilled for every batch further down) ----------
wavs_imgs, genders, scores = [], [], []

gc.collect()

# Running count of processed images, used only for progress output.
n = 0
# Create the output file with three empty datasets that can grow along their
# first axis: 'z' (genders), 'x' (images) and 'y' (scores).
_growable = dict(dtype='uint8', chunks=True)
with h5py.File('Depression_dataset_600_5s_incremental.h5', 'w') as hf:
    dsetZ = hf.create_dataset(
        'z', shape=(0,), maxshape=(None,), data=genders, **_growable
    )
    print('dsetZ: ', dsetZ.dtype, ' ', dsetZ.shape)

    dsetX = hf.create_dataset(
        'x',
        shape=(0, max_edge, max_edge),
        maxshape=(None, None, None),
        data=wavs_imgs,
        **_growable
    )
    print('dsetX: ', dsetX.dtype, ' ', dsetX.shape)

    dsetY = hf.create_dataset(
        'y', shape=(0,), maxshape=(None,), data=scores, **_growable
    )
    print('dsetY: ', dsetY.dtype, ' ', dsetY.shape)

    print(list(hf.keys()))
print('Created empty H5 file')
# Process the images in a fixed number of batches so that only one batch of
# decoded images is held in memory before it is appended to the HDF5 file.
chunks = 5
minimumPerChunk = len(img_file)/chunks
# Round up so the final (possibly shorter) batch picks up the remainder when
# the image count is not a multiple of `chunks`; rounding down would silently
# drop the trailing images.
step = int(np.ceil(minimumPerChunk))
arrayStart = 0
arrayEnd = step
for i in range(0, chunks):
    batch = img_file[arrayStart:arrayEnd]
    if not batch:
        # Fewer images than batches (or no images at all): nothing left.
        break
    n = arrayStart
    for x in batch:
        name = basename(x)
        # The text before the first underscore of the file name is used as the
        # row id in the CSV (assumes names look like "<id>_<rest>.png").
        img_id = name.split('_', 1)[0]
        idx = int(img_id)
        n = n + 1

        # Flag 0 loads the image as single-channel grayscale.
        img = cv2.imread(x, 0)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise IOError('Could not read image: ' + x)

        # Scale the longer side to max_edge, keep the aspect ratio, then pad
        # the shorter side evenly so the result is max_edge x max_edge.
        aspect_ratio = img.shape[0]*1./img.shape[1]
        if np.argmax(img.shape) == 0:
            # Height is the longer (or equal) side: pad the columns.
            c = max(1, int(max_edge*1./aspect_ratio))
            new_shape = (c, max_edge)  # cv2 expects (width, height)
            pw = (max_edge - c)//2
            pad_width = ((0, 0), (pw, max_edge - c - pw))
        else:
            # Width is the longer side: pad the rows.
            r = max(1, int(max_edge*aspect_ratio))
            new_shape = (max_edge, r)  # cv2 expects (width, height)
            pw = (max_edge - r)//2
            pad_width = ((pw, max_edge - r - pw), (0, 0))
        # The interpolation flag must be passed by keyword: the third
        # positional parameter of cv2.resize is `dst`, not `interpolation`.
        img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA)
        img = np.pad(img, pad_width, 'constant')

        # Label lookup by id; `.at` replaces the removed DataFrame.get_value.
        score = csv_reader.at[idx, 'score']
        gender = csv_reader.at[idx, 'gender']

        wavs_imgs.append(img)
        scores.append(score)
        genders.append(gender)
        print('OK: ', n)

    # Actual number of items in this batch (the last batch may be shorter).
    count = len(wavs_imgs)
    print('Done: reshape for ', arrayStart, '-', arrayStart + count)
    with h5py.File('Depression_dataset_600_5s_incremental.h5', 'a') as hf:
        dsetZ = hf['z']
        dsetX = hf['x']
        dsetY = hf['y']
        print('dsetZ: ', dsetZ.dtype, ' ', dsetZ.shape)
        print('dsetX: ', dsetX.dtype, ' ', dsetX.shape)
        print('dsetY: ', dsetY.dtype, ' ', dsetY.shape)

        # Grow each dataset by exactly `count` rows and fill the new tail.
        dsetZ.resize(dsetZ.shape[0]+count, axis=0)
        dsetZ[-count:] = genders

        dsetY.resize(dsetY.shape[0]+count, axis=0)
        dsetY[-count:] = scores

        dsetX.resize(dsetX.shape[0]+count, axis=0)
        dsetX[-count:] = np.asarray(wavs_imgs)

        print(dsetZ.shape, dsetX.shape, dsetY.shape)
        # len() on a dataset gives the first-axis length without loading the
        # data (the removed `.value` attribute read everything into memory).
        print(len(dsetZ), len(dsetX), len(dsetY))
        print('Done: write to h5')

    # Drop the references to this batch so its memory can be reclaimed before
    # the next batch is decoded.
    wavs_imgs = []
    genders = []
    scores = []
    arrayStart += step
    arrayEnd = arrayStart + step
    gc.collect()
print('End to create h5')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment