@sotelo
Created January 12, 2017 20:23
pavoque to hdf5: pack the Merlin-normalised PAVOQUE acoustic features (.cmp), character-coded transcripts, and emotion/style labels into a single Fuel-compatible HDF5 dataset with train/valid/test splits.
from run_merlin import prepare_file_path_list, read_file_list
from io_funcs.binary_io import BinaryIOCollection
import numpy
import h5py
import pickle
from fuel.datasets.hdf5 import H5PYDataset
io_fun = BinaryIOCollection()
n_outs = 63  # 187  (acoustic feature dimension per frame of the .cmp files)
save_dir = '/Tmp/sotelo/data/pavoque/'
base_dir = '/Tmp/sotelo/results/merlin/egs/build_your_own_voice/s1/'
merlin_data_dir = base_dir + \
    'experiments/pavoque/acoustic_model/data/'
file_list = read_file_list(merlin_data_dir + 'file_id_list.scp')
# the second '-'-separated field of each file id is the emotion/style label
emotion_set = [x.split('-')[1] for x in file_list]
emotion_set = sorted(list(set(emotion_set)))
emotion_dict = {x: i + 1 for i, x in enumerate(emotion_set)}
# Transcript file from the raw data, i.e. before Merlin processing
raw_dir = base_dir + 'raw_data/pavoque/'
text_file = raw_dir + 'utts.data'
with open(text_file) as f:
    text_data = f.readlines()
id_from_text = [x.split()[1] for x in text_data]
# sanity check: the transcript ids must match the Merlin file list exactly
error_files = [
    (i, x) for i, x in enumerate(id_from_text) if x not in file_list]
assert id_from_text == file_list
# keep only the quoted transcript on each utts.data line
text_data = ['"'.join(x.strip().split('"')[1:-1]) for x in text_data]
char_set = sorted(list(set(''.join(text_data).lower())))
char2code = {x: i + 1 for i, x in enumerate(char_set)}
with open(save_dir + 'char2code.pkl', 'wb') as f:
    pickle.dump(char2code, f)
audio_files = prepare_file_path_list(
    file_list, merlin_data_dir + 'nn_norm_mgc_lf0_vuv_bap_63', '.cmp')
resulth5 = h5py.File(
    '/Tmp/sotelo/data/pavoque/pavoque.hdf5', mode='w')
num_files = len(file_list)
# Variable-length feature sequences are stored flattened (vlen float32);
# their original 2D shapes live in a separate dataset attached as a
# 'shapes' dimension scale, following the Fuel convention for non-uniform data.
features_h5 = resulth5.create_dataset(
    'features', (num_files,),
    dtype=h5py.special_dtype(vlen=numpy.dtype('float32')))
features_shape_h5 = resulth5.create_dataset(
    'features_shapes', (num_files, 2), dtype='int32')
features_h5.dims.create_scale(features_shape_h5, 'shapes')
features_h5.dims[0].attach_scale(features_shape_h5)
features_shape_labels = resulth5.create_dataset(
    'features_shape_labels', (2,), dtype='S11')  # 'S7' would truncate 'num_feature'
features_shape_labels[...] = [
    'time_step'.encode('utf8'),
    'num_feature'.encode('utf8')]
features_h5.dims.create_scale(
    features_shape_labels, 'shape_labels')
features_h5.dims[0].attach_scale(features_shape_labels)
text_h5 = resulth5.create_dataset(
    'text', (num_files,),
    dtype=h5py.special_dtype(vlen=numpy.dtype('int32')))
speaker_index_h5 = resulth5.create_dataset(
    'speaker_index', (num_files, 1), dtype='uint8')
# Shuffle the example order with a fixed seed so that the contiguous
# train/valid/test splits defined below are random w.r.t. the file list.
order = list(range(num_files))
numpy.random.seed(1)
numpy.random.shuffle(order)
for i, idx in enumerate(order):
    if i % 100 == 0:
        print(i)
    out_features, out_frame_number = io_fun.load_binary_file_frame(
        audio_files[idx], n_outs)
    features_h5[i] = out_features.flatten()
    features_shape_h5[i] = numpy.array(out_features.shape)
    # the 'speaker' index actually encodes the PAVOQUE emotion/style label
    speaker_label = id_from_text[idx].split('-')[1]
    speaker_index_h5[i] = emotion_dict[speaker_label]
    text_h5[i] = numpy.array(
        [char2code[x.lower()] for x in text_data[idx]], dtype='int32')
# 90% train / 5% valid / 5% test, taken contiguously over the shuffled order.
end_train = int(.9 * num_files)
end_valid = int(.95 * num_files)
end_test = num_files
split_dict = {
    'train': {'features': (0, end_train),
              'text': (0, end_train),
              'speaker_index': (0, end_train)},
    'valid': {'features': (end_train, end_valid),
              'text': (end_train, end_valid),
              'speaker_index': (end_train, end_valid)},
    'test': {'features': (end_valid, end_test),
             'text': (end_valid, end_test),
             'speaker_index': (end_valid, end_test)}}
resulth5.attrs['split'] = H5PYDataset.create_split_array(split_dict)
resulth5.flush()
resulth5.close()
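# --- Optional sanity check: a minimal sketch, not part of the original pipeline.
# It assumes the file written above exists and that Fuel's H5PYDataset applies
# the 'shapes' dimension scale, returning each feature array reshaped to
# (time_step, num_feature) rather than flattened.
train_set = H5PYDataset(
    '/Tmp/sotelo/data/pavoque/pavoque.hdf5', which_sets=('train',))
print(train_set.sources)
print(train_set.num_examples)
handle = train_set.open()
batch = dict(zip(train_set.sources, train_set.get_data(handle, slice(0, 1))))
print(batch['features'][0].shape)   # expected: (time_step, 63)
print(batch['speaker_index'][0], batch['text'][0][:10])
train_set.close(handle)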