Justin-Heer/train-test-val-split.py

## train-test-val-split.py
# Stack the two classes into one feature collection: happy samples first,
# awkward samples after them.
features = np.concatenate([features_happy, features_awkward], axis=0)

# Build the matching label vector in the same order:
# 1.0 for every happy sample, 0.0 for every awkward sample.
labels = np.concatenate(
    (
        np.ones(len(features_happy)),
        np.zeros(len(features_awkward)),
    ),
    axis=0,
)

# Persist the combined data under the keys "f" (features) and "l" (labels).
# NumPy appends the ".npz" extension to the 'dataset' file name.
np.savez_compressed('dataset', f=features, l=labels)

# First split: hold out 20% of all samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=16,
)

# Second split: hold out 20% of the remaining samples for validation.
# Overall this gives roughly 64% train / 16% validation / 20% test.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=16,
)

# Largest first-axis length (frame count) among the *training* samples.
# NOTE(review): validation samples are not inspected here, yet the same
# value is handed to the validation generator below -- confirm that
# generate_batch copes with a validation video longer than this.
expected_frames = max(video.shape[0] for video in X_train)

# Number of samples per generated batch.
batch_size = 16

# One-hot encode the integer class labels into a two-column binary matrix,
# which is the target format categorical_crossentropy requires.
y_train = np_utils.to_categorical(y_train, 2)
y_val = np_utils.to_categorical(y_val, 2)

# Batch generator feeding the training split to the LSTM model
# (generate_batch is defined elsewhere).
train_gen = generate_batch(X_train, y_train, batch_size, expected_frames)

# Batch generator feeding the validation split to the LSTM model.
val_gen = generate_batch(X_val, y_val, batch_size, expected_frames)
	# Merge both classes into a single feature collection, happy samples
	# first and awkward samples second.
	features = np.concatenate([features_happy, features_awkward], axis=0)

	# Labels follow the same ordering: ones for the happy samples,
	# zeros for the awkward samples.
	labels = np.concatenate(
		(
			np.ones(len(features_happy)),
			np.zeros(len(features_awkward)),
		),
		axis=0,
	)

	# Store features under key "f" and labels under key "l" in a compressed
	# archive; NumPy appends the ".npz" extension to 'dataset'.
	np.savez_compressed('dataset', f=features, l=labels)

	# Reserve 20% of all samples as the test set.
	X_train, X_test, y_train, y_test = train_test_split(
		features,
		labels,
		test_size=0.2,
		random_state=16,
	)

	# Reserve 20% of what is left as the validation set, i.e. roughly
	# 64% train / 16% validation / 20% test overall.
	X_train, X_val, y_train, y_val = train_test_split(
		X_train,
		y_train,
		test_size=0.2,
		random_state=16,
	)

	# Longest first-axis length (frame count) found in the *training* split.
	# NOTE(review): the validation split is not scanned, but this value is
	# also passed to the validation generator -- confirm generate_batch
	# handles validation videos that exceed it.
	expected_frames = max(video.shape[0] for video in X_train)

	# Samples per generated batch.
	batch_size = 16

	# Turn the integer class vector into a two-column one-hot matrix;
	# categorical_crossentropy needs targets in this form.
	y_train = np_utils.to_categorical(y_train, 2)
	y_val = np_utils.to_categorical(y_val, 2)

	# Generator that supplies training batches to the LSTM model
	# (generate_batch is defined elsewhere).
	train_gen = generate_batch(X_train, y_train, batch_size, expected_frames)

	# Generator that supplies validation batches to the LSTM model.
	val_gen = generate_batch(X_val, y_val, batch_size, expected_frames)