Skip to content

Instantly share code, notes, and snippets.

# 80-10-10 partition of the data into train, validation and test sets.
# Stage 1: keep 80% for training and hold out the remaining 20%.
Xtrain, Xvaltest, ytrain, yvaltest = train_test_split(
    X,
    y,
    test_size=0.2,
)
# Stage 2: cut the held-out 20% in half -> 10% validation, 10% test.
Xval, Xtest, yval, ytest = train_test_split(
    Xvaltest,
    yvaltest,
    test_size=0.5,
)
# Features: select every column label except the last one.
X = df[df.columns[:-1].tolist()]
# Target: column 64, expanded into one indicator column per distinct value.
y = pd.get_dummies(df[64])
# Download the four header-less CSV files, one DataFrame per file.
csv_urls = (
    'https://cainvas-static.s3.amazonaws.com/media/user_data/AyishaR0/0.csv',
    'https://cainvas-static.s3.amazonaws.com/media/user_data/AyishaR0/1.csv',
    'https://cainvas-static.s3.amazonaws.com/media/user_data/AyishaR0/2.csv',
    'https://cainvas-static.s3.amazonaws.com/media/user_data/AyishaR0/3.csv',
)
df0, df1, df2, df3 = [pd.read_csv(url, header=None) for url in csv_urls]
# Stack the rows of all four frames into a single DataFrame.
df = pd.concat([df0, df1, df2, df3])
# Show a randomly chosen test image per batch together with the model's prediction.
# NOTE(review): the loop body had lost its indentation in this excerpt; the
# layout below assumes every statement after the `for` belongs to the loop,
# and that no further statements (e.g. a `break`) were cut off — confirm.
for i in test_df.as_numpy_iterator():
    img, label = i
    # Draw the index from the actual batch length instead of a hard-coded 32,
    # so a batch that is shorter than 32 cannot raise IndexError.
    x = random.randint(0, len(img) - 1)
    plt.axis('off')  # remove axes
    plt.imshow(img[x])  # img is a batch of images; img[x] is a single image
    # Add a leading batch axis of size 1 before predicting, then take the
    # first value of the first (only) prediction row.
    output = model.predict(np.expand_dims(img[x], 0))[0][0]
    # Threshold at 0.5 to turn the model output into a 0/1 class index.
    pred = (output > 0.5).astype('int')
    # Pick the label from class_names based on the model output.
    print("Predicted: ", class_names[pred], '(', output, '-->', pred, ')')
# Sequential convolutional model definition.
# NOTE(review): the list opened by `models.Sequential([` is not closed in the
# visible lines — the remaining layers and the closing `])` appear to be
# missing from this excerpt; restore them before running.
model = models.Sequential([
# Two convolutions with kernel size 2 (8 then 16 filters); the first layer
# declares the expected input shape of (256, 256, 3).
layers.Conv2D(8, 2, activation='relu', input_shape=(256,256,3)),
layers.Conv2D(16, 2, activation='relu'),
# 2x2 max pooling.
layers.MaxPool2D(pool_size=(2, 2)),
# Second pair of convolutions with kernel size 2 (16 then 32 filters).
layers.Conv2D(16, 2, activation='relu'),
layers.Conv2D(32, 2, activation='relu'),
# 2x2 max pooling.
layers.MaxPool2D(pool_size=(2, 2)),
# Flatten the feature maps into a vector for the layers that follow.
layers.Flatten(),
# Scale pixel values by 1/255 so training converges faster.
normalization_layer = layers.experimental.preprocessing.Rescaling(1. / 255)


def _rescale_images(images, labels):
    """Return the rescaled images paired with their unchanged labels."""
    return normalization_layer(images), labels


# Apply the same rescaling to both datasets.
train_df = train_df.map(_rescale_images)
test_df = test_df.map(_rescale_images)
# Load the images under `path` as two datasets: 80% training, 20% validation.
path = 'surface'
batch_size = 256
# Options shared by both calls; using the same seed and validation_split in
# each call keeps the two subsets consistent with one another.
shared_options = dict(
    label_mode='binary',
    validation_split=0.2,
    seed=113,
    batch_size=batch_size,
)
train_df = preprocessing.image_dataset_from_directory(
    path, subset='training', **shared_options
)
test_df = preprocessing.image_dataset_from_directory(
    path, subset='validation', **shared_options
)
# Looking into the class labels
# Optionally append all-zero ("silence") clips, labelled as the last class.
# NOTE(review): the body had lost its indentation in this excerpt; the layout
# below assumes every following statement belongs to the `if` — confirm.
if sampleBackGround():
    n_bg_samples = int(other_nSamples)
    # One label row per background clip with a 1 in the final class column.
    # The builtin `int` replaces the `np.int` alias, which recent NumPy
    # releases no longer provide; the resulting dtype is the same.
    bg_labels = np.zeros((n_bg_samples, n_classes), dtype=int)
    bg_labels[:, n_classes - 1] = 1
    # Silence is represented by clips made of zeros only.
    silence = np.zeros((n_bg_samples, desired_samples))
    input_audio = np.append(input_audio, silence, axis=0)
    # NOTE(review): appending along axis 0 requires input_labels to be 2-D
    # here, like bg_labels — verify against where input_labels is built.
    input_labels = np.append(input_labels, bg_labels, axis=0)
# Containers for the audio samples (wake words and background) and their labels.
top_dir = 'audio'
# One row per clip, `desired_samples` values wide; starts with zero rows.
input_audio = np.empty((0, desired_samples), dtype=np.float32)
# The label is the index of the word in the hotwords list; starts empty.
input_labels = np.empty(0, dtype=np.int32)
# Walk through every entry of word_dirs, printing each one after a blank line.
# NOTE(review): the loop body had lost its indentation in this excerpt and
# may continue beyond the single statement visible here — confirm.
for word in word_dirs:
    print("\n", word)
# Sampling configuration.
# When enabled: add different words, null samples and random noise.
add_noise = False
# Number of samples in each hotword class.
class_nSamples = 1000
# One class per hotword, plus one extra class when add_noise is enabled.
n_classes = len(hotwords) + (1 if add_noise else 0)
# Number of samples to be picked from each of the non-hotword classes.
other_nSamples = class_nSamples / float(len(word_dirs) - n_classes)
def nLabel(word):
    """Return the numeric class label for *word*.

    A word found in the module-level ``hotwords`` list is labelled with its
    index in that list; any other word is labelled ``n_classes - 1``.
    """
    if word in hotwords:
        return hotwords.index(word)
    return n_classes - 1
def textLabel(index):