This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# STFT analysis parameters shared by the spectrogram helper.
# Alternative derivations kept for reference:
#   hop_len = int(win_len / 4)  # default
#   fft_len = pow(2, int(np.log2(win_len) + 1))
fft_len = 2048  # FFT size, in samples
win_len = fft_len  # the analysis window spans the whole FFT frame
hop_len = win_len // 4  # advance a quarter of a window between frames
def spectrogramOp(X):
    """Return the magnitude spectrogram of the signal ``X``.

    Runs ``librosa.core.stft`` with the module-level ``fft_len``, ``hop_len``
    and ``win_len`` settings (``center=True``) and returns the element-wise
    absolute value of the transform.
    """
    stft_frames = librosa.core.stft(
        X,
        n_fft=fft_len,
        hop_length=hop_len,
        win_length=win_len,
        center=True,
    )
    return np.absolute(stft_frames)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-hot encode the integer class ids: row i gets a single 1 in column
# input_labels[i].
# Assumes input_labels is a 1-D integer array with values in [0, n_classes)
# -- TODO confirm against the caller.
# Allocating with dtype=np.int32 avoids building a float array and copying it.
onehot_labels = np.zeros((input_labels.size, n_classes), dtype=np.int32)
onehot_labels[np.arange(input_labels.size), input_labels] = 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
add_noise = False  # add different words, null samples and random noise
# One class per hotword, plus one extra class when add_noise is enabled.
n_classes = len(hotwords) + int(add_noise)
class_nSamples = 1000  # number of samples in the hotword classes
# Number of samples to be picked from each of the non-hotword classes.
# NOTE(review): this raises ZeroDivisionError when len(word_dirs) equals
# n_classes; confirm the dataset always contains more classes than that.
other_nSamples = float(class_nSamples) / (len(word_dirs) - n_classes)
def nLabel(word):
    """Map a class name to its integer label.

    A hotword is labelled with its position in ``hotwords``; every other
    word gets the last label, ``n_classes - 1``.

    NOTE(review): when ``add_noise`` is False, ``n_classes - 1`` is also the
    label of the final hotword, so a non-hotword would collide with it --
    confirm callers only pass hotwords in that configuration.
    """
    if word in hotwords:
        return hotwords.index(word)
    return n_classes - 1
def textLabel(index): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Processing the data
# Value range used when scaling dataset values.
dataset_min = 0.0
dataset_max = 1.0
def denormalize_dataset(input_val):
    """Scale ``input_val`` by the dataset range ``dataset_max - dataset_min``.

    NOTE(review): ``dataset_min`` is not added back after scaling. That makes
    no difference while ``dataset_min`` is 0.0, but confirm this is the
    intended inverse of the normalisation step if the minimum ever changes.
    """
    # Module globals are only read here, so no ``global`` statement is needed.
    return input_val * (dataset_max - dataset_min)
#Function to normalize input values | |
def normalize_dataset(input_val): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
max_length = 4  # length (in seconds) of input
desired_sr = 16000  # sampling rate to use
mic_sr = 16000  # rate supported by Sampling library like PDM
desired_samples = max_length * desired_sr  # total number of samples in input
# Seed the TensorFlow and NumPy random generators so runs are repeatable.
tf.random.set_seed(0)
np.random.seed(0)
# NOTE: Python reads PYTHONHASHSEED when the interpreter starts, so this
# assignment cannot change string hashing in the current process; it is only
# inherited by child processes launched afterwards. Export the variable
# before starting Python if deterministic hashing is required here.
os.environ['PYTHONHASHSEED'] = '0'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Unique class names, in order of first appearance. dict.fromkeys keeps that
# order stable between runs, whereas iterating a set of strings depends on
# per-process hash randomisation.
word_dirs = list(dict.fromkeys(df['class'].to_list()))
hotwords = ['jackhammer', 'dog_bark', 'siren', 'gun_shot']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pick random test data sample from one batch
# NOTE(review): no `break` is visible in this snippet, so the body runs for
# every batch yielded by test_ds -- confirm that is intended.
for i in test_ds.as_numpy_iterator():
    img, label = i
    # Draw the index from the actual batch length rather than a hard-coded
    # 0..41, so a batch of any size is sampled in range.
    x = random.randrange(len(img))
    plt.axis('off')  # remove axes
    # print(img.shape, x)
    plt.imshow(img[x])  # shape from (64, 256, 256, 3) --> (256, 256, 3)
    # getting output; input shape (256, 256, 3) --> (1, 256, 256, 3)
    output = model.predict(np.expand_dims(img[x], 0))
    pred = np.argmax(output[0])  # finding max
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DenseNet121 with ImageNet weights as the feature extractor;
# include_top=False leaves out the model's own classification layer.
base_model = keras.applications.DenseNet121(
    weights='imagenet',
    input_shape=input_shape,
    include_top=False,
)
base_model.trainable = False  # keep the pre-trained weights fixed

inputs = tf.keras.Input(shape=input_shape)
x = base_model(inputs, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
# Add our own classification layer: one softmax output per class name.
outputs = keras.layers.Dense(len(class_names), activation='softmax')(x)
model = keras.Model(inputs, outputs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Normalizing the pixel values for faster convergence
# NOTE(review): newer TensorFlow/Keras releases expose this layer as
# `layers.Rescaling`; confirm the installed version still provides the
# `experimental.preprocessing` path.
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)
# Rescale only the images; labels pass through unchanged.
train_ds = train_ds.map(
    lambda images, labels: (normalization_layer(images), labels)
)
test_ds = test_ds.map(
    lambda images, labels: (normalization_layer(images), labels)
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Loading the dataset
path = 'currency/'
input_shape = (256, 256, 3)  # default input shape while loading the images
batch = 64
# The train and test datasets
print("Train dataset")
train_ds = preprocessing.image_dataset_from_directory(
    path + 'Train',
    # Passed explicitly so the loaded image size stays tied to input_shape;
    # (256, 256) is also the loader's documented default.
    image_size=input_shape[:2],
    batch_size=batch,
    label_mode='categorical',
)