Last active
April 8, 2021 07:27
-
-
Save AyishaR/d3d089cffa2abdc6ef24f9b67ca8dc64 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dataset storing audio samples for wake word and background.
top_dir = 'audio'

# Rows are fixed-length 1-second windows; columns are raw samples.
input_audio = np.empty((0, desired_samples)).astype(np.float32)
# Index of the word in the hotwords list is the label.
input_labels = np.empty((0)).astype(np.int32)


def _collect_word_samples(dfx, max_samples):
    """Load, trim, pad and window every clip listed in *dfx*.

    Each wav file is loaded at ``desired_sr``, leading/trailing silence is
    trimmed, clips shorter than one second are skipped, and the remainder is
    zero-padded to a multiple of ``desired_samples`` then split into
    fixed-length windows.

    Returns a tuple ``(samples, n_files)`` where ``samples`` has shape
    ``(k, desired_samples)`` and ``n_files`` is the number of wav files
    that contributed windows.  Stops once ``max_samples`` is exceeded.
    """
    samples = np.empty((0, desired_samples))
    n_files = 0
    for i in range(len(dfx)):
        file_path = "{}/fold{}/{}".format(
            top_dir, dfx.iloc[i]['fold'], dfx.iloc[i]['slice_file_name'])
        X, sr = librosa.core.load(file_path, sr=desired_sr)
        X, interval = librosa.effects.trim(X)
        if X.shape[0] < desired_sr:  # shorter than 1 second after trimming
            continue
        # Pad only when needed, else there will be unnecessary silence appended.
        if X.shape[0] % desired_samples != 0:
            X = np.pad(X, (0, desired_samples - (X.shape[0] % desired_samples)))
        X_sub = np.array(np.split(X, X.shape[0] // desired_samples))
        samples = np.append(samples, X_sub, axis=0)
        # Count the file before the cap check: its windows were kept, so it
        # must appear in the reported total (the original tallied it late
        # and silently dropped the file that crossed the cap).
        n_files += 1
        if samples.shape[0] > max_samples:
            break
    return samples, n_files


for word in word_dirs:
    print("\n", word)
    if word not in hotwords and not sampleBackGround():
        # Background clip rejected by the random subsampling gate.
        print("-- Background/noise/other words not included")
        continue

    dfx = df[df['class'] == word]
    start_time = time.time()
    is_hotword = word in hotwords
    if is_hotword:
        print("-- Category : hotword")
        sample_cap = class_nSamples
    else:
        print("-- Category : background/noise/other words")
        sample_cap = other_nSamples
    word_samples, wav_files = _collect_word_samples(dfx, sample_cap)

    if word_samples.size > 0:
        input_audio = np.concatenate((input_audio, word_samples), axis=0)
        labels = np.full((word_samples.shape[0]), nLabel(word))
        input_labels = np.concatenate((input_labels, labels))
        print("added {} audio files with {} samples for word \"{}\" with label {} in {:.1f} sec.".
              format(wav_files, labels.shape[0], word, nLabel(word), (time.time() - start_time)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment