Skip to content

Instantly share code, notes, and snippets.

@AyishaR
Last active April 8, 2021 07:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AyishaR/d3d089cffa2abdc6ef24f9b67ca8dc64 to your computer and use it in GitHub Desktop.
Save AyishaR/d3d089cffa2abdc6ef24f9b67ca8dc64 to your computer and use it in GitHub Desktop.
# Build the dataset of fixed-length audio clips for the wake words and the
# background/other classes.
#
# Relies on names defined earlier in the notebook/script: df, word_dirs,
# hotwords, desired_sr, desired_samples, class_nSamples, other_nSamples,
# nLabel() and sampleBackGround().
top_dir = 'audio'


def _load_clips(file_path):
    """Load one audio file and cut it into clips of desired_samples samples.

    The file is resampled to desired_sr and trimmed with librosa.effects.trim.
    Returns an array of shape (n_clips, desired_samples), or None when the
    trimmed signal has fewer than desired_sr samples (under one second).
    """
    signal, _ = librosa.core.load(file_path, sr=desired_sr)
    signal, _ = librosa.effects.trim(signal)
    if signal.shape[0] < desired_sr:
        return None
    remainder = signal.shape[0] % desired_samples
    if remainder != 0:
        # Pad only when needed: padding an exact multiple would append a
        # whole extra clip of zeros.
        signal = np.pad(signal, (0, desired_samples - remainder))
    return signal.reshape(-1, desired_samples)


def _collect_word_samples(rows, max_samples):
    """Collect clips for one class from the metadata rows in `rows`.

    Files are read in row order; reading stops right after the file that
    pushes the clip count above max_samples (that file's clips are kept).

    Returns (clips, n_files): clips is a float32 array of shape
    (n_clips, desired_samples) and n_files is the number of files that
    contributed clips.
    """
    chunks = []
    n_clips = 0
    n_files = 0
    for fold, file_name in zip(rows['fold'], rows['slice_file_name']):
        clips = _load_clips(f"{top_dir}/fold{fold}/{file_name}")
        if clips is None:
            continue
        chunks.append(clips)
        n_clips += clips.shape[0]
        # Count the file before checking the cap so the file that crosses
        # the cap (whose clips are kept) is included in the total.
        n_files += 1
        if n_clips > max_samples:
            break
    if not chunks:
        return np.empty((0, desired_samples), dtype=np.float32), n_files
    # Concatenate once instead of re-copying the accumulated array per file.
    return np.concatenate(chunks, axis=0).astype(np.float32, copy=False), n_files


input_audio = np.empty((0, desired_samples), dtype=np.float32)
# The label of each clip is nLabel(word); per the original note this is the
# index of the word in the hotwords list.
input_labels = np.empty((0,), dtype=np.int32)

for word in word_dirs:
    print("\n", word)
    is_hotword = word in hotwords
    # NOTE(review): assumes sampleBackGround() returns a bool - confirm.
    if not is_hotword and not sampleBackGround():
        print("-- Background/noise/other words not included")
        continue

    dfx = df[df['class'] == word]
    start_time = time.time()
    if is_hotword:
        print("-- Category : hotword")
        max_samples = class_nSamples
    else:
        print("-- Category : backgound/noise/other words")
        max_samples = other_nSamples
    word_samples, wav_files = _collect_word_samples(dfx, max_samples)

    if word_samples.size > 0:
        label = nLabel(word)
        # Explicit dtypes keep input_audio float32 and input_labels int32;
        # np.concatenate would otherwise promote them.
        labels = np.full(word_samples.shape[0], label, dtype=np.int32)
        input_audio = np.concatenate((input_audio, word_samples), axis=0)
        input_labels = np.concatenate((input_labels, labels))
        print("added {} audio files with {} samples for word \"{}\" with label {} in {:.1f} sec.".
              format(wav_files, labels.shape[0], word, label, (time.time() - start_time)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment