Last active
April 8, 2021 07:27
-
-
Save AyishaR/d3d089cffa2abdc6ef24f9b67ca8dc64 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dataset storing audio samples for wake word and background.
top_dir = 'audio'

# Rows are fixed-length 1-second windows; columns are raw samples.
input_audio = np.empty((0, desired_samples)).astype(np.float32)
# Index of the word in the hotwords list is the label.
input_labels = np.empty((0)).astype(np.int32)


def _collect_word_samples(dfx, max_samples):
    """Load, trim, pad and window every clip listed in *dfx*.

    Each wav file is loaded at ``desired_sr``, leading/trailing silence is
    trimmed, clips shorter than one second are skipped, and the remainder is
    zero-padded to a multiple of ``desired_samples`` then split into
    fixed-length windows.

    Returns a tuple ``(samples, n_files)`` where ``samples`` has shape
    ``(k, desired_samples)`` and ``n_files`` is the number of wav files
    that contributed windows.  Stops once ``max_samples`` is exceeded.
    """
    samples = np.empty((0, desired_samples))
    n_files = 0
    for i in range(len(dfx)):
        file_path = "{}/fold{}/{}".format(
            top_dir, dfx.iloc[i]['fold'], dfx.iloc[i]['slice_file_name'])
        X, sr = librosa.core.load(file_path, sr=desired_sr)
        X, interval = librosa.effects.trim(X)
        if X.shape[0] < desired_sr:  # shorter than 1 second after trimming
            continue
        # Pad only when needed, else there will be unnecessary silence appended.
        if X.shape[0] % desired_samples != 0:
            X = np.pad(X, (0, desired_samples - (X.shape[0] % desired_samples)))
        X_sub = np.array(np.split(X, X.shape[0] // desired_samples))
        samples = np.append(samples, X_sub, axis=0)
        # Count the file before the cap check: its windows were kept, so it
        # must appear in the reported total (the original tallied it late
        # and silently dropped the file that crossed the cap).
        n_files += 1
        if samples.shape[0] > max_samples:
            break
    return samples, n_files


for word in word_dirs:
    print("\n", word)
    if word not in hotwords and not sampleBackGround():
        # Background clip rejected by the random subsampling gate.
        print("-- Background/noise/other words not included")
        continue

    dfx = df[df['class'] == word]
    start_time = time.time()
    is_hotword = word in hotwords
    if is_hotword:
        print("-- Category : hotword")
        sample_cap = class_nSamples
    else:
        print("-- Category : background/noise/other words")
        sample_cap = other_nSamples
    word_samples, wav_files = _collect_word_samples(dfx, sample_cap)

    if word_samples.size > 0:
        input_audio = np.concatenate((input_audio, word_samples), axis=0)
        labels = np.full((word_samples.shape[0]), nLabel(word))
        input_labels = np.concatenate((input_labels, labels))
        print("added {} audio files with {} samples for word \"{}\" with label {} in {:.1f} sec.".
              format(wav_files, labels.shape[0], word, nLabel(word), (time.time() - start_time)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment