tam17aki/demo_denoise_stft_pyroom.py

## demo_denoise_stft_pyroom.py
import numpy as np
from scipy.io import wavfile
import os
import pyroomacoustics as pra

# STFT parameters
fft_len = 512
hop = fft_len // 2  # half overlap
window = pra.hann(fft_len, flag='asymmetric', length='full')

snr = 5         # SNR of input signal, in dB (applied below as 10**(snr / 20))
speech_file = "./input/arctic_a0010.wav"
noise_file = "./input/exercise_bike.wav"

# Load the WAV files. Both must share one sampling rate because the samples
# are added together below without any resampling.
fs_s, audio = wavfile.read(speech_file)
fs_n, noise = wavfile.read(noise_file)
if fs_s != fs_n:
    # Explicit raise rather than `assert`, which is stripped under `python -O`.
    raise ValueError("audio and noise wav files should have same sampling rate!")

# The noise can only be truncated, never extended, so it has to be at least as
# long as the speech; fail here with a clear message instead of at the addition.
if len(noise) < len(audio):
    raise ValueError(
        "noise file is shorter than the speech file ({} < {} samples)".format(
            len(noise), len(audio)))

# truncate to same length
noise = noise[:len(audio)]

# Weight the noise so that ||audio|| / ||noise_weighted|| == 10**(snr / 20).
signal_level = np.linalg.norm(audio)
noise_level = np.linalg.norm(noise)
if noise_level == 0:
    # An all-zero noise segment would turn the scaling below into 0/0 (NaN).
    raise ValueError("noise segment is silent; cannot scale it to the requested SNR")
noise_fact = signal_level / 10**(snr / 20)
noise_weighted = noise * noise_fact / noise_level

# Add signal and noise, scale the mixture to a peak magnitude of 1, then
# subtract its mean.
noisy_signal = audio + noise_weighted
noisy_signal /= np.abs(noisy_signal).max()
noisy_signal -= noisy_signal.mean()

# STFT and SpectralSub parameters
# The STFT frame is nfft samples long; with 50% overlap it advances by
# nfft/2 samples (the hop passed to the STFT below) per frame.
nfft = 512

# Maximum suppression per frequency bin.
# Large suppression can result in more musical noise.
db_reduc = 10

lookback = 5    # How many frames to look back for the noise floor estimate.

# An overestimation factor to "push" the suppression towards db_reduc.
beta = 10

# An exponential factor to tune the suppression
# (see documentation of 'SpectralSub').
alpha = 3

# Hann window
window = pra.hann(nfft, flag='asymmetric', length='full')

# create objects
# streaming=True because the signal is handed to the STFT one hop-sized block
# per call (see the processing loop), so the transform must carry the
# overlapping samples from one call to the next. This matches the
# SpectralSub usage example in the pyroomacoustics documentation.
stft = pra.transform.STFT(nfft, hop=nfft // 2, analysis_window=window,
                          streaming=True)
scnr = pra.denoise.SpectralSub(nfft, db_reduc, lookback, beta, alpha)

# collect the processed blocks
# The signal is processed in consecutive blocks of `hop` samples. Samples left
# over after the last complete block are not processed and stay zero in
# `denoised_signal`.
denoised_signal = np.zeros(noisy_signal.shape)
n = 0
# `>=` (not `>`): when exactly `hop` samples remain they still form a complete
# block and must be processed.
while noisy_signal.shape[0] - n >= hop:

    # SCNR in frequency domain
    stft.analysis(noisy_signal[n: n + hop])
    gain_filt = scnr.compute_gain_filter(stft.X)

    # back to time domain, applying the per-bin gain to the spectrum
    denoised_signal[n: n + hop] = stft.synthesis(gain_filt * stft.X)

    # update step
    n += hop

# output results
# Both signals are written as WAV files at the input sampling rate; the
# output directory is created if it does not exist yet.
outputDir = './output'
os.makedirs(outputDir, exist_ok=True)

# noisy mixture
noisy_path = "{}/noisy_speech.wav".format(outputDir)
print("Output noisy speech.")
wavfile.write(noisy_path, fs_s, noisy_signal)

# output of the block-wise denoising loop
denoised_path = "{}/denoised_speech.wav".format(outputDir)
print("Output denoised speech.")
wavfile.write(denoised_path, fs_s, denoised_signal)
	import numpy as np
	from scipy.io import wavfile
	import os
	import pyroomacoustics as pra

	# STFT parameters
	fft_len = 512
	hop = fft_len // 2 # half overlap
	window = pra.hann(fft_len, flag='asymmetric', length='full')

	snr = 5 # SNR of input signal, in dB (applied below as 10**(snr / 20))
	speech_file = "./input/arctic_a0010.wav"
	noise_file = "./input/exercise_bike.wav"

	# Load the WAV files. Both must share one sampling rate because the samples
	# are added together below without any resampling.
	fs_s, audio = wavfile.read(speech_file)
	fs_n, noise = wavfile.read(noise_file)
	if fs_s != fs_n:
		# Explicit raise rather than `assert`, which is stripped under `python -O`.
		raise ValueError("audio and noise wav files should have same sampling rate!")

	# The noise can only be truncated, never extended, so it has to be at least as
	# long as the speech; fail here with a clear message instead of at the addition.
	if len(noise) < len(audio):
		raise ValueError(
			"noise file is shorter than the speech file ({} < {} samples)".format(
				len(noise), len(audio)))

	# truncate to same length
	noise = noise[:len(audio)]

	# Weight the noise so that ||audio|| / ||noise_weighted|| == 10**(snr / 20).
	signal_level = np.linalg.norm(audio)
	noise_level = np.linalg.norm(noise)
	if noise_level == 0:
		# An all-zero noise segment would turn the scaling below into 0/0 (NaN).
		raise ValueError("noise segment is silent; cannot scale it to the requested SNR")
	noise_fact = signal_level / 10**(snr / 20)
	noise_weighted = noise * noise_fact / noise_level

	# Add signal and noise, scale the mixture to a peak magnitude of 1, then
	# subtract its mean.
	noisy_signal = audio + noise_weighted
	noisy_signal /= np.abs(noisy_signal).max()
	noisy_signal -= noisy_signal.mean()

	# STFT and SpectralSub parameters
	# The STFT frame is nfft samples long; with 50% overlap it advances by
	# nfft/2 samples (the hop passed to the STFT below) per frame.
	nfft = 512

	# Maximum suppression per frequency bin.
	# Large suppression can result in more musical noise.
	db_reduc = 10

	lookback = 5 # How many frames to look back for the noise floor estimate.

	# An overestimation factor to "push" the suppression towards db_reduc.
	beta = 10

	# An exponential factor to tune the suppression
	# (see documentation of 'SpectralSub').
	alpha = 3

	# Hann window
	window = pra.hann(nfft, flag='asymmetric', length='full')

	# create objects
	# streaming=True because the signal is handed to the STFT one hop-sized block
	# per call (see the processing loop), so the transform must carry the
	# overlapping samples from one call to the next. This matches the
	# SpectralSub usage example in the pyroomacoustics documentation.
	stft = pra.transform.STFT(nfft, hop=nfft // 2, analysis_window=window,
		streaming=True)
	scnr = pra.denoise.SpectralSub(nfft, db_reduc, lookback, beta, alpha)

	# collect the processed blocks
	# The signal is processed in consecutive blocks of `hop` samples. Samples left
	# over after the last complete block are not processed and stay zero in
	# `denoised_signal`.
	denoised_signal = np.zeros(noisy_signal.shape)
	n = 0
	# `>=` (not `>`): when exactly `hop` samples remain they still form a complete
	# block and must be processed.
	while noisy_signal.shape[0] - n >= hop:

		# SCNR in frequency domain
		stft.analysis(noisy_signal[n: n + hop])
		gain_filt = scnr.compute_gain_filter(stft.X)

		# back to time domain, applying the per-bin gain to the spectrum
		denoised_signal[n: n + hop] = stft.synthesis(gain_filt * stft.X)

		# update step
		n += hop

	# output results
	# Both signals are written as WAV files at the input sampling rate; the
	# output directory is created if it does not exist yet.
	outputDir = './output'
	os.makedirs(outputDir, exist_ok=True)

	# noisy mixture
	noisy_path = "{}/noisy_speech.wav".format(outputDir)
	print("Output noisy speech.")
	wavfile.write(noisy_path, fs_s, noisy_signal)

	# output of the block-wise denoising loop
	denoised_path = "{}/denoised_speech.wav".format(outputDir)
	print("Output denoised speech.")
	wavfile.write(denoised_path, fs_s, denoised_signal)