Last active
January 11, 2020 12:48
-
-
Save tam17aki/f884c48304b524395d259dd2a8e2f69b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import numpy as np
import pyroomacoustics as pra
from scipy.io import wavfile
# ---------------------------------------------------------------------------
# Parameters
# ---------------------------------------------------------------------------
# STFT parameters
fft_len = 512
hop = fft_len // 2  # half overlap
snr = 5  # SNR of input signal, in dB
speech_file = "./input/arctic_a0010.wav"
noise_file = "./input/exercise_bike.wav"

# STFT and SpectralSub parameters
# The STFT is fed blocks of nfft/2 samples because it uses 50% overlap.
# Tied to fft_len so the block size used by the processing loop (hop) and
# the STFT/SpectralSub size cannot silently diverge.
nfft = fft_len
# Maximum suppression per frequency bin.
# Large suppression can result in more musical noise.
db_reduc = 10
lookback = 5  # How many frames to look back for the noise floor estimate.
# An overestimation factor to "push" the suppression towards db_reduc.
beta = 10
# An exponential factor to tune the suppression
# (see documentation of 'SpectralSub').
alpha = 3

outputDir = './output'


def mix_at_snr(speech, noise, snr_db):
    """Mix mono speech with noise at the requested SNR.

    Both inputs are cut to their common length, the noise is rescaled so
    that ``norm(speech) / norm(scaled_noise) == 10 ** (snr_db / 20)``, and
    the sum is peak-normalised and then has its mean removed (in that
    order).

    Parameters
    ----------
    speech, noise : array_like, 1-D
        Mono sample sequences. Integer samples are converted to float64.
    snr_db : float
        Desired signal-to-noise ratio in dB.

    Returns
    -------
    numpy.ndarray
        float64 array of length ``min(len(speech), len(noise))``.

    Raises
    ------
    ValueError
        If either input is not one-dimensional or the common length is 0.
    """
    speech = np.asarray(speech, dtype=np.float64)
    noise = np.asarray(noise, dtype=np.float64)
    if speech.ndim != 1 or noise.ndim != 1:
        raise ValueError("speech and noise must be mono (1-D) signals")

    # Truncate BOTH signals: truncating only the noise fails on the
    # addition below when the noise recording is the shorter one.
    n_samples = min(len(speech), len(noise))
    if n_samples == 0:
        raise ValueError("speech and noise must not be empty")
    speech = speech[:n_samples]
    noise = noise[:n_samples]

    # weight noise according to desired SNR
    signal_level = np.linalg.norm(speech)
    noise_level = np.linalg.norm(noise)
    if noise_level > 0:
        noise_fact = signal_level / 10 ** (snr_db / 20)
        mixture = speech + noise * noise_fact / noise_level
    else:
        # Silent noise cannot be rescaled; avoid a 0/0 that yields NaNs.
        mixture = speech.copy()

    # Normalise the peak to 1, skipping an all-zero mixture.
    peak = np.abs(mixture).max()
    if peak > 0:
        mixture /= peak
    mixture -= mixture.mean()
    return mixture


def denoise(noisy_signal):
    """Apply spectral subtraction to ``noisy_signal`` block by block.

    The signal is processed in consecutive blocks of ``hop`` samples.
    Samples after the last complete block are left at zero in the output.

    Parameters
    ----------
    noisy_signal : numpy.ndarray, 1-D
        Signal to be denoised.

    Returns
    -------
    numpy.ndarray
        Array of zeros with the same shape as ``noisy_signal`` into which
        the processed blocks have been written.
    """
    # Hann window
    window = pra.hann(nfft, flag='asymmetric', length='full')

    # create objects
    # streaming=True: blocks of `hop` samples are pushed one at a time, as in
    # the SpectralSub usage example of the pyroomacoustics documentation.
    stft = pra.transform.STFT(nfft, hop=hop, analysis_window=window,
                              streaming=True)
    scnr = pra.denoise.SpectralSub(nfft, db_reduc, lookback, beta, alpha)

    # collect the processed blocks
    denoised_signal = np.zeros(noisy_signal.shape)
    # The "+ 1" keeps the final block when the signal length is an exact
    # multiple of `hop` (a strict "remaining > hop" test would drop it).
    for n in range(0, noisy_signal.shape[0] - hop + 1, hop):
        # SCNR in frequency domain
        stft.analysis(noisy_signal[n: n + hop])
        gain_filt = scnr.compute_gain_filter(stft.X)
        # back to time domain
        denoised_signal[n: n + hop] = stft.synthesis(gain_filt * stft.X)
    return denoised_signal


def main():
    """Read the input WAV files, build the noisy mix, denoise it, write both."""
    # load WAV files, should have same sampling rates!
    fs_s, audio = wavfile.read(speech_file)
    fs_n, noise = wavfile.read(noise_file)
    # Explicit check instead of `assert`, which is removed under `python -O`.
    if fs_s != fs_n:
        raise ValueError(
            "audio and noise wav files should have same sampling rate!")

    noisy_signal = mix_at_snr(audio, noise, snr)
    denoised_signal = denoise(noisy_signal)

    # output results
    os.makedirs(outputDir, exist_ok=True)
    # Cast to float32: scipy picks the WAV sample format from the dtype, and
    # float64 would produce 64-bit float files that many players cannot open.
    print("Output noisy speech.")
    wavfile.write(os.path.join(outputDir, "noisy_speech.wav"), fs_s,
                  noisy_signal.astype(np.float32))
    print("Output denoised speech.")
    wavfile.write(os.path.join(outputDir, "denoised_speech.wav"), fs_s,
                  denoised_signal.astype(np.float32))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment