import numpy as np
from scipy.io import wavfile
import os
import pyroomacoustics as pra
# STFT parameters
fft_len = 512
hop = fft_len // 2 # half overlap
window = pra.hann(fft_len, flag='asymmetric', length='full')
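# Note (added remark): 'hop' defined here (fft_len // 2 = 256 samples) is reused by the
# block-processing loop further below; 'nfft' defined later equals 'fft_len', so the Hann
# window created there is identical to this one.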
snr = 5  # desired SNR of the noisy input signal (dB)
speech_file = "./input/arctic_a0010.wav"
noise_file = "./input/exercise_bike.wav"
# load WAV files, should have same sampling rates!
fs_s, audio = wavfile.read(speech_file)
fs_n, noise = wavfile.read(noise_file)
assert fs_s == fs_n, "audio and noise wav files should have same sampling rate!"
# truncate to same length
noise = noise[:len(audio)]
# weight noise according to desired SNR
signal_level = np.linalg.norm(audio)
noise_level = np.linalg.norm(noise)
noise_fact = signal_level / 10**(snr / 20)
noise_weighted = noise * noise_fact / noise_level
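# Optional sanity check (not in the original gist, uses only numpy): since
# noise_weighted has norm signal_level / 10**(snr / 20), the ratio of norms
# reproduces the requested SNR in dB:
#   20 * np.log10(signal_level / np.linalg.norm(noise_weighted)) == snr
# Uncomment to verify:
# print("SNR (dB):", 20 * np.log10(signal_level / np.linalg.norm(noise_weighted)))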
# add signal and noise
noisy_signal = audio + noise_weighted
noisy_signal /= np.abs(noisy_signal).max()
noisy_signal -= noisy_signal.mean()
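# Note: wavfile.read typically returns 16-bit integer samples, so the sum above is
# promoted to float; after peak normalization and mean removal the noisy signal lies
# roughly in [-1, 1] and is written out below as a floating-point WAV by wavfile.write.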
# STFT and SpectralSub parameters
# The hop (block) length will be nfft/2 as we use an STFT with 50% overlap.
nfft = 512
# Maximum suppression per frequency bin.
# Large suppression can result in more musical noise.
db_reduc = 10
lookback = 5 # How many frames to look back for the noise floor estimate.
# An overestimation factor to "push" the suppression towards db_reduc.
beta = 10
# An exponential factor to tune the suppression
# (see documentation of 'SpectralSub').
alpha = 3
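# Rough intuition (see the pyroomacoustics 'SpectralSub' documentation for the exact
# expression): classic generalized spectral subtraction applies a per-bin gain of the form
#   G(k) = max( (1 - beta * (N(k) / X(k))**alpha)**(1/alpha), G_min ),
# where N(k) is the estimated noise magnitude, X(k) the noisy magnitude, and
# G_min = 10**(-db_reduc / 20) caps the attenuation at db_reduc dB per bin.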
# Hann window
window = pra.hann(nfft, flag='asymmetric', length='full')
# create objects
stft = pra.transform.STFT(nfft, hop=nfft // 2, analysis_window=window)
scnr = pra.denoise.SpectralSub(nfft, db_reduc, lookback, beta, alpha)
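# The STFT object is used in streaming mode: it keeps analysis/synthesis state
# internally, so the loop below can feed it the noisy signal in consecutive
# chunks of 'hop' samples.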
# collect the processed blocks
denoised_signal = np.zeros(noisy_signal.shape)
n = 0
while noisy_signal.shape[0] - n > hop:
    # SCNR in frequency domain
    stft.analysis(noisy_signal[n: n + hop])
    gain_filt = scnr.compute_gain_filter(stft.X)
    # back to time domain
    denoised_signal[n: n + hop] = stft.synthesis(gain_filt * stft.X)
    # update step
    n += hop
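# Note: the loop stops once fewer than 'hop' samples remain, so the last partial
# block of 'denoised_signal' (at most hop samples) is left as zeros.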
# output results
outputDir = './output'
os.makedirs(outputDir, exist_ok=True)
print("Output noisy speech.")
wavfile.write("{}/noisy_speech.wav".format(outputDir), fs_s, noisy_signal)
print("Output denoised speech.")
wavfile.write("{}/denoised_speech.wav".format(outputDir), fs_s, denoised_signal)