-
-
Save fakufaku/d395f5aa5e5c5fa07c7cbcf51d543413 to your computer and use it in GitHub Desktop.
"""Compare the STFT/iSTFT of PyTorch and paderbox on a pure tone.

Both libraries are configured to frame the signal identically (no centering /
no fading, periodic Hamming window, explicit zero-padding at both ends), so
that their spectrograms can be compared element-wise and both round-trips can
be checked for perfect reconstruction.
"""
import matplotlib.pyplot as plt
import numpy as np
import paderbox
import torch
from scipy.signal import get_window
# The window functions were removed from the top-level ``scipy.signal``
# namespace in recent SciPy releases; ``scipy.signal.windows`` is the
# supported location.
from scipy.signal.windows import blackman, hamming, hann

f = 1500.0  # exactly periodic
fs = 48000.0
nfft = 512
hop = 128

# One second of a sine tone at frequency `f`.
demo = np.sin(2 * np.pi * f / fs * np.arange(fs))

# To make the comparison easier, pad the signal with `nfft - hop` zeros at
# both ends: every sample is then covered by the same number of overlapping
# frames and the frames tile the padded signal exactly.
pad_size = nfft - hop
padding = np.zeros(pad_size)
demo = np.concatenate([demo, padding])  # trailing pad (part of the reference)
demo_lr = np.concatenate([padding, demo])  # leading pad (stripped after iSTFT)

# torch
# - use `center=False`
# - zero-pad the front of the signal (done above)
demo_lr_pt = torch.from_numpy(demo_lr)
win_pt = torch.hamming_window(nfft, dtype=demo_lr_pt.dtype)
PT = torch.stft(
    demo_lr_pt,
    n_fft=nfft,
    hop_length=hop,
    window=win_pt,
    return_complex=True,
    pad_mode="constant",  # only consulted when center=True; kept explicit
    center=False,
)
recon_pt = torch.istft(PT, n_fft=nfft, hop_length=hop, window=win_pt, center=False)
recon_pt = recon_pt[pad_size:]  # drop the leading zero-padding
recon_pt = recon_pt.numpy()
PT = PT.numpy()
print("torch: reconstruction exact ?", np.allclose(recon_pt, demo))

# paderbox
# Feed the NumPy array (not the torch tensor) so that paderbox runs on its
# regular NumPy code path.
AR = paderbox.transform.stft(
    demo_lr,
    size=nfft,
    shift=hop,
    fading=False,
    window=hamming,
)
recon_ar = paderbox.transform.istft(
    AR, size=nfft, shift=hop, fading=False, window=hamming
)
# Transpose so the layout matches the torch result for the comparison below
# (assumes paderbox returns frames first — TODO confirm).
AR = AR.T
recon_ar = recon_ar[pad_size:]  # drop the leading zero-padding
print(
    "pader: reconstruction exact ?",
    np.allclose(recon_ar, demo),
)
print("difference between torch's and paderbox's STFT", abs(PT - AR).max())
If I change "hamming" to "blackman" (which is used in GSS), I get the following error:
RuntimeError: istft(CPUComplexDoubleType[257, 378], n_fft=512, hop_length=128, win_length=512, window=torch.DoubleTensor{[512]}, center=0, normalized=0, onesided=None, length=None, return_complex=0) window overlap add min: 0
Perhaps related to the following issue: pytorch/pytorch#62323?
I tried to get around the issue by using the idea from this comment, i.e., win_pt = torch.blackman_window(nfft + 2, dtype=demo_lr.dtype)[1:-1], but then the difference between the STFTs is large (0.42016923270740847).
I also reported the same issue! pytorch/pytorch#91309
Does it have to be Blackman? You can also use center=True, which pads the signal slightly differently but should not change the quality.
I also reported the same issue! pytorch/pytorch#91309 Does it have to be Blackman ? You can also use center=True, which has slightly different padding, but should not change quality.
I will try out other options and see how they compare in terms of downstream WER.
If I change "hamming" to "blackman" (which is used in GSS), I get the following error:
Perhaps related to the following issue: pytorch/pytorch#62323?