Skip to content

Instantly share code, notes, and snippets.

@matbra
Last active December 14, 2015 17:25
Show Gist options
  • Save matbra/bf9c53f86f2ef388de90 to your computer and use it in GitHub Desktop.
Save matbra/bf9c53f86f2ef388de90 to your computer and use it in GitHub Desktop.
This gist contains code that is part of a programming assignment (Peer Assessment 10a) for the Coursera course "Audio Signal Processing for Music Applications".
def sineModelMultiRes(x, fs, w, N, t, B):
"""
Analysis/synthesis of a sound using the sinusoidal model, without sine tracking
x: input array sound, w: analysis window, N: size of complex spectrum, t: threshold in negative dB
returns y: output array sound
This is an extension of the original sineModel() function that supports different DFT
resolutions/window lengths in different subbands.
w is assumed to be a Python list of N_w windows
N is a Python list of (corresponding) DFT lengths
B is a Python list with (N_w - 1) elements, containing the frequencies where
the bands are split (0 Hz and fs/2 are always included)
Hence, not only three bands are supported but multiple, if desired.
"""
# determine the half analysis window lengths
hM1 = np.zeros(len(w))
hM2 = np.zeros(len(w))
for idx_w, cur_w in enumerate(w):
hM1[idx_w] = int(math.floor((cur_w.size+1)/2)) # half analysis window size by rounding
hM2[idx_w] = int(math.floor(cur_w.size/2)) # half analysis window size by floor
N_bands = len(w)
Ns = 512 # FFT size for synthesis (even)
H = Ns/4 # Hop size used for analysis and synthesis
hNs = Ns/2 # half of synthesis FFT size
pin = np.max(hM1) # init sound pointer in middle of anal window
pend = x.size - max(hNs, np.max(hM1)) # last sample to start a frame
# fftbuffer = [np.zeros(n) for n in N] # initialize buffer for FFT
yw = np.zeros(Ns) # initialize output sound frame
y = np.zeros(x.size) # initialize output array
for cur_w in w: # normalize analysis window
cur_w /= sum(cur_w)
sw = np.zeros(Ns) # initialize synthesis window
ow = triang(2*H) # triangular window
sw[hNs-H:hNs+H] = ow # add triangular window
bh = blackmanharris(Ns) # blackmanharris window
bh = bh / sum(bh) # normalized blackmanharris window
sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] # normalized synthesis window
while pin<pend: # while input sound pointer is within sound
#-----analysis-----
iploc_overall = []
ipmag_overall = []
ipphase_overall = []
ipfreq_overall = []
for idx_band in range(N_bands):
f_lower = 0 if idx_band==0 else B[idx_band-1]
f_upper = fs/2 if idx_band==N_bands-1 else B[idx_band]
x1 = x[pin-hM1[idx_band]:pin+hM2[idx_band]]
mX, pX = DFT.dftAnal(x1, w[idx_band], N[idx_band]) # compute dft
ploc = UF.peakDetection(mX, t) # detect locations of peaks
pmag = mX[ploc] # get the magnitude of the peaks
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values by interpolation
ipfreq = fs*iploc/float(N[idx_band]) # convert peak locations to Hertz
idx_freqs_in_cur_band = np.where((ipfreq >= f_lower) & (ipfreq < f_upper))
iploc_overall.append(iploc[idx_freqs_in_cur_band])
ipmag_overall.append(ipmag[idx_freqs_in_cur_band])
ipphase_overall.append(ipphase[idx_freqs_in_cur_band])
ipfreq_overall.append(ipfreq[idx_freqs_in_cur_band])
#-----synthesis-----
Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs) # generate sines in the spectrum
fftbuffer = np.real(ifft(Y)) # compute inverse FFT
yw[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window
yw[hNs-1:] = fftbuffer[:hNs+1]
y[pin-hNs:pin+hNs] += sw*yw # overlap-add and apply a synthesis window
pin += H # advance sound pointer
return y
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment