Created
August 19, 2013 13:12
-
-
Save bistaumanga/6268977 to your computer and use it in GitHub Desktop.
Phase vocoder algorithm for time scaling and pitch scaling of audio signals
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from numpy.fft import fft, ifft, fftshift
from scipy.io import wavfile
def hamming(M):
    """Return an M-point symmetric Hamming window.

    Computes ``0.54 - 0.46 * cos(2*pi*n / (M - 1))`` for ``n = 0 .. M-1``.

    Parameters
    ----------
    M : int
        Number of points in the window.

    Returns
    -------
    numpy.ndarray
        Array of length ``M`` (empty when ``M`` is less than 1).
    """
    if M == 1:
        # The general formula evaluates 0/0 for a single point and would
        # yield NaN; a one-point window is simply 1.
        return np.ones(1)
    n = np.arange(M)
    return 0.54 - 0.46 * np.cos(2 * np.pi * n / (M - 1))
def hann(M):
    """Return an M-point symmetric Hann window.

    Computes ``0.5 - 0.5 * cos(2*pi*n / (M - 1))`` for ``n = 0 .. M-1``.

    Parameters
    ----------
    M : int
        Number of points in the window.

    Returns
    -------
    numpy.ndarray
        Array of length ``M`` (empty when ``M`` is less than 1).
    """
    if M == 1:
        # The general formula evaluates 0/0 for a single point and would
        # yield NaN; a one-point window is simply 1.
        return np.ones(1)
    n = np.arange(M)
    return 0.5 - 0.5 * np.cos(2 * np.pi * n / (M - 1))
def rectwin(M):
    """Return an M-point rectangular window (an array of ones).

    Parameters
    ----------
    M : int
        Number of points in the window.

    Returns
    -------
    numpy.ndarray
        Array of ``M`` ones.
    """
    return np.ones(M)
def phase_voc(x, ts_ratio, L=1024, H=256, win=hamming):
    """Time-scale a 1-D signal with a phase vocoder.

    The signal is analysed in frames of ``L`` samples taken every ``H``
    samples. Each frame's phase is advanced by the unwrapped phase increment
    scaled to the synthesis hop, and the resynthesised frames are
    overlap-added every ``round(H * ts_ratio)`` samples.

    Parameters
    ----------
    x : array_like
        One-dimensional input signal. It is converted to float internally.
    ts_ratio : float
        Ratio of synthesis hop to analysis hop. Values above 1 produce a
        longer output, values below 1 a shorter one.
    L : int
        Frame (FFT/window) length in samples.
    H : int
        Analysis hop size in samples.
    win : callable
        Function returning a window of the requested length; it is called
        as ``win(L)`` and used for both analysis and synthesis.

    Returns
    -------
    numpy.ndarray
        Real-valued time-scaled signal, rescaled so that its peak magnitude
        equals the peak magnitude of ``x``. An empty array is returned when
        ``x`` is shorter than one frame.

    Raises
    ------
    ValueError
        If ``x`` is not one-dimensional, or if the rounded synthesis hop is
        not in the range ``1 .. L``.

    Notes
    -----
    Input samples after the last complete analysis frame are not analysed,
    so up to ``H - 1`` trailing input samples do not contribute.
    """
    # Work in float so that np.abs / scaling cannot overflow on integer PCM.
    x = np.asarray(x, dtype=float)
    if x.ndim != 1:
        raise ValueError('phase_voc expects a 1-D signal, got %d dimensions'
                         % x.ndim)

    # Hop sizes are used as array sizes and slice bounds, so they must be
    # integers even when ts_ratio is a float.
    syn_hop = int(round(H * ts_ratio))
    if not 0 < syn_hop <= L:
        raise ValueError('synthesis hop %d must be in the range 1..%d'
                         % (syn_hop, L))
    # Phase is scaled by the hop ratio actually used after rounding.
    hop_ratio = float(syn_hop) / H

    N = len(x)
    if N < L:
        # Not even one full frame is available.
        return np.zeros(0)

    w = win(L)
    gain = syn_hop / (L * np.sum(w * w))
    # Expected phase advance of each bin's centre frequency over one
    # analysis hop.
    unwrapdata = 2 * np.pi * H / L * np.arange(L)

    prev_angle = np.zeros(L)
    syn_angle = None
    overlap = np.zeros(L - syn_hop)
    chunks = []

    # "N - L + 1" so that a frame ending exactly at the last sample is used.
    for i in range(0, N - L + 1, H):
        ### Analysis with windowed fft of ST signal ###
        spectrum = fft(w * x[i:i + L])
        mag, angle = np.abs(spectrum), np.angle(spectrum)

        # Synthesis phase calculation: take the phase increment between
        # successive frames, remove the expected advance, wrap the
        # remainder to the nearest multiple of 2*pi, add the expected
        # advance back and scale by the hop ratio.
        delta = (angle - prev_angle) - unwrapdata
        delta = delta - np.round(delta / (2. * np.pi)) * 2 * np.pi
        delta = (delta + unwrapdata) * hop_ratio

        if syn_angle is None:
            # The first frame keeps its analysis phase. Copy so that later
            # in-place accumulation never touches the analysis array.
            syn_angle = angle.copy()
        else:
            syn_angle += delta
        prev_angle = angle

        ### Synthesis ###
        # Rebuild the spectrum, go back to the time domain and apply the
        # synthesis window to the time-domain frame. Only the real part is
        # kept; any imaginary part is discarded.
        frame = w * ifft(mag * np.exp(1j * syn_angle)).real

        # Overlap-add with the tail carried over from previous frames.
        frame[:L - syn_hop] += overlap
        chunks.append(frame[:syn_hop] * gain)
        overlap = frame[syn_hop:]

    # Emit the remaining overlap tail so the end of the signal is not cut.
    chunks.append(overlap * gain)
    y = np.concatenate(chunks)

    peak = np.max(np.abs(y))
    if peak == 0:
        # Silent output: nothing to normalise, and dividing would give NaN.
        return y
    return y * (np.max(np.abs(x)) / peak)
# ---------------------------------------------------------------------------
# Demo: time-stretch 'sent1.wav', write the stretched signal at the original
# sample rate (time scaling) and at a scaled sample rate (pitch scaling),
# try to play the three files, then plot waveforms and spectra.
# ---------------------------------------------------------------------------
infile, tsfile, psfile = 'sent1.wav', 'out1.wav', 'out2.wav'
Fs, x = wavfile.read(infile)
import sys
scale = 2.
y = phase_voc(x, scale, L = 1024, H = 128, win = hamming)

# Keep only the real part and clip to the int16 range before casting, so the
# cast neither drops an imaginary part implicitly nor wraps around.
y_real = np.real(y)
y_int16 = np.clip(y_real, -32768, 32767).astype('int16')

wavfile.write(tsfile, Fs, y_int16)
# The sample rate written to the WAV header must be an integer.
wavfile.write(psfile, int(round(Fs * scale)), y_int16)
_, x2 = wavfile.read(psfile)

from os import system
# os.system reports failure through its return status rather than by
# raising, so the status is what decides which message is printed.
status = system('play '+ infile + ' ' + tsfile + ' ' + psfile)
if status == 0:
    print('played')
else:
    print('not played')

import pylab as plt
fig = plt.figure(figsize = (14, 9))

# (segment, sample rate, title, subplot for waveform, subplot for spectrum)
panels = [
    (x[34000:40000], Fs, 'Original', 321, 322),
    (y_real[68000:80000], Fs, 'Time Stretched', 323, 324),
    (x2[68000:80000], scale * Fs, 'Pitch Scaled', 325, 326),
]
for segment, rate, title, time_pos, freq_pos in panels:
    n = len(segment)
    if n == 0:
        # The input is too short to contain this excerpt; nothing to plot.
        continue

    # Waveform against time in seconds; the axis length follows the actual
    # segment length so short files do not cause a shape mismatch.
    fig.add_subplot(time_pos)
    plt.plot(np.arange(n) / float(rate), segment)
    plt.title(title)

    # Magnitude spectrum against frequency in Hz (centred on 0 Hz).
    fig.add_subplot(freq_pos)
    freqs = fftshift(np.fft.fftfreq(n, d = 1.0 / rate))
    plt.plot(freqs, np.abs(fftshift(fft(segment))))
    plt.title(title)

plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
My files keep getting clipped on the end. Any idea why this might be happening?