Created
April 7, 2013 03:08
-
-
Save asquared/5328733 to your computer and use it in GitHub Desktop.
Speed up or slow down audio without changing the pitch. Uses the fftw FFT library. Compile with g++ -std=c++11 -g -W -Wall -o phase_vocoder_simple phase_vocoder_simple.cpp -lfftw3f. Run with ./phase_vocoder_simple < raw_floating_point_samples > output_raw_floating_point_samples.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2013 Andrew Armenia | |
* | |
* This program is free software: you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License as published by | |
* the Free Software Foundation, either version 3 of the License, or | |
* (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program. If not, see <http://www.gnu.org/licenses/>. | |
*/ | |
#include <stdexcept> | |
#include <vector> | |
#include <fftw3.h> | |
#include <math.h> | |
#include <stddef.h> | |
#include <unistd.h> | |
using namespace std; | |
/* does this work? */ | |
static const float pi = 4*atanf(1); | |
void hann_window(float *target, size_t n) { | |
size_t k; | |
for (k = 0; k < n; k++) { | |
target[k] = 0.5 * (1 - cosf(2*pi*k/(n-1))); | |
} | |
} | |
template <class T, class U> | |
void numarray_copy(T *dst, const U *src, size_t sz) { | |
while (sz != 0) { | |
sz--; | |
dst[sz] = src[sz]; | |
} | |
} | |
template <class T, class U> | |
void numarray_mult(T *srcdst, const U *src, size_t sz) { | |
while (sz != 0) { | |
sz--; | |
srcdst[sz] *= src[sz]; | |
} | |
} | |
template <class T, class U> | |
void numarray_cdiv(T *srcdst, U src, size_t sz) { | |
while (sz != 0) { | |
sz--; | |
srcdst[sz] /= src; | |
} | |
} | |
template <class T, class U> | |
void numarray_set(T *srcdst, U src, size_t sz) { | |
while (sz != 0) { | |
sz--; | |
srcdst[sz] = src; | |
} | |
} | |
template <class T> | |
T *numarray_acopy(const T *src, size_t sz) { | |
T *ret = new T[sz]; | |
while (sz != 0) { | |
sz--; | |
ret[sz] = src[sz]; | |
} | |
return ret; | |
} | |
template <class T, class U> | |
void numarray_add(T *srcdst, const U *src, size_t sz) { | |
while (sz != 0) { | |
sz--; | |
srcdst[sz] += src[sz]; | |
} | |
} | |
template <class T> | |
void numarray_pop(T *array, size_t n, size_t sz) { | |
size_t i; | |
for (i = n; i < sz; i++) { | |
array[i - n] = array[i]; | |
} | |
for (i = sz - n; i < sz; i++) { | |
array[i] = (T) 0; | |
} | |
} | |
/* convert FFTW halfcomplex format between rectangular and polar forms */ | |
void make_polar(float *frame, size_t sz) { | |
/* | |
* 0..sz/2 are real components. | |
* sz/2+1..sz-1 are imaginary components. | |
* (DC and Nyquist components have no imaginary part) | |
* | |
* e.g. for 8 point FFT we have R R R R R I I I | |
*/ | |
float *rmp = &frame[sz/2 - 1]; | |
float *ipp = &frame[sz/2 + 1]; | |
float re, im, mag, phase; | |
size_t i; | |
for (i = 0; i < sz/2 - 1; i++) { | |
re = *rmp; | |
im = *ipp; | |
mag = sqrtf(re*re + im*im); | |
phase = atan2f(im, re); | |
*rmp = mag; | |
*ipp = phase; | |
rmp--; | |
ipp++; | |
} | |
} | |
void make_rectangular(float *frame, size_t sz) { | |
/* inverse of make_polar */ | |
float *rmp = &frame[sz/2 - 1]; | |
float *ipp = &frame[sz/2 + 1]; | |
float mag, phase, re, im; | |
size_t i; | |
for (i = 0; i < sz/2 - 1; i++) { | |
mag = *rmp; | |
phase = *ipp; | |
re = mag * cosf(phase); | |
im = mag * sinf(phase); | |
*rmp = re; | |
*ipp = im; | |
rmp--; | |
ipp++; | |
} | |
} | |
void interpolate_polar(float *accum, const float *f1, const float *f2, size_t sz, float pos) { | |
size_t i; | |
/* magnitude interpolation */ | |
for (i = 0; i <= sz/2; i++) { | |
accum[i] = f1[i] * (1.0f-pos) + f2[i] * pos; | |
} | |
/* phase delta/accumulation */ | |
for (i = sz/2 + 1; i < sz; i++) { | |
accum[i] += (f2[i] - f1[i]); | |
if (accum[i] > pi) { | |
accum[i] -= 2*pi; | |
} | |
if (accum[i] < -pi) { | |
accum[i] += 2*pi; | |
} | |
} | |
} | |
ssize_t read_all(int fd, void *data, size_t size) { | |
ssize_t nread; | |
uint8_t *buf = (uint8_t *) data; | |
while (size > 0) { | |
nread = read(fd, buf, size); | |
if (nread > 0) { | |
size -= nread; | |
buf += nread; | |
} else if (nread == 0) { | |
/* EOF */ | |
return 0; | |
} else { | |
if (errno == EAGAIN || errno == EINTR) { | |
/* don't worry about these */ | |
} else { | |
/* return with error to caller */ | |
return -1; | |
} | |
} | |
} | |
return 1; | |
} | |
ssize_t write_all(int fd, const void *data, size_t size) { | |
ssize_t written; | |
uint8_t *buf = (uint8_t *) data; | |
while (size > 0) { | |
written = write(fd, buf, size); | |
if (written > 0) { | |
size -= written; | |
buf += written; | |
} else if (written == 0) { | |
/* what exactly does this mean? we'll try again */ | |
} else { | |
if (errno == EAGAIN || errno == EINTR) { | |
/* don't worry about these */ | |
} else { | |
/* exit (pass errno through to caller) */ | |
return -1; | |
} | |
} | |
} | |
return 1; | |
} | |
int main( ) { | |
const size_t FFT_SIZE = 2048; | |
const size_t HOP_SIZE = FFT_SIZE/32; | |
unsigned int speed_mult = 6; | |
unsigned int speed_div = 8; | |
size_t fno; | |
float *analyze_buffer; | |
float *fft_input; | |
float *fft_result; | |
float *synth_buffer; | |
float *window; | |
float *phase_accum; | |
vector<float *> fft_frames; | |
fftwf_plan p, ip; | |
/* allocate arrays */ | |
window = (float *)fftwf_malloc(FFT_SIZE * sizeof(float)); | |
analyze_buffer = (float *)fftwf_malloc(FFT_SIZE * sizeof(float)); | |
fft_input = (float *)fftwf_malloc(FFT_SIZE * sizeof(float)); | |
fft_result = (float *)fftwf_malloc(FFT_SIZE * sizeof(float)); | |
synth_buffer = (float *)fftwf_malloc(FFT_SIZE * sizeof(float)); | |
phase_accum = (float *)fftwf_malloc(FFT_SIZE * sizeof(float)); | |
/* generate windowing function */ | |
hann_window(window, FFT_SIZE); | |
/* create FFT plans */ | |
p = fftwf_plan_r2r_1d(FFT_SIZE, fft_input, fft_result, | |
FFTW_R2HC, FFTW_ESTIMATE); | |
if (p == NULL) { | |
throw std::runtime_error("fftwf_plan_r2r_1d failed"); | |
} | |
ip = fftwf_plan_r2r_1d(FFT_SIZE, fft_input, fft_result, | |
FFTW_HC2R, FFTW_ESTIMATE); | |
if (ip == NULL) { | |
throw std::runtime_error("fftwf_plan_r2r_1d failed"); | |
} | |
/* preload FFT input buffer */ | |
if ( | |
read_all( | |
STDIN_FILENO, | |
analyze_buffer, | |
FFT_SIZE * sizeof(float) | |
) <= 0 | |
) { | |
throw std::runtime_error("insufficient input"); | |
} | |
/* phase 1: FFT analysis */ | |
for (;;) { | |
/* copy data */ | |
numarray_copy(fft_input, analyze_buffer, FFT_SIZE); | |
/* apply windowing function */ | |
numarray_mult(fft_input, window, FFT_SIZE); | |
/* do an FFT */ | |
fftwf_execute_r2r(p, fft_input, fft_result); | |
/* convert to polar format */ | |
make_polar(fft_result, FFT_SIZE); | |
/* save result */ | |
fft_frames.push_back(numarray_acopy(fft_result, FFT_SIZE)); | |
/* pop a hop off our buffer */ | |
numarray_pop(analyze_buffer, HOP_SIZE, FFT_SIZE); | |
/* try to read some more data */ | |
if ( | |
read_all( | |
STDIN_FILENO, | |
&analyze_buffer[FFT_SIZE - HOP_SIZE], | |
HOP_SIZE * sizeof(float) | |
) <= 0 | |
) { | |
break; | |
} | |
} | |
/* phase 2: re-synthesis */ | |
numarray_set(synth_buffer, 0, FFT_SIZE); | |
numarray_set(phase_accum, 0, FFT_SIZE); | |
for (int i = 0;; i++) { | |
fno = i * speed_mult / speed_div; | |
if (fno >= fft_frames.size( ) - 1) { | |
break; | |
} | |
/* copy data */ | |
interpolate_polar(phase_accum, fft_frames[fno], fft_frames[fno+1], | |
FFT_SIZE, float(i % speed_div)/speed_div); | |
numarray_copy(fft_input, phase_accum, FFT_SIZE); | |
/* convert to rectangular format */ | |
make_rectangular(fft_input, FFT_SIZE); | |
/* inverse FFT */ | |
fftwf_execute_r2r(ip, fft_input, fft_result); | |
/* apply windowing function */ | |
numarray_mult(fft_result, window, FFT_SIZE); | |
/* divide by scaling factor necessary for reciprocal FFT */ | |
numarray_cdiv(fft_result, FFT_SIZE * 16, FFT_SIZE); | |
/* overlap-add into the existing array */ | |
numarray_add(synth_buffer, fft_result, FFT_SIZE); | |
/* output first HOP_SIZE samples of synth buffer */ | |
write_all(STDOUT_FILENO, synth_buffer, HOP_SIZE * sizeof(float)); | |
/* discard first HOP_SIZE samples from buffer */ | |
numarray_pop(synth_buffer, HOP_SIZE, FFT_SIZE); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment