Skip to content

Instantly share code, notes, and snippets.

@syncopika
Created June 27, 2018 23:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save syncopika/a045190423c884e4c5ad2ce8925dce92 to your computer and use it in GitHub Desktop.
Save syncopika/a045190423c884e4c5ad2ce8925dce92 to your computer and use it in GitHub Desktop.
remove vocals from a stereo .wav file with SDL_Audio!
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>
#include <SDL.h>
// some resources I visited
// https://adamtcroft.com/playing-sound-with-sdl-c/
// https://davidgow.net/handmadepenguin/ch7.html
// https://gigi.nullneuron.net/gigilabs/playing-a-wav-file-using-sdl2/
// https://forums.libsdl.org/viewtopic.php?p=50240
// https://stackoverflow.com/questions/10110905/simple-sound-wave-generator-with-sdl-in-c
// https://stackoverflow.com/questions/8258398/c-how-to-combine-two-signed-8-bit-numbers-to-a-16-bit-short-unexplainable-res
// https://stackoverflow.com/questions/28632721/does-16bit-integer-pcm-data-mean-its-signed-or-unsigned
// https://stackoverflow.com/questions/34897166/sdl2-audio-callback-restricted-to-8bit
// https://opensource.apple.com/source/IOAudioFamily/IOAudioFamily-183.4.2/Examples/Templates/SamplePCIAudioDriver/SampleAudioClip.cpp.auto.html
// https://stackoverflow.com/questions/15087668/how-to-convert-pcm-samples-in-byte-array-as-floating-point-numbers-in-the-range
// https://stackoverflow.com/questions/46177712/convert-int16-array-to-float
// some wav file info
// http://www.cplusplus.com/forum/general/205408/
// Playback cursor handed to audioCallback through SDL_AudioSpec::userdata.
// The callback advances `position` and decreases `length` by the number of
// bytes it copies, so `length == 0` means everything has been consumed.
struct AudioData{
    Uint8* position = nullptr; // next byte of sample data to hand to SDL
    Uint32 length = 0;         // number of bytes remaining starting at `position`
};
// define an audio callback that SDL_AudioSpec will use
void audioCallback(void* userData, Uint8* stream, int length){
AudioData* audio = (AudioData*)userData;
float* streamF = (float *)stream;
if(audio->length == 0){
// stop playing stream here??
return;
}
// length is number of bytes of userData's audio data
Uint32 len = (Uint32)length;
if(len > audio->length){
len = audio->length;
}
// copy len bytes from audio stream at audio->position to stream buffer
SDL_memcpy(streamF, audio->position, len);
audio->position += len;
audio->length -= len;
}
int main(int argc, char **argv){
// initialize SDL before doing anything else
if(SDL_Init(SDL_INIT_AUDIO) != 0){
std::cout << "Error initializing SDL!" << std::endl;
return 1;
}
// set up an AudioSpec to load in the file
SDL_AudioSpec wavSpec;
Uint8* wavStart;
Uint32 wavLength;
std::string file = /* path to wav file - make sure to escape any slashes */
std::cout << "the file is: " << file << std::endl;
// load the wav file and some of its properties to the specified variables
if(SDL_LoadWAV(file.c_str(), &wavSpec, &wavStart, &wavLength) == NULL){
std::cout << "couldn't load wav file" << std::endl;
return 1;
}
// convert audio data to F32
SDL_AudioCVT cvt;
SDL_BuildAudioCVT(&cvt, AUDIO_S16, 2, 48000, AUDIO_F32, 2, 48000);
cvt.len = wavLength;
cvt.buf = (Uint8 *)SDL_malloc(cvt.len * cvt.len_mult);
// copy current audio data to the buffer (dest, src, len)
SDL_memcpy(cvt.buf, wavStart, wavLength); // wavLength is the total number of bytes the audio data takes up
SDL_ConvertAudio(&cvt);
// audio data is now in float form!
float* newData = (float *)cvt.buf;
std::vector<float> leftChannel;
std::vector<float> rightChannel;
// divide by 4 since cvt.len_cvt is total bytes of the buffer, and 4 bytes per float
int floatBufLen = (int)cvt.len_cvt / 4;
int count = 0; // if 0, left channel. 1 for right channel
for(int i = 0; i < floatBufLen; i++){
if(count == 0){
leftChannel.push_back(newData[i]);
count++;
}else{
rightChannel.push_back(newData[i]);
count--;
}
}
// now eliminate the vocal by getting the diff between left and right and dividing by 2
std::vector<float> modifiedData;
for(int j = 0; j < (int)leftChannel.size(); j++){
float temp = (leftChannel[j] - rightChannel[j]) / 2.0;
modifiedData.push_back(temp);
}
// set up another SDL_AudioSpec with 1 channel to play the modified audio buffer of wavSpec
SDL_AudioSpec karaokeAudio;
karaokeAudio.freq = wavSpec.freq;
karaokeAudio.format = AUDIO_F32;
karaokeAudio.channels = 1;
karaokeAudio.samples = wavSpec.samples;
karaokeAudio.callback = audioCallback;
AudioData audio;
audio.position = (Uint8*)modifiedData.data();
audio.length = (Uint32)(modifiedData.size() * sizeof(float));
karaokeAudio.userdata = &audio;
SDL_AudioDeviceID audioDevice;
audioDevice = SDL_OpenAudioDevice(NULL, 0, &karaokeAudio, NULL, 0);
// play
SDL_PauseAudioDevice(audioDevice, 0);
while(audio.length > 0){
SDL_Delay(1000); // set some delay so program doesn't immediately quit
}
// done playing audio
SDL_CloseAudioDevice(audioDevice);
SDL_FreeWAV(wavStart);
SDL_Quit();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment