Created
June 27, 2018 23:50
-
-
Save syncopika/a045190423c884e4c5ad2ce8925dce92 to your computer and use it in GitHub Desktop.
remove vocals from a stereo .wav file with SDL_Audio!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream>
#include <string>
#include <vector>
#include <SDL.h>
// some resources I visited
// https://adamtcroft.com/playing-sound-with-sdl-c/
// https://davidgow.net/handmadepenguin/ch7.html
// https://gigi.nullneuron.net/gigilabs/playing-a-wav-file-using-sdl2/
// https://forums.libsdl.org/viewtopic.php?p=50240
// https://stackoverflow.com/questions/10110905/simple-sound-wave-generator-with-sdl-in-c
// https://stackoverflow.com/questions/8258398/c-how-to-combine-two-signed-8-bit-numbers-to-a-16-bit-short-unexplainable-res
// https://stackoverflow.com/questions/28632721/does-16bit-integer-pcm-data-mean-its-signed-or-unsigned
// https://stackoverflow.com/questions/34897166/sdl2-audio-callback-restricted-to-8bit
// https://opensource.apple.com/source/IOAudioFamily/IOAudioFamily-183.4.2/Examples/Templates/SamplePCIAudioDriver/SampleAudioClip.cpp.auto.html
// https://stackoverflow.com/questions/15087668/how-to-convert-pcm-samples-in-byte-array-as-floating-point-numbers-in-the-range
// https://stackoverflow.com/questions/46177712/convert-int16-array-to-float
// some wav file info
// http://www.cplusplus.com/forum/general/205408/
// audio data struct that the audio callback will use
struct AudioData{ | |
Uint8* position; | |
Uint32 length; | |
}; | |
// define the audio callback that the SDL_AudioSpec will use
void audioCallback(void* userData, Uint8* stream, int length){ | |
AudioData* audio = (AudioData*)userData; | |
float* streamF = (float *)stream; | |
if(audio->length == 0){ | |
// stop playing stream here?? | |
return; | |
} | |
// length is number of bytes of userData's audio data | |
Uint32 len = (Uint32)length; | |
if(len > audio->length){ | |
len = audio->length; | |
} | |
// copy len bytes from audio stream at audio->position to stream buffer | |
SDL_memcpy(streamF, audio->position, len); | |
audio->position += len; | |
audio->length -= len; | |
} | |
int main(int argc, char **argv){ | |
// initialize SDL before doing anything else | |
if(SDL_Init(SDL_INIT_AUDIO) != 0){ | |
std::cout << "Error initializing SDL!" << std::endl; | |
return 1; | |
} | |
// set up an AudioSpec to load in the file | |
SDL_AudioSpec wavSpec; | |
Uint8* wavStart; | |
Uint32 wavLength; | |
std::string file = /* path to wav file - make sure to escape any slashes */ | |
std::cout << "the file is: " << file << std::endl; | |
// load the wav file and some of its properties to the specified variables | |
if(SDL_LoadWAV(file.c_str(), &wavSpec, &wavStart, &wavLength) == NULL){ | |
std::cout << "couldn't load wav file" << std::endl; | |
return 1; | |
} | |
// convert audio data to F32 | |
SDL_AudioCVT cvt; | |
SDL_BuildAudioCVT(&cvt, AUDIO_S16, 2, 48000, AUDIO_F32, 2, 48000); | |
cvt.len = wavLength; | |
cvt.buf = (Uint8 *)SDL_malloc(cvt.len * cvt.len_mult); | |
// copy current audio data to the buffer (dest, src, len) | |
SDL_memcpy(cvt.buf, wavStart, wavLength); // wavLength is the total number of bytes the audio data takes up | |
SDL_ConvertAudio(&cvt); | |
// audio data is now in float form! | |
float* newData = (float *)cvt.buf; | |
std::vector<float> leftChannel; | |
std::vector<float> rightChannel; | |
// divide by 4 since cvt.len_cvt is total bytes of the buffer, and 4 bytes per float | |
int floatBufLen = (int)cvt.len_cvt / 4; | |
int count = 0; // if 0, left channel. 1 for right channel | |
for(int i = 0; i < floatBufLen; i++){ | |
if(count == 0){ | |
leftChannel.push_back(newData[i]); | |
count++; | |
}else{ | |
rightChannel.push_back(newData[i]); | |
count--; | |
} | |
} | |
// now eliminate the vocal by getting the diff between left and right and dividing by 2 | |
std::vector<float> modifiedData; | |
for(int j = 0; j < (int)leftChannel.size(); j++){ | |
float temp = (leftChannel[j] - rightChannel[j]) / 2.0; | |
modifiedData.push_back(temp); | |
} | |
// set up another SDL_AudioSpec with 1 channel to play the modified audio buffer of wavSpec | |
SDL_AudioSpec karaokeAudio; | |
karaokeAudio.freq = wavSpec.freq; | |
karaokeAudio.format = AUDIO_F32; | |
karaokeAudio.channels = 1; | |
karaokeAudio.samples = wavSpec.samples; | |
karaokeAudio.callback = audioCallback; | |
AudioData audio; | |
audio.position = (Uint8*)modifiedData.data(); | |
audio.length = (Uint32)(modifiedData.size() * sizeof(float)); | |
karaokeAudio.userdata = &audio; | |
SDL_AudioDeviceID audioDevice; | |
audioDevice = SDL_OpenAudioDevice(NULL, 0, &karaokeAudio, NULL, 0); | |
// play | |
SDL_PauseAudioDevice(audioDevice, 0); | |
while(audio.length > 0){ | |
SDL_Delay(1000); // set some delay so program doesn't immediately quit | |
} | |
// done playing audio | |
SDL_CloseAudioDevice(audioDevice); | |
SDL_FreeWAV(wavStart); | |
SDL_Quit(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment