syncopika/remove_vocals_SDL.cpp

## remove_vocals_SDL.cpp
#include <iostream>
#include <vector>
#include <SDL.h>

// some resources I visited
// https://adamtcroft.com/playing-sound-with-sdl-c/
// https://davidgow.net/handmadepenguin/ch7.html
// https://gigi.nullneuron.net/gigilabs/playing-a-wav-file-using-sdl2/
// https://forums.libsdl.org/viewtopic.php?p=50240
// https://stackoverflow.com/questions/10110905/simple-sound-wave-generator-with-sdl-in-c
// https://stackoverflow.com/questions/8258398/c-how-to-combine-two-signed-8-bit-numbers-to-a-16-bit-short-unexplainable-res
// https://stackoverflow.com/questions/28632721/does-16bit-integer-pcm-data-mean-its-signed-or-unsigned
// https://stackoverflow.com/questions/34897166/sdl2-audio-callback-restricted-to-8bit
// https://opensource.apple.com/source/IOAudioFamily/IOAudioFamily-183.4.2/Examples/Templates/SamplePCIAudioDriver/SampleAudioClip.cpp.auto.html
// https://stackoverflow.com/questions/15087668/how-to-convert-pcm-samples-in-byte-array-as-floating-point-numbers-in-the-range
// https://stackoverflow.com/questions/46177712/convert-int16-array-to-float

// some wav file info
// http://www.cplusplus.com/forum/general/205408/


// Playback cursor handed to the audio callback through SDL_AudioSpec::userdata.
// The callback copies bytes starting at `position`, then advances `position`
// and shrinks `length` by the amount it copied.
struct AudioData
{
	Uint8 *position; // next byte of sample data that has not been played yet
	Uint32 length;   // how many bytes remain from `position` onward
};


// define an audio callback that SDL_AudioSpec will use
void audioCallback(void* userData, Uint8* stream, int length){

	AudioData* audio = (AudioData*)userData;
	float* streamF = (float *)stream;

	if(audio->length == 0){
		// stop playing stream here??
		return;
	}

	// length is number of bytes of userData's audio data
	Uint32 len = (Uint32)length;

	if(len > audio->length){
		len = audio->length;
	}

	// copy len bytes from audio stream at audio->position to stream buffer
	SDL_memcpy(streamF, audio->position, len);

	audio->position += len;
	audio->length -= len;
}


int main(int argc, char **argv){

	// initialize SDL before doing anything else
	if(SDL_Init(SDL_INIT_AUDIO) != 0){
		std::cout << "Error initializing SDL!" << std::endl;
		return 1;
	}

	// set up an AudioSpec to load in the file
	SDL_AudioSpec wavSpec;
	Uint8* wavStart;
	Uint32 wavLength;
	std::string file = /* path to wav file - make sure to escape any slashes */
	std::cout << "the file is: " << file << std::endl;

	// load the wav file and some of its properties to the specified variables
	if(SDL_LoadWAV(file.c_str(), &wavSpec, &wavStart, &wavLength) == NULL){
		std::cout << "couldn't load wav file" << std::endl;
		return 1;
	}

	// convert audio data to F32
	SDL_AudioCVT cvt;
	SDL_BuildAudioCVT(&cvt, AUDIO_S16, 2, 48000, AUDIO_F32, 2, 48000);
	cvt.len = wavLength;
	cvt.buf = (Uint8 *)SDL_malloc(cvt.len * cvt.len_mult);

	// copy current audio data to the buffer (dest, src, len)
	SDL_memcpy(cvt.buf, wavStart, wavLength); // wavLength is the total number of bytes the audio data takes up
	SDL_ConvertAudio(&cvt);

	// audio data is now in float form!
	float* newData = (float *)cvt.buf;

	std::vector<float> leftChannel;
	std::vector<float> rightChannel;

	// divide by 4 since cvt.len_cvt is total bytes of the buffer, and 4 bytes per float
	int floatBufLen = (int)cvt.len_cvt / 4;
	int count = 0; // if 0, left channel. 1 for right channel
	for(int i = 0; i < floatBufLen; i++){
		if(count == 0){
			leftChannel.push_back(newData[i]);
			count++;
		}else{
			rightChannel.push_back(newData[i]);
			count--;
		}
	}

	// now eliminate the vocal by getting the diff between left and right and dividing by 2
	std::vector<float> modifiedData;
	for(int j = 0; j < (int)leftChannel.size(); j++){
		float temp = (leftChannel[j] - rightChannel[j]) / 2.0;
		modifiedData.push_back(temp);
	}

	// set up another SDL_AudioSpec with 1 channel to play the modified audio buffer of wavSpec
	SDL_AudioSpec karaokeAudio;
	karaokeAudio.freq = wavSpec.freq;
	karaokeAudio.format = AUDIO_F32;
	karaokeAudio.channels = 1;
	karaokeAudio.samples = wavSpec.samples;
	karaokeAudio.callback = audioCallback;

	AudioData audio;
	audio.position = (Uint8*)modifiedData.data();
	audio.length = (Uint32)(modifiedData.size() * sizeof(float));

	karaokeAudio.userdata = &audio;

	SDL_AudioDeviceID audioDevice;
	audioDevice = SDL_OpenAudioDevice(NULL, 0, &karaokeAudio, NULL, 0);

	// play
	SDL_PauseAudioDevice(audioDevice, 0);

	while(audio.length > 0){
		SDL_Delay(1000); // set some delay so program doesn't immediately quit
	}

	// done playing audio
	SDL_CloseAudioDevice(audioDevice);
	SDL_FreeWAV(wavStart);
	SDL_Quit();

	return 0;
}
	#include <iostream>
	#include <vector>
	#include <SDL.h>

	// some resources I visited
	// https://adamtcroft.com/playing-sound-with-sdl-c/
	// https://davidgow.net/handmadepenguin/ch7.html
	// https://gigi.nullneuron.net/gigilabs/playing-a-wav-file-using-sdl2/
	// https://forums.libsdl.org/viewtopic.php?p=50240
	// https://stackoverflow.com/questions/10110905/simple-sound-wave-generator-with-sdl-in-c
	// https://stackoverflow.com/questions/8258398/c-how-to-combine-two-signed-8-bit-numbers-to-a-16-bit-short-unexplainable-res
	// https://stackoverflow.com/questions/28632721/does-16bit-integer-pcm-data-mean-its-signed-or-unsigned
	// https://stackoverflow.com/questions/34897166/sdl2-audio-callback-restricted-to-8bit
	// https://opensource.apple.com/source/IOAudioFamily/IOAudioFamily-183.4.2/Examples/Templates/SamplePCIAudioDriver/SampleAudioClip.cpp.auto.html
	// https://stackoverflow.com/questions/15087668/how-to-convert-pcm-samples-in-byte-array-as-floating-point-numbers-in-the-range
	// https://stackoverflow.com/questions/46177712/convert-int16-array-to-float

	// some wav file info
	// http://www.cplusplus.com/forum/general/205408/



	// Playback cursor handed to the audio callback through SDL_AudioSpec::userdata.
	// The callback copies bytes starting at `position`, then advances `position`
	// and shrinks `length` by the amount it copied.
	struct AudioData
	{
		Uint8 *position; // next byte of sample data that has not been played yet
		Uint32 length;   // how many bytes remain from `position` onward
	};


	// define an audio callback that SDL_AudioSpec will use
	void audioCallback(void* userData, Uint8* stream, int length){

	AudioData* audio = (AudioData*)userData;
	float* streamF = (float *)stream;

	if(audio->length == 0){
	// stop playing stream here??
	return;
	}

	// length is number of bytes of userData's audio data
	Uint32 len = (Uint32)length;

	if(len > audio->length){
	len = audio->length;
	}

	// copy len bytes from audio stream at audio->position to stream buffer
	SDL_memcpy(streamF, audio->position, len);

	audio->position += len;
	audio->length -= len;
	}


	int main(int argc, char **argv){

	// initialize SDL before doing anything else
	if(SDL_Init(SDL_INIT_AUDIO) != 0){
	std::cout << "Error initializing SDL!" << std::endl;
	return 1;
	}

	// set up an AudioSpec to load in the file
	SDL_AudioSpec wavSpec;
	Uint8* wavStart;
	Uint32 wavLength;
	std::string file = /* path to wav file - make sure to escape any slashes */
	std::cout << "the file is: " << file << std::endl;

	// load the wav file and some of its properties to the specified variables
	if(SDL_LoadWAV(file.c_str(), &wavSpec, &wavStart, &wavLength) == NULL){
	std::cout << "couldn't load wav file" << std::endl;
	return 1;
	}

	// convert audio data to F32
	SDL_AudioCVT cvt;
	SDL_BuildAudioCVT(&cvt, AUDIO_S16, 2, 48000, AUDIO_F32, 2, 48000);
	cvt.len = wavLength;
	cvt.buf = (Uint8 )SDL_malloc(cvt.len cvt.len_mult);

	// copy current audio data to the buffer (dest, src, len)
	SDL_memcpy(cvt.buf, wavStart, wavLength); // wavLength is the total number of bytes the audio data takes up
	SDL_ConvertAudio(&cvt);

	// audio data is now in float form!
	float* newData = (float *)cvt.buf;

	std::vector<float> leftChannel;
	std::vector<float> rightChannel;

	// divide by 4 since cvt.len_cvt is total bytes of the buffer, and 4 bytes per float
	int floatBufLen = (int)cvt.len_cvt / 4;
	int count = 0; // if 0, left channel. 1 for right channel
	for(int i = 0; i < floatBufLen; i++){
	if(count == 0){
	leftChannel.push_back(newData[i]);
	count++;
	}else{
	rightChannel.push_back(newData[i]);
	count--;
	}
	}

	// now eliminate the vocal by getting the diff between left and right and dividing by 2
	std::vector<float> modifiedData;
	for(int j = 0; j < (int)leftChannel.size(); j++){
	float temp = (leftChannel[j] - rightChannel[j]) / 2.0;
	modifiedData.push_back(temp);
	}

	// set up another SDL_AudioSpec with 1 channel to play the modified audio buffer of wavSpec
	SDL_AudioSpec karaokeAudio;
	karaokeAudio.freq = wavSpec.freq;
	karaokeAudio.format = AUDIO_F32;
	karaokeAudio.channels = 1;
	karaokeAudio.samples = wavSpec.samples;
	karaokeAudio.callback = audioCallback;

	AudioData audio;
	audio.position = (Uint8*)modifiedData.data();
	audio.length = (Uint32)(modifiedData.size() * sizeof(float));

	karaokeAudio.userdata = &audio;

	SDL_AudioDeviceID audioDevice;
	audioDevice = SDL_OpenAudioDevice(NULL, 0, &karaokeAudio, NULL, 0);

	// play
	SDL_PauseAudioDevice(audioDevice, 0);

	while(audio.length > 0){
	SDL_Delay(1000); // set some delay so program doesn't immediately quit
	}

	// done playing audio
	SDL_CloseAudioDevice(audioDevice);
	SDL_FreeWAV(wavStart);
	SDL_Quit();

	return 0;
	}