-
-
Save roxlu/62f717bbaa69ac7196be to your computer and use it in GitHub Desktop.
libav how to encode audio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bool AV::addAudioFrame(unsigned char* buffer, int nsamples, int nchannels) { | |
AVCodecContext* c = ct.as->codec; | |
AVPacket packet = {0}; // data and size must be '0' (allocation is done for you :> ) | |
AVFrame* frame = avcodec_alloc_frame(); | |
int got_packet = 0; | |
// BUFFER HANDLING | |
int samples_stored = av_audio_fifo_write(ct.afifo, (void**)&buffer, nsamples); | |
if(samples_stored != nsamples) { | |
return false; | |
} | |
int nstored = av_audio_fifo_size(ct.afifo); | |
if(nstored < c->frame_size) { | |
return false; | |
} | |
av_init_packet(&packet); | |
int use_nsamples = c->frame_size; | |
frame->nb_samples = use_nsamples; // <-- important, must be set before avcodec_fill_audio_frame | |
// GET DATA FROM BUFFER | |
int num_bytes = av_samples_get_buffer_size(NULL, c->channels, use_nsamples, c->sample_fmt, 0); | |
uint8_t* my_buffer = (uint8_t*)av_malloc(num_bytes); | |
uint8_t** my_ptr = &my_buffer; | |
int nread = av_audio_fifo_read(ct.afifo, (void**)my_ptr, use_nsamples); | |
if(nread != use_nsamples) { | |
printf("We only read: %d but we wanted to read %d samples.\n", nread, use_nsamples); | |
av_free(my_buffer); | |
return false; | |
} | |
// FILL | |
int fill_result = avcodec_fill_audio_frame( | |
frame | |
,c->channels | |
,c->sample_fmt | |
,(uint8_t*)my_buffer | |
,num_bytes | |
,1 | |
); | |
if(fill_result != 0) { | |
char buf[1024]; | |
av_strerror(fill_result, buf, 1024); | |
printf("av error: %s\n",buf); | |
av_free(my_buffer); | |
return false; | |
} | |
// ENCODE | |
int enc_result = avcodec_encode_audio2(c, &packet, frame, &got_packet); | |
packet.stream_index = ct.as->index; | |
if(!got_packet) { | |
av_free(my_buffer); | |
return false; | |
} | |
if(enc_result < 0) { | |
char buf[1024]; | |
av_strerror(enc_result, buf, 1024); | |
printf("av error: %s\n",buf); | |
} | |
// WRITE | |
if(av_interleaved_write_frame(ct.c, &packet) != 0) { | |
printf("Cannot write audio frame.\n"); | |
av_free(my_buffer); | |
return false; | |
} | |
av_free(my_buffer); | |
return true; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Hi! | |
I'm using libav to encode raw RGB24 frames to h264 and muxing it to flv. This works | |
all fine and I've streamed for more than 48 hours without any problems! My next step | |
is to add audio to the stream. I'll be capturing live audio and I want to encode it | |
in real time using Speex, MP3 or Nellymoser. | |
Background info | |
---------------------- | |
I'm new to digital audio and therefore I might be doing things wrong. But basically my application | |
gets a "float" buffer with interleaved audio. The buffer contains 256 samples per channel, | |
and I have 2 channels. Because I might be mixing terminology, this is how I use the | |
data: | |
// input = array with audio samples | |
// bufferSize = 256 | |
// nChannels = 2 | |
// Converts one callback's worth of float samples to signed 16-bit and hands
// them to the libav wrapper.
//   input      = array with interleaved audio samples (bufferSize * nChannels floats)
//   bufferSize = number of samples per channel
//   nChannels  = number of interleaved channels
// Presumably the input is normalized to [-1, 1]; anything outside that range
// is clipped so the conversion to short can never overflow.
void audioIn(float * input, int bufferSize, int nChannels) {
  if(input == NULL || bufferSize <= 0 || nChannels <= 0) {
    return;
  }

  // convert from float to S16
  const int total = bufferSize * nChannels;
  const float scale = std::numeric_limits<short>::max();
  short* buf = new signed short[total];
  for(int i = 0; i < total; ++i) { // loop over every sample of every channel
    float s = input[i];
    if(s != s) {
      s = 0.0f; // NaN: emit silence instead of an undefined conversion
    }
    else if(s > 1.0f) {
      s = 1.0f;
    }
    else if(s < -1.0f) {
      s = -1.0f;
    }
    buf[i] = (short)(s * scale);
  }

  // add this to the libav wrapper.
  av.addAudioFrame((unsigned char*)buf, bufferSize, nChannels);
  delete[] buf;
}
Now that I have a buffer, where each sample is 16 bits, I pass this short* buffer, to my | |
wrapper "av.addAudioFrame()" function. In this function I create a buffer, before I encode | |
the audio. From what I read, the AVCodecContext of the audio encoder sets the frame_size. | |
This frame_size must match the number of samples in the buffer when calling avcodec_encode_audio2(). | |
Why I think this, is because of what is documented here: | |
http://libav.org/doxygen/master/group__lavc__encoding.html#ga93a49fbd0973b216dcb8a8c5dffe1d82 | |
Then, especially the line: "If it is not set, frame->nb_samples must be equal to avctx->frame_size | |
for all frames except the last." (Please correct me here if I'm wrong about this). | |
After encoding I call av_interleaved_write_frame() to actually write the frame. | |
When I use mp3 as codec my application runs for about 1-2 minutes and then my server, which is | |
receiving the video/audio stream (flv, tcp), disconnects with a message "Frame too large: 14485504". | |
Also in my logs I see several messages from libav: "Stream over/underflow detected." | |
Questions: | |
------------ | |
- There are quite a few bits I'm not sure of, even after going through the source code of libav, and therefore | |
I hope someone has a working example of encoding audio which comes from a buffer which | |
comes from "outside" libav (i.e. your own application). | |
- As I wrote above, I need to keep track of a buffer before I can encode. | |
Does someone else have some code which does this? I'm using AVAudioFifo now. | |
- I compiled with --enable-debug=3 and disabled optimizations, but I'm not seeing any | |
debug information. How can I make libav more verbose? | |
Thanks! | |
roxlu |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment