Skip to content

Instantly share code, notes, and snippets.

@targodan
Last active December 20, 2023 13:39
Show Gist options
  • Save targodan/8cef8f2b682a30055aa7937060cd94b7 to your computer and use it in GitHub Desktop.
Save targodan/8cef8f2b682a30055aa7937060cd94b7 to your computer and use it in GitHub Desktop.
Complementing code for my blog post "Decoding audio files with ffmpeg": https://www.targodan.de/post/decoding-audio-files-with-ffmpeg

Decoding audio files with ffmpeg

The contained code is part of Luca Corbatto's blog post "Decoding audio files with ffmpeg". The code may be used as per the MIT License. The full text of the MIT License can be found in the contained file called "LICENSE".

Building the code

For building the code you can just use the provided build script "build.sh".

$ ./build.sh decode

By default the gcc compiler is used; you can select a different compiler via the CC environment variable.

$ CC=g++ ./build.sh decode
#!/bin/bash
# Build "<name>.c" into the executable "<name>", linking against the libav* libraries.
# Usage: [CC=compiler] ./build.sh <name>

if [[ -z "$1" ]]; then
    echo "Usage: $0 <name>   (compiles <name>.c into <name>)" >&2
    exit 1
fi

# Default to gcc unless the caller selected a compiler via CC.
if [[ "$CC" == "" ]]; then
    CC=gcc
fi

# Query compile and link flags. Both invocations are checked: a plain "$?" test after
# the two assignments would only see the result of the second one.
if ! C_FLAGS="$(pkg-config --cflags libavformat libavcodec libavutil)" \
    || ! LD_FLAGS="$(pkg-config --libs libavformat libavcodec libavutil)"; then
    echo "pkg-config not working... trying some default flags."
    # The fallback has to land in the variables that the compile command actually uses.
    C_FLAGS=""
    LD_FLAGS="-lavformat -lavcodec -lavutil"
fi

# Show the command first, then run it. The flag variables are intentionally unquoted
# so that they split into individual compiler arguments.
echo $CC -g "$1.c" -o "$1" -lm $C_FLAGS $LD_FLAGS
$CC -g "$1.c" -o "$1" -lm $C_FLAGS $LD_FLAGS
#ifdef __cplusplus
extern "C" {
#endif
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#ifdef __cplusplus
}
#endif
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
// This file is meant to compile both as C and as C++ (see the CC variable of the build script).
#ifndef __cplusplus
// C has no built-in bool/true/false before C23; <stdbool.h> provides them portably
// and is harmless on compilers where they are already keywords.
#include <stdbool.h>
#endif

// Cast helpers: map to the named C++ casts when compiled as C++ and to a plain
// C cast otherwise.
#ifdef __cplusplus
#define REINTERPRET_CAST(type, variable) reinterpret_cast<type>(variable)
#define STATIC_CAST(type, variable) static_cast<type>(variable)
#else
// The argument is parenthesized so that the cast applies to the whole expression,
// e.g. STATIC_CAST(float, a / b) converts the quotient and not just "a".
#define C_CAST(type, variable) ((type)(variable))
#define REINTERPRET_CAST(type, variable) C_CAST(type, variable)
#define STATIC_CAST(type, variable) C_CAST(type, variable)
#endif

// Output file that decoded audio is written to; opened in main().
FILE* outFile;

// Controls the output for packed (non-planar) input only, despite its name:
// true  => the packed frame data is written unchanged,
// false => every sample is converted to float before it is written.
// Planar input is always converted to interleaved float.
// #define RAW_OUT_ON_PLANAR false
#define RAW_OUT_ON_PLANAR true
/**
 * Report a libav error code on stderr.
 *
 * An errorCode of 0 means "no error" and prints nothing. For any other code one line of
 * the form "<prefix> (<code>: <description>)" is written, where the description comes
 * from av_strerror() or is "UNKNOWN_ERROR" if av_strerror() itself reports a failure.
 *
 * Returns errorCode unchanged, so the call can be used directly in a return statement.
 */
int printError(const char* prefix, int errorCode) {
    char description[64];

    // Nothing to report.
    if(errorCode == 0) {
        return 0;
    }

    if(av_strerror(errorCode, description, sizeof(description)) != 0) {
        strcpy(description, "UNKNOWN_ERROR");
    }
    fprintf(stderr, "%s (%d: %s)\n", prefix, errorCode, description);
    return errorCode;
}
/**
 * Extract a single sample from a sample buffer and convert it to float.
 *
 * codecCtx    - supplies the sample format (codecCtx->sample_fmt) of the buffer.
 * buffer      - start of the sample data (one plane for planar formats, the
 *               interleaved data for packed formats).
 * sampleIndex - index of the sample inside buffer, counted in samples, not bytes.
 *
 * Integer formats are scaled by 2^(bits-1), so the result always lies in [-1, 1).
 * Float samples are returned as they are, double samples are narrowed to float.
 * For an unsupported format or sample size a message is printed to stderr and 0 is returned.
 */
float getSample(const AVCodecContext* codecCtx, uint8_t* buffer, int sampleIndex) {
    const int sampleSize = av_get_bytes_per_sample(codecCtx->sample_fmt);
    if(sampleSize <= 0) {
        fprintf(stderr, "Invalid sample size %d.\n", sampleSize);
        return 0;
    }

    // Address of the requested sample. The value is copied out with memcpy below instead of
    // dereferencing a casted pointer: that avoids strict-aliasing and alignment problems and
    // does not depend on the byte order of the machine.
    const uint8_t* src = buffer + STATIC_CAST(size_t, sampleIndex) * STATIC_CAST(size_t, sampleSize);

    switch(codecCtx->sample_fmt) {
        case AV_SAMPLE_FMT_U8:
        case AV_SAMPLE_FMT_U8P:
            // 8bit samples are unsigned with the zero level at 128.
            return (src[0] - 128) / 128.0f;
        case AV_SAMPLE_FMT_S16:
        case AV_SAMPLE_FMT_S16P: {
            int16_t value;
            memcpy(&value, src, sizeof(value));
            return value / 32768.0f;
        }
        case AV_SAMPLE_FMT_S32:
        case AV_SAMPLE_FMT_S32P: {
            int32_t value;
            memcpy(&value, src, sizeof(value));
            // Scale in double precision; 2^31 is not representable in a signed 32 bit int.
            const double scaled = value / 2147483648.0;
            return STATIC_CAST(float, scaled);
        }
        case AV_SAMPLE_FMT_FLT:
        case AV_SAMPLE_FMT_FLTP: {
            float value;
            memcpy(&value, src, sizeof(value));
            return value;
        }
        case AV_SAMPLE_FMT_DBL:
        case AV_SAMPLE_FMT_DBLP: {
            double value;
            memcpy(&value, src, sizeof(value));
            return STATIC_CAST(float, value);
        }
        default: {
            // The name lookup may yield NULL, which must not be passed to %s.
            const char* formatName = av_get_sample_fmt_name(codecCtx->sample_fmt);
            fprintf(stderr, "Invalid sample format %s.\n", formatName != NULL ? formatName : "(unknown)");
            return 0;
        }
    }
}
/**
 * Write one decoded frame to the global output file (outFile).
 *
 * Planar input: the channels are interleaved sample by sample and every sample is
 * converted to float via getSample().
 * Packed input: depending on RAW_OUT_ON_PLANAR the data is either written unchanged
 * (true) or converted to float sample by sample (false). Note that, despite its name,
 * the macro only affects this packed case.
 */
void handleFrame(const AVCodecContext* codecCtx, const AVFrame* frame) {
    if(av_sample_fmt_is_planar(codecCtx->sample_fmt) == 1) {
        // This means that the data of each channel is in its own buffer.
        // => frame->extended_data[i] contains data for the i-th channel.
        for(int s = 0; s < frame->nb_samples; ++s) {
            for(int c = 0; c < codecCtx->channels; ++c) {
                float sample = getSample(codecCtx, frame->extended_data[c], s);
                fwrite(&sample, sizeof(float), 1, outFile);
            }
        }
    } else if(RAW_OUT_ON_PLANAR) {
        // This means that the data of each channel is in the same buffer.
        // => frame->extended_data[0] contains data of all channels.
        // Only the bytes that hold samples are written. frame->linesize[0] is the size of
        // the buffer and may include alignment padding after the last sample.
        const size_t sampleCount = frame->nb_samples;
        const size_t channelCount = codecCtx->channels;
        const size_t bytesPerSample = av_get_bytes_per_sample(codecCtx->sample_fmt);
        fwrite(frame->extended_data[0], 1, sampleCount * channelCount * bytesPerSample, outFile);
    } else {
        // Packed data, converted: sample s of channel c is at index s*channels+c.
        for(int s = 0; s < frame->nb_samples; ++s) {
            for(int c = 0; c < codecCtx->channels; ++c) {
                float sample = getSample(codecCtx, frame->extended_data[0], s*codecCtx->channels+c);
                fwrite(&sample, sizeof(float), 1, outFile);
            }
        }
    }
}
/**
 * Return the index of the first stream in formatCtx whose codec type is audio,
 * or -1 if none of the streams is an audio stream.
 */
int findAudioStream(const AVFormatContext* formatCtx) {
    int found = -1;
    size_t pos = 0;
    // NOTE: A file may contain several audio streams; only the first one is used.
    while(found == -1 && pos < formatCtx->nb_streams) {
        if(formatCtx->streams[pos]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            found = pos;
        }
        ++pos;
    }
    return found;
}
/*
 * Print a summary of the decoder and of the selected audio stream to stderr:
 * the codec name, the sample formats listed by the codec (if it lists any) and the
 * stream index, sample format, rate, size, channel count and layout of the stream.
 * The last line tells whether the output file will contain float samples.
 */
void printStreamInformation(const AVCodec* codec, const AVCodecContext* codecCtx, int audioStreamIndex) {
    fprintf(stderr, "Codec: %s\n", codec->long_name);

    if(codec->sample_fmts != NULL) {
        int idx = 0;
        fprintf(stderr, "Supported sample formats: ");
        // The list is terminated by -1; entries are separated by ", ".
        while(codec->sample_fmts[idx] != -1) {
            if(idx > 0) {
                fprintf(stderr, ", ");
            }
            fprintf(stderr, "%s", av_get_sample_fmt_name(codec->sample_fmts[idx]));
            ++idx;
        }
        fprintf(stderr, "\n");
    }

    fprintf(stderr, "---------\n");
    fprintf(stderr, "Stream: %7d\n", audioStreamIndex);
    fprintf(stderr, "Sample Format: %7s\n", av_get_sample_fmt_name(codecCtx->sample_fmt));
    fprintf(stderr, "Sample Rate: %7d\n", codecCtx->sample_rate);
    fprintf(stderr, "Sample Size: %7d\n", av_get_bytes_per_sample(codecCtx->sample_fmt));
    fprintf(stderr, "Channels: %7d\n", codecCtx->channels);
    fprintf(stderr, "Planar: %7d\n", av_sample_fmt_is_planar(codecCtx->sample_fmt));

    // Float is written unless raw output is enabled and the data is packed.
    const char* floatOutput = "no";
    if(!RAW_OUT_ON_PLANAR || av_sample_fmt_is_planar(codecCtx->sample_fmt)) {
        floatOutput = "yes";
    }
    fprintf(stderr, "Float Output: %7s\n", floatOutput);
}
/**
 * Pull frames out of the decoder until it stops delivering and pass each one to handleFrame().
 *
 * Returns the first non-zero result of avcodec_receive_frame(), i.e. the reason why
 * no further frame could be received.
 */
int receiveAndHandle(AVCodecContext* codecCtx, AVFrame* frame) {
    // NOTE: A single packet may yield more than one frame, depending on the codec.
    for(;;) {
        const int status = avcodec_receive_frame(codecCtx, frame);
        if(status != 0) {
            return status;
        }
        handleFrame(codecCtx, frame);
        // Release the frame's buffers and reset its fields before it is reused.
        av_frame_unref(frame);
    }
}
/*
 * Flush the decoder: send a NULL packet to switch it into drain-mode and handle
 * every frame it still delivers. Failures are reported on stderr via printError().
 */
void drainDecoder(AVCodecContext* codecCtx, AVFrame* frame) {
    // Some codecs buffer frames internally. Sending NULL activates drain-mode.
    const int sendStatus = avcodec_send_packet(codecCtx, NULL);
    if(sendStatus != 0) {
        printError("Send error.", sendStatus);
        return;
    }

    // Fetch whatever is left in the decoder.
    const int receiveStatus = receiveAndHandle(codecCtx, frame);
    if(receiveStatus == AVERROR(EAGAIN) || receiveStatus == AVERROR_EOF) {
        // Both are regular ways for the receive loop to end.
        return;
    }
    printError("Receive error.", receiveStatus);
}
/**
 * Decode the first audio stream of the file given as the only command line argument
 * and write the samples to "<infile>.raw".
 *
 * Returns 1 on wrong usage, a non-zero value if the setup fails (output file, input
 * file, stream or decoder) and 0 once decoding has been started. Errors that occur
 * while decoding are reported on stderr only.
 */
int main(int argc, char *argv[]) {
    // Everything that needs to be released is declared up front. That way the single
    // cleanup section at the end can free whatever has been acquired so far, and no
    // goto jumps over an initialization (which C++ does not allow).
    int err = 0;
    int exitCode = -1;
    int audioStreamIndex = -1;
    char* filename = NULL;
    char* outFilename = NULL;
    size_t filenameLen = 0;
    AVFormatContext* formatCtx = NULL;
    const AVCodec* codec = NULL;
    AVCodecContext* codecCtx = NULL;
    AVFrame* frame = NULL;
    AVPacket* packet = NULL;

    if(argc != 2) {
        printf("Usage: decode <audiofile>\n");
        return 1;
    }
    // Get the filename.
    filename = argv[1];
    filenameLen = strlen(filename);

    // Open the outfile called "<infile>.raw" (4 characters for ".raw" plus the terminator).
    outFilename = REINTERPRET_CAST(char*, malloc(filenameLen + 5));
    if(outFilename == NULL) {
        fprintf(stderr, "Out of memory.\n");
        return -1;
    }
    strcpy(outFilename, filename);
    strcpy(outFilename + filenameLen, ".raw");
    // Binary mode: the file receives raw sample data, which must not be subject to
    // newline translation.
    outFile = fopen(outFilename, "wb");
    if(outFile == NULL) {
        fprintf(stderr, "Unable to open output file \"%s\".\n", outFilename);
        free(outFilename);
        return -1;
    }
    free(outFilename);

#if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(58, 9, 100)
    // Old libavformat versions need all muxers, demuxers and protocols registered
    // explicitly. Newer versions do this on their own and no longer offer the call.
    av_register_all();
#endif

    // Open the file and read the header.
    if((err = avformat_open_input(&formatCtx, filename, NULL, NULL)) != 0) {
        exitCode = printError("Error opening file.", err);
        goto cleanup;
    }

    // In case the file had no header, read some frames and find out which format and codecs are used.
    // This does not consume any data. Any read packets are buffered for later use.
    if((err = avformat_find_stream_info(formatCtx, NULL)) < 0) {
        // Reported but not fatal: the header alone may already describe the streams.
        printError("Error reading stream information.", err);
    }

    // Try to find an audio stream.
    audioStreamIndex = findAudioStream(formatCtx);
    if(audioStreamIndex == -1) {
        fprintf(stderr, "None of the available %d streams are audio streams.\n", formatCtx->nb_streams);
        goto cleanup;
    }

    // Find the correct decoder for the codec.
    codec = avcodec_find_decoder(formatCtx->streams[audioStreamIndex]->codecpar->codec_id);
    if(codec == NULL) {
        fprintf(stderr, "Decoder not found. The codec is not supported.\n");
        goto cleanup;
    }

    // Initialize codec context for the decoder.
    codecCtx = avcodec_alloc_context3(codec);
    if(codecCtx == NULL) {
        fprintf(stderr, "Could not allocate a decoding context.\n");
        goto cleanup;
    }

    // Fill the codecCtx with the parameters of the codec used in the read file.
    if((err = avcodec_parameters_to_context(codecCtx, formatCtx->streams[audioStreamIndex]->codecpar)) < 0) {
        exitCode = printError("Error setting codec context parameters.", err);
        goto cleanup;
    }

    // Explicitly request non planar data.
    codecCtx->request_sample_fmt = av_get_alt_sample_fmt(codecCtx->sample_fmt, 0);

    // Initialize the decoder.
    if((err = avcodec_open2(codecCtx, codec, NULL)) < 0) {
        printError("Error opening decoder.", err);
        goto cleanup;
    }

    // Print some interesting file information.
    printStreamInformation(codec, codecCtx, audioStreamIndex);

    frame = av_frame_alloc();
    packet = av_packet_alloc();
    if(frame == NULL || packet == NULL) {
        fprintf(stderr, "Could not allocate frame or packet.\n");
        goto cleanup;
    }

    while((err = av_read_frame(formatCtx, packet)) != AVERROR_EOF) {
        if(err != 0) {
            // Something went wrong.
            printError("Read error.", err);
            break; // Don't return, so we can clean up nicely.
        }
        // Does the packet belong to the correct stream?
        if(packet->stream_index != audioStreamIndex) {
            // Free the buffers used by the packet and reset all fields.
            av_packet_unref(packet);
            continue;
        }
        // We have a valid packet => send it to the decoder. The packet is released right
        // afterwards in either case, so a failed send does not leak its buffers.
        err = avcodec_send_packet(codecCtx, packet);
        av_packet_unref(packet);
        if(err != 0) {
            // EAGAIN is technically no error here but if it occurs we would need to buffer
            // the packet and send it again after receiving more frames. Thus we handle it as an error here.
            printError("Send error.", err);
            break; // Don't return, so we can clean up nicely.
        }
        // Receive and handle frames.
        // EAGAIN means we need to send before receiving again. So that's not an error.
        if((err = receiveAndHandle(codecCtx, frame)) != AVERROR(EAGAIN)) {
            // Not EAGAIN => Something went wrong.
            printError("Receive error.", err);
            break; // Don't return, so we can clean up nicely.
        }
    }

    // Drain the decoder.
    drainDecoder(codecCtx, frame);
    exitCode = 0;

cleanup:
    // Every one of these calls accepts a pointer to NULL, so the section is safe no
    // matter how far the setup got. avcodec_free_context() also closes the codec.
    av_packet_free(&packet);
    av_frame_free(&frame);
    avcodec_free_context(&codecCtx);
    avformat_close_input(&formatCtx);
    // Close the outfile.
    if(outFile != NULL) {
        fclose(outFile);
        outFile = NULL;
    }
    return exitCode;
}
The MIT License (MIT)
Copyright (c) 2016 Luca Corbatto
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@kartik-venugopal
Copy link

kartik-venugopal commented Jul 21, 2020

@Ayxan13 - If you give me more specifics about your issue, I can try to help. I got this code working just now (I'm developing in Swift for macOS).

Perhaps share the problem file with me (i.e. the audio file you tried to decode) ? I can try it on my end and provide feedback.

@SukitOwl
Copy link

This example save my day

@nji9nji9
Copy link

nji9nji9 commented Dec 8, 2023

@targodan - Thank you very much for your great article and code!

99, 105: dereferencing type-punned pointer
Might give problems when compiler optimized.

Due to ffmpeg 6.1 API changes:

243: av_register_all() gets obsolete now
263: const AVCodec* codec ("const" added)

deprecated APIs:

AVCodecContext::channels
void av_init_packet(AVPacket*)

@nji9nji9
Copy link

nji9nji9 commented Dec 12, 2023

planar/packed, sample format and size are "properties" of a AVCodecContext.
Deciding

  • planar/packed for ever frame (in handleFrame) and
  • sample format and size for every sample (in getSample)

all over again
doesn't reflect the structure/ is misleading.
(Moreover it's a performance issue).

As the typical usecase might not be to write a pcm file,
but to copy the samples to an application buffer,
providing 6 (#(planar/packed) * #(format groups)) small specific handlings
would offer the opportunity to code them highly efficient (mem ops) ...

Minor issue
132: Code should not rely on the compiler's optimizing abilities.
RAW_OUT_ON_PLANAR should be used as #ifdef to decide at compile time.
Moreover that should not be default.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment