@areinull
Created July 17, 2014 12:34
Transcode audio from AMR-NB/AMR to Vorbis/Ogg with the FFmpeg libraries
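/*
 * Build sketch (assuming pkg-config can locate the FFmpeg development
 * packages; the file and binary names below are placeholders):
 *   gcc -o amr2ogg amr2ogg.c \
 *       $(pkg-config --cflags --libs libavformat libavcodec libswresample libavutil)
 * Usage: ./amr2ogg input.amr output.ogg
 */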
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libavutil/samplefmt.h>
#include <libavutil/timestamp.h>
#include <libavutil/audio_fifo.h>
#include <libavutil/avstring.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
/** The number of output channels */
#define OUTPUT_CHANNELS 2
/** The audio sample output format */
#define OUTPUT_SAMPLE_FORMAT AV_SAMPLE_FMT_FLTP
/** Input/output audio data buffer */
struct DataBuf
{
/** pointer to buffer */
uint8_t *buf;
/** buf allocated size */
size_t capacity;
/** data size in buf */
size_t size;
/** reading offset */
size_t offset;
};
/**
* Convert an error code into a text message.
* @param error Error code to be converted
* @return Corresponding error text (not thread-safe)
*/
static char const* get_error_text(int error)
{
static char error_buffer[255];
av_strerror(error, error_buffer, sizeof(error_buffer));
return error_buffer;
}
/** Initialize one data packet for reading or writing. */
static void init_packet(AVPacket *packet)
{
av_init_packet(packet);
/* Set the packet data and size so that it is recognized as being empty. */
packet->data = NULL;
packet->size = 0;
}
/** Write callback for output AVIOContext */
static int write_fn(void* opaque, uint8_t* buf, int buf_size)
{
struct DataBuf *out_buf = (struct DataBuf*)opaque;
/* allocate more space if needed */
if ((size_t)buf_size > out_buf->capacity - out_buf->size)
{
size_t new_cap = out_buf->capacity? out_buf->capacity*2: 256;
while ((size_t)buf_size > new_cap - out_buf->size)
{
new_cap *= 2;
}
uint8_t *tmp = (uint8_t*)realloc(out_buf->buf, new_cap);
if (!tmp)
return AVERROR(ENOMEM);
out_buf->buf = tmp;
out_buf->capacity = new_cap;
}
memcpy(out_buf->buf + out_buf->size, buf, buf_size);
out_buf->size += buf_size;
return buf_size;
}
/**
* Set up the Ogg output container and open the Vorbis encoder.
* Also set some basic encoder parameters.
*/
static int init_output(struct DataBuf *out_buf,
AVCodecContext *dec_ctx,
AVFormatContext **enc_fmt_ctx,
AVCodecContext **enc_ctx)
{
AVStream *stream = NULL;
AVCodec *output_codec = NULL;
int error;
uint8_t *filebuf = (uint8_t*)av_malloc(4096);
/* Allocate an output I/O context that writes into out_buf via write_fn */
AVIOContext *output_io_context = avio_alloc_context(filebuf, 4096, 1 /* write_flag */, out_buf, NULL, &write_fn, NULL);
if (!output_io_context)
{
fprintf(stderr, "Could not allocate output context\n");
return 1;
}
/* Create a new format context for the output container format. */
if (!(*enc_fmt_ctx = avformat_alloc_context()))
{
fprintf(stderr, "Could not allocate output format context\n");
return 1;
}
/* Associate the output file (pointer) with the container format context. */
(*enc_fmt_ctx)->pb = output_io_context;
/* Set the desired container format based on the MIME type */
if (!((*enc_fmt_ctx)->oformat = av_guess_format(NULL, NULL, "audio/ogg")))
{
fprintf(stderr, "Could not find output file format\n");
error = AVERROR_EXIT;
goto cleanup;
}
/* Find the encoder to be used by its name. */
if (!(output_codec = avcodec_find_encoder(AV_CODEC_ID_VORBIS)))
{
fprintf(stderr, "Could not find a Vorbis encoder\n");
error = AVERROR_EXIT;
goto cleanup;
}
/* Create a new audio stream in the output file container. */
if (!(stream = avformat_new_stream(*enc_fmt_ctx, output_codec)))
{
fprintf(stderr, "Could not create new stream\n");
error = AVERROR(ENOMEM);
goto cleanup;
}
/* Save the encoder context for access later. */
*enc_ctx = stream->codec;
/*
* Set the basic encoder parameters.
* The input file's sample rate is used to avoid a sample rate conversion.
*/
(*enc_ctx)->channels = OUTPUT_CHANNELS;
(*enc_ctx)->channel_layout = av_get_default_channel_layout(OUTPUT_CHANNELS);
(*enc_ctx)->sample_rate = dec_ctx->sample_rate;
stream->time_base = (AVRational){1, (*enc_ctx)->sample_rate};
(*enc_ctx)->sample_fmt = OUTPUT_SAMPLE_FORMAT;
(*enc_ctx)->bit_rate = dec_ctx->bit_rate;
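/* FFmpeg's native Vorbis encoder is flagged experimental, so experimental compliance has to be allowed explicitly. */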
(*enc_ctx)->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
/*
* Some container formats (like MP4) require global headers to be present
* Mark the encoder so that it behaves accordingly.
*/
if ((*enc_fmt_ctx)->oformat->flags & AVFMT_GLOBALHEADER)
(*enc_ctx)->flags |= CODEC_FLAG_GLOBAL_HEADER;
/* Open the encoder for the audio stream to use it later. */
if ((error = avcodec_open2(*enc_ctx, output_codec, NULL)) < 0)
{
fprintf(stderr, "Could not open output codec (error '%s')\n", get_error_text(error));
goto cleanup;
}
return 0;
cleanup:
av_free((*enc_fmt_ctx)->pb->buffer);
av_free((*enc_fmt_ctx)->pb);
avformat_free_context(*enc_fmt_ctx);
*enc_fmt_ctx = NULL;
return error;
}
/** Write the header of the output file container. */
static int write_output_file_header(AVFormatContext *enc_fmt_ctx)
{
int error;
if ((error = avformat_write_header(enc_fmt_ctx, NULL)) < 0)
{
fprintf(stderr, "Could not write output file header (error '%s')\n", get_error_text(error));
return error;
}
return 0;
}
/*
* Initialize one output frame for writing to the output buffer.
* The frame will be exactly frame_size samples large.
*/
static int init_output_frame(AVFrame **frame,
AVCodecContext *enc_ctx,
int frame_size)
{
int error;
/* Create a new frame to store the audio samples. */
if (!(*frame = av_frame_alloc()))
{
fprintf(stderr, "Could not allocate output frame\n");
return AVERROR_EXIT;
}
/*
* Set the frame's parameters, especially its size and format.
* av_frame_get_buffer needs this to allocate memory for the
* audio samples of the frame.
* Default channel layouts based on the number of channels
* are assumed for simplicity.
*/
(*frame)->nb_samples = frame_size;
(*frame)->channel_layout = enc_ctx->channel_layout;
(*frame)->format = enc_ctx->sample_fmt;
(*frame)->sample_rate = enc_ctx->sample_rate;
/*
* Allocate the samples of the created frame. This call will make
* sure that the audio frame can hold as many samples as specified.
*/
if ((error = av_frame_get_buffer(*frame, 0)) < 0)
{
fprintf(stderr, "Could allocate output frame samples (error '%s')\n", get_error_text(error));
av_frame_free(frame);
return error;
}
return 0;
}
/* Encode one frame worth of audio to the output buffer. */
static int encode_frame(AVFrame *frame,
AVFormatContext *enc_fmt_ctx,
AVCodecContext *enc_ctx,
int *data_present)
{
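/*
* Running count of samples written so far, used as pts/dts of each packet;
* the output stream time base was set to 1/sample_rate in init_output().
*/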
static int64_t samples_written = 0;
/* Packet used for temporary storage. */
AVPacket output_packet;
int error;
init_packet(&output_packet);
/*
* Encode the audio frame and store it in the temporary packet.
* The output audio stream encoder is used to do this.
*/
if ((error = avcodec_encode_audio2(enc_ctx, &output_packet,
frame, data_present)) < 0)
{
fprintf(stderr, "Could not encode frame (error '%s')\n", get_error_text(error));
av_free_packet(&output_packet);
return error;
}
/* Write one audio frame from the temporary packet to the output buffer. */
if (*data_present)
{
// printf("0 output_packet dts %ld pts %ld\n", output_packet.dts, output_packet.pts);
output_packet.dts = output_packet.pts = samples_written;
if (frame)
samples_written += frame->nb_samples;
// printf("1 output_packet dts %ld pts %ld\n", output_packet.dts, output_packet.pts);
if ((error = av_write_frame(enc_fmt_ctx, &output_packet)) < 0)
{
fprintf(stderr, "Could not write frame (error '%s')\n", get_error_text(error));
av_free_packet(&output_packet);
return error;
}
av_free_packet(&output_packet);
}
return 0;
}
/**
* Load one audio frame from the FIFO buffer, encode and write it to the
* output buffer.
*/
static int load_encode_and_write(AVAudioFifo *fifo,
AVFormatContext *enc_fmt_ctx,
AVCodecContext *enc_ctx)
{
/* Temporary storage of the output samples of the frame written to the buffer */
AVFrame *output_frame;
/*
* Use the encoder's frame size, unless the FIFO holds fewer samples
* than that (which can happen for the last frame of the input).
*/
const int frame_size = FFMIN(av_audio_fifo_size(fifo), enc_ctx->frame_size);
int data_written;
/* Initialize temporary storage for one output frame. */
if (init_output_frame(&output_frame, enc_ctx, frame_size))
return AVERROR_EXIT;
/*
* Read as many samples from the FIFO buffer as required to fill the frame.
* The samples are stored in the frame temporarily.
*/
if (av_audio_fifo_read(fifo, (void **)output_frame->data, frame_size) < frame_size)
{
fprintf(stderr, "Could not read data from FIFO\n");
av_frame_free(&output_frame);
return AVERROR_EXIT;
}
/* Encode one frame worth of audio samples. */
if (encode_frame(output_frame, enc_fmt_ctx, enc_ctx, &data_written))
{
av_frame_free(&output_frame);
return AVERROR_EXIT;
}
av_frame_free(&output_frame);
return 0;
}
/** Write the trailer of the output file container. */
static int write_output_file_trailer(AVFormatContext *enc_fmt_ctx)
{
int error;
if ((error = av_write_trailer(enc_fmt_ctx)) < 0)
{
fprintf(stderr, "Could not write output file trailer (error '%s')\n", get_error_text(error));
return error;
}
return 0;
}
/** Decode one packet */
static int decode_packet(AVFrame *frame, AVPacket *pkt, AVCodecContext *dec_ctx, int *got_frame)
{
// static int audio_frame_count = 0;
int ret = 0;
int decoded = pkt->size;
*got_frame = 0;
ret = avcodec_decode_audio4(dec_ctx, frame, got_frame, pkt);
if (ret < 0)
{
fprintf(stderr, "Error decoding audio frame (%s)\n", av_err2str(ret));
return ret;
}
/* Some audio decoders decode only part of the packet, and have to be
* called again with the remainder of the packet data.
* Sample: fate-suite/lossless-audio/luckynight-partial.shn
* Also, some decoders might over-read the packet. */
decoded = FFMIN(ret, pkt->size);
/*
if (*got_frame) {
printf("audio_frame%s n:%d nb_samples:%d pts:%s\n",
cached ? "(cached)" : "",
audio_frame_count++, frame->nb_samples,
av_ts2timestr(frame->pts, &dec_ctx->time_base));
}
*/
return decoded;
}
/**
* Initialize a temporary storage for the specified number of audio samples.
* The conversion requires temporary storage due to the different format.
* The number of audio samples to be allocated is specified in frame_size.
*/
static int init_converted_samples(uint8_t ***converted_input_samples, int frame_size)
{
int error;
/*
* Allocate as many pointers as there are audio channels.
* Each pointer will later point to the audio samples of the corresponding
* channels (although it may be NULL for interleaved formats).
*/
if (!(*converted_input_samples = calloc(OUTPUT_CHANNELS, sizeof(**converted_input_samples))))
{
fprintf(stderr, "Could not allocate converted input sample pointers\n");
return AVERROR(ENOMEM);
}
/*
* Allocate memory for the samples of all channels in one consecutive
* block for convenience.
*/
if ((error = av_samples_alloc(*converted_input_samples, NULL,
OUTPUT_CHANNELS,
frame_size,
OUTPUT_SAMPLE_FORMAT, 0)) < 0)
{
fprintf(stderr,
"Could not allocate converted input samples (error '%s')\n",
get_error_text(error));
av_freep(&(*converted_input_samples)[0]);
free(*converted_input_samples);
return error;
}
return 0;
}
/**
* Convert the input audio samples into the output sample format.
* The conversion happens on a per-frame basis, the size of which is specified
* by frame_size.
*/
static int convert_samples(SwrContext *resample_ctx, const uint8_t **input_data,
uint8_t **converted_data, int frame_size)
{
int error;
/* Convert the samples using the resampler. */
if ((error = swr_convert(resample_ctx,
converted_data, frame_size,
input_data , frame_size)) < 0)
{
fprintf(stderr, "Could not convert input samples (error '%s')\n",
get_error_text(error));
return error;
}
return 0;
}
/** Add converted input audio samples to the FIFO buffer for later processing. */
static int add_samples_to_fifo(AVAudioFifo *fifo, uint8_t **converted_input_samples, const int frame_size)
{
int error;
/*
* Make the FIFO as large as it needs to be to hold both
* the old and the new samples.
*/
if ((error = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + frame_size)) < 0)
{
fprintf(stderr, "Could not reallocate FIFO\n");
return error;
}
/* Store the new samples in the FIFO buffer. */
if (av_audio_fifo_write(fifo, (void **)converted_input_samples, frame_size) < frame_size)
{
fprintf(stderr, "Could not write data to FIFO\n");
return AVERROR_EXIT;
}
return 0;
}
/** Decode packet and write frame to fifo */
static int decode_process_packet(AVAudioFifo *fifo, SwrContext *resample_ctx, AVFrame *frame,
AVPacket *pkt, AVCodecContext *dec_ctx, int *got_frame)
{
uint8_t **converted_input_samples = NULL;
int ret = -1;
// printf("input packet pts %ld dts %ld\n", pkt->pts, pkt->dts);
ret = decode_packet(frame, pkt, dec_ctx, got_frame);
if (*got_frame)
{
/* Initialize the temporary storage for the converted input samples. */
if (init_converted_samples(&converted_input_samples, frame->nb_samples))
{
ret = -1;
goto cleanup;
}
/*
* Convert the input samples to the desired output sample format.
* This requires a temporary storage provided by converted_input_samples.
*/
if (convert_samples(resample_ctx, (const uint8_t**)frame->extended_data, converted_input_samples,
frame->nb_samples))
{
ret = -1;
goto cleanup;
}
/* Add the converted input samples to the FIFO buffer for later processing. */
if (add_samples_to_fifo(fifo, converted_input_samples, frame->nb_samples))
{
ret = -1;
goto cleanup;
}
}
cleanup:
if (converted_input_samples)
{
av_freep(&converted_input_samples[0]);
free(converted_input_samples);
}
return ret;
}
/** Init input codec context */
static int open_codec_context(AVFormatContext *dec_fmt_ctx, AVCodecContext **dec_ctx)
{
const enum AVMediaType type = AVMEDIA_TYPE_AUDIO;
int ret;
AVStream *st = NULL;
AVCodec *dec = NULL;
ret = av_find_best_stream(dec_fmt_ctx, type, -1, -1, NULL, 0);
if (ret < 0) {
fprintf(stderr, "Could not find %s stream in input\n", av_get_media_type_string(type));
return ret;
}
else if (ret > 0)
{
fprintf(stderr, "Expected one input stream\n");
return -1;
}
st = dec_fmt_ctx->streams[0];
/* find decoder for the stream */
*dec_ctx = st->codec;
dec = avcodec_find_decoder((*dec_ctx)->codec_id);
if (!dec)
{
fprintf(stderr, "Failed to find %s codec\n",
av_get_media_type_string(type));
return AVERROR(EINVAL);
}
/* Init the decoders, with or without reference counting */
if ((ret = avcodec_open2(*dec_ctx, dec, NULL)) < 0)
{
fprintf(stderr, "Failed to open %s codec\n",
av_get_media_type_string(type));
return ret;
}
return 0;
}
/** Init resampler context */
static int init_resampler(SwrContext **resample_ctx, AVCodecContext *dec_ctx)
{
int error;
/*
* Create a resampler context for the conversion.
* Set the conversion parameters.
* Default channel layouts based on the number of channels
* are assumed for simplicity (they are sometimes not detected
* properly by the demuxer and/or decoder).
*/
*resample_ctx = swr_alloc_set_opts(NULL,
av_get_default_channel_layout(OUTPUT_CHANNELS),
OUTPUT_SAMPLE_FORMAT,
dec_ctx->sample_rate,
av_get_default_channel_layout(dec_ctx->channels),
dec_ctx->sample_fmt,
dec_ctx->sample_rate,
0, NULL);
if (!*resample_ctx)
{
fprintf(stderr, "Could not allocate resample context\n");
return AVERROR(ENOMEM);
}
/* Open the resampler with the specified parameters. */
if ((error = swr_init(*resample_ctx)) < 0)
{
fprintf(stderr, "Could not open resample context\n");
swr_free(resample_ctx);
return error;
}
return 0;
}
/** Read callback for input AVIOContext */
static int read_fn(void* opaque, uint8_t* buf, int buf_size)
{
struct DataBuf *in_buf = (struct DataBuf*)opaque;
if ((size_t)buf_size > in_buf->size - in_buf->offset)
{
buf_size = in_buf->size - in_buf->offset;
}
memcpy(buf, in_buf->buf + in_buf->offset, buf_size);
in_buf->offset += buf_size;
return buf_size;
}
/**
* Init input codec and stream
*/
static int init_input(struct DataBuf *src, AVFormatContext **dec_fmt_ctx, AVCodecContext **dec_ctx)
{
AVStream *input_stream = NULL;
uint8_t *filebuf = (uint8_t*)av_malloc(4096);
AVIOContext *avio = avio_alloc_context(filebuf, 4096, 0, src, &read_fn, NULL, NULL);
*dec_fmt_ctx = avformat_alloc_context();
(*dec_fmt_ctx)->pb = avio;
/* open input file, and allocate format context */
if (avformat_open_input(dec_fmt_ctx, "", NULL, NULL) < 0)
{
fprintf(stderr, "Could not open source\n");
return 1;
}
/* retrieve stream information */
if (avformat_find_stream_info(*dec_fmt_ctx, NULL) < 0)
{
fprintf(stderr, "Could not find stream information\n");
return 1;
}
if (open_codec_context(*dec_fmt_ctx, dec_ctx) >= 0)
{
input_stream = (*dec_fmt_ctx)->streams[0];
}
/* dump input information to stderr */
// av_dump_format(*dec_fmt_ctx, 0, NULL, 0);
if (!input_stream)
{
fprintf(stderr, "Could not find audio or video stream in the input, aborting\n");
return 1;
}
return 0;
}
/**
* Transcode AMR audio from the src buffer to Vorbis/Ogg.
* On success dst->buf points to the transcoded data (dst->size bytes); on error dst->buf is NULL.
* The caller must release dst->buf with free().
*/
static void transcode(struct DataBuf *src, struct DataBuf *dst)
{
AVFormatContext *dec_fmt_ctx = NULL, *enc_fmt_ctx = NULL;
AVCodecContext *dec_ctx = NULL, *enc_ctx = NULL;
AVAudioFifo *fifo = NULL;
SwrContext *resample_ctx = NULL;
AVFrame *frame = NULL;
AVPacket pkt;
int ret = 0, got_frame, data_written;
dst->buf = NULL;
dst->size = dst->capacity = 0;
/* register all formats and codecs */
av_register_all();
if (init_input(src, &dec_fmt_ctx, &dec_ctx))
{
fprintf(stderr, "open_input_file failed\n");
dst->buf = NULL;
goto end;
}
frame = av_frame_alloc();
if (!frame)
{
fprintf(stderr, "Could not allocate frame\n");
dst->buf = NULL;
goto end;
}
/* initialize packet, set data to NULL, let the demuxer fill it */
init_packet(&pkt);
/* Initialize the resampler to be able to convert audio sample formats. */
if (init_resampler(&resample_ctx, dec_ctx))
{
fprintf(stderr, "Failed to initialize resampler\n");
dst->buf = NULL;
goto end;
}
/* Create the FIFO buffer based on the specified output sample format. */
if (!(fifo = av_audio_fifo_alloc(OUTPUT_SAMPLE_FORMAT, OUTPUT_CHANNELS, 1)))
{
fprintf(stderr, "Could not allocate FIFO\n");
dst->buf = NULL;
goto end;
}
/* read frames from the file */
while ((ret = av_read_frame(dec_fmt_ctx, &pkt)) >= 0)
{
AVPacket orig_pkt = pkt;
// printf("av_read_frame returned %d\n", ret);
do
{
ret = decode_process_packet(fifo, resample_ctx, frame, &pkt, dec_ctx, &got_frame);
if (ret < 0)
break;
pkt.data += ret;
pkt.size -= ret;
} while (pkt.size > 0);
av_free_packet(&orig_pkt);
}
// printf("av_read_frame returned %d\n", ret);
/* flush cached frames */
pkt.data = NULL;
pkt.size = 0;
do
{
decode_process_packet(fifo, resample_ctx, frame, &pkt, dec_ctx, &got_frame);
} while (got_frame);
// printf("Demuxing succeeded.\n");
/* Open the output file for writing. */
if (init_output(dst, dec_ctx, &enc_fmt_ctx, &enc_ctx))
{
fprintf(stderr, "Could not open destination\n");
dst->buf = NULL;
goto end;
}
/* Write the header of the output file container. */
if (write_output_file_header(enc_fmt_ctx))
{
fprintf(stderr, "Failed to write header\n");
dst->buf = NULL;
goto end;
}
/*
* If we have enough samples for the encoder, we encode them.
* At the end of the file, we pass the remaining samples to
* the encoder.
*/
while (av_audio_fifo_size(fifo) > 0)
{
/*
* Take one frame worth of audio samples from the FIFO buffer,
* encode it and write it to the output file.
*/
if (load_encode_and_write(fifo, enc_fmt_ctx, enc_ctx))
{
fprintf(stderr, "load_encode_and_write failed\n");
dst->buf = NULL;
goto end;
}
}
/*
* All available input samples have been encoded at this point.
* Flush the encoder as it may have delayed frames.
*/
do {
if (encode_frame(NULL, enc_fmt_ctx, enc_ctx, &data_written))
{
fprintf(stderr, "encode_audio_frame failed\n");
dst->buf = NULL;
goto end;
}
} while (data_written);
/* Write the trailer of the output file container. */
if (write_output_file_trailer(enc_fmt_ctx))
{
fprintf(stderr, "write_output_file_trailer failed\n");
dst->buf = NULL;
goto end;
}
end:
av_audio_fifo_free(fifo);
swr_free(&resample_ctx);
/* Guard the teardown: some of these may never have been created if an earlier step failed. */
if (enc_ctx)
avcodec_close(enc_ctx);
if (enc_fmt_ctx)
{
av_free(enc_fmt_ctx->pb->buffer);
av_free(enc_fmt_ctx->pb);
avformat_free_context(enc_fmt_ctx);
}
if (dec_ctx)
avcodec_close(dec_ctx);
if (dec_fmt_ctx)
{
av_free(dec_fmt_ctx->pb->buffer);
av_free(dec_fmt_ctx->pb);
avformat_free_context(dec_fmt_ctx);
}
av_frame_free(&frame);
}
int main(int argc, char **argv)
{
if (argc != 3)
{
fprintf(stderr, "usage: %s input_file output_file\n", argv[0]);
exit(1);
}
FILE *in_file = fopen(argv[1], "rb");
if (!in_file)
{
fprintf(stderr, "Could not open input file '%s'\n", argv[1]);
exit(1);
}
fseek(in_file, 0, SEEK_END);
const size_t input_file_size = ftell(in_file);
fseek(in_file, 0, SEEK_SET);
struct DataBuf in_buf;
memset(&in_buf, 0, sizeof in_buf);
in_buf.buf = (uint8_t*)malloc(input_file_size);
in_buf.capacity = in_buf.size = input_file_size;
if (fread(in_buf.buf, 1, input_file_size, in_file) != input_file_size)
{
fprintf(stderr, "Could not read input file '%s'\n", argv[1]);
exit(1);
}
fclose(in_file);
struct DataBuf out_buf;
memset(&out_buf, 0, sizeof out_buf);
transcode(&in_buf, &out_buf);
free(in_buf.buf);
if (out_buf.buf)
{
FILE *out_file = fopen(argv[2], "wb");
fwrite(out_buf.buf, 1, out_buf.size, out_file);
fclose(out_file);
free(out_buf.buf);
}
else
{
fprintf(stderr, "Transcoding failed\n");
}
return 0;
}