Skip to content

Instantly share code, notes, and snippets.

@royshil
Last active May 10, 2024 17:04
Show Gist options
  • Save royshil/fff30890c7c19a4889f0a148101c9dff to your computer and use it in GitHub Desktop.
Save royshil/fff30890c7c19a4889f0a148101c9dff to your computer and use it in GitHub Desktop.
Read and convert an audio file to another format or sample rate with libav and libswresample in C++.
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavutil/frame.h>
#include <libavutil/mem.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
}
#include <iostream>
#include <vector>
std::vector<float> read_audio_file(const char *filename, int targetSampleRate)
{
AVFormatContext *formatContext = nullptr;
if (avformat_open_input(&formatContext, filename, nullptr, nullptr) != 0) {
std::cerr << "Error opening file";
return {};
}
if (avformat_find_stream_info(formatContext, nullptr) < 0) {
std::cerr << "Error finding stream information";
return {};
}
int audioStreamIndex = -1;
for (unsigned int i = 0; i < formatContext->nb_streams; i++) {
if (formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
audioStreamIndex = i;
break;
}
}
if (audioStreamIndex == -1) {
std::cerr << "No audio stream found";
return {};
}
AVCodecParameters *codecParams = formatContext->streams[audioStreamIndex]->codecpar;
const AVCodec *codec = avcodec_find_decoder(codecParams->codec_id);
if (!codec) {
std::cerr << "Decoder not found";
return {};
}
AVCodecContext *codecContext = avcodec_alloc_context3(codec);
if (!codecContext) {
std::cerr << "Failed to allocate codec context";
return {};
}
if (avcodec_parameters_to_context(codecContext, codecParams) < 0) {
std::cerr << "Failed to copy codec parameters to codec context";
return {};
}
if (avcodec_open2(codecContext, codec, nullptr) < 0) {
std::cerr << "Failed to open codec";
return {};
}
AVFrame *frame = av_frame_alloc();
AVPacket packet;
// set up swresample
AVChannelLayout ch_layout;
av_channel_layout_from_string(&ch_layout, "mono");
SwrContext *swr = nullptr;
int ret;
ret = swr_alloc_set_opts2(&swr, &ch_layout, AV_SAMPLE_FMT_FLT, targetSampleRate,
&(codecContext->ch_layout), codecContext->sample_fmt,
codecContext->sample_rate, 0, nullptr);
if (ret < 0) {
char errbuf[AV_ERROR_MAX_STRING_SIZE];
av_strerror(ret, errbuf, AV_ERROR_MAX_STRING_SIZE);
std::cerr << "Failed to set up swr context: " << errbuf;
return {};
}
// init swr
ret = swr_init(swr);
if (ret < 0) {
char errbuf[AV_ERROR_MAX_STRING_SIZE];
av_strerror(ret, errbuf, AV_ERROR_MAX_STRING_SIZE);
std::cerr << "Failed to initialize swr context: " << errbuf;
return {};
}
std::vector<float> audioFrames;
float *convertBuffer[1];
convertBuffer[0] = (float *)av_malloc(4096 * sizeof(float));
while (av_read_frame(formatContext, &packet) >= 0) {
if (packet.stream_index == audioStreamIndex) {
if (avcodec_send_packet(codecContext, &packet) == 0) {
while (avcodec_receive_frame(codecContext, frame) == 0) {
int ret = swr_convert(swr, (uint8_t **)convertBuffer, 4096,
(const uint8_t **)frame->data,
frame->nb_samples);
if (ret < 0) {
char errbuf[AV_ERROR_MAX_STRING_SIZE];
av_strerror(ret, errbuf, AV_ERROR_MAX_STRING_SIZE);
std::cerr << "Failed to convert audio frame: " << errbuf;
return {};
}
audioFrames.insert(audioFrames.end(), convertBuffer[0],
convertBuffer[0] + ret);
}
}
}
av_packet_unref(&packet);
}
av_free(convertBuffer[0]);
swr_free(&swr);
av_frame_free(&frame);
avcodec_free_context(&codecContext);
avformat_close_input(&formatContext);
return audioFrames;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment