Skip to content

Instantly share code, notes, and snippets.

@AldoMX
Last active July 11, 2020 14:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AldoMX/ce64f6fa2d1e50e4289841cf92205a76 to your computer and use it in GitHub Desktop.
Save AldoMX/ce64f6fa2d1e50e4289841cf92205a76 to your computer and use it in GitHub Desktop.
RageSoundReader_FFMpeg - Sound reader for any format supported by FFMpeg, written for StepMania 3.9, tested with FFMpeg 2.1.4, LGPLv2.1
#include "global.h"
#include "RageFile.h"
#include "RageLog.h"
#include "RageSoundManager.h"
#include "RageUtil.h"
#include "RageSoundReader_FFMpeg.h"
namespace avcodec
{
extern "C"
{
#include <libavutil/avutil.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
/* Size in bytes of the I/O buffer handed to avio_alloc_context(). */
static const size_t AVIO_BUFFER_SIZE = 4096;
/* Output format the reader starts with: stereo, signed 16-bit, 44.1 kHz.
 * The constructor may replace the sample rate with the sound driver's rate. */
static const uint64_t DEFAULT_CHANNEL_LAYOUT = AV_CH_LAYOUT_STEREO;
static const int DEFAULT_SAMPLE_FORMAT = AV_SAMPLE_FMT_S16;
static const int DEFAULT_SAMPLE_RATE = 44100;
/* Typed copies of FFmpeg macro values used throughout this file. */
static const int64_t NOPTS_VALUE = AV_NOPTS_VALUE; /* "no timestamp" sentinel */
static const int SEEK_FLAG_BACKWARD = AVSEEK_FLAG_BACKWARD;
static const int TIME_BASE = AV_TIME_BASE; /* denominator of the default AudioState time base */
/*
 * All FFmpeg-side state of one reader. The class header only stores a
 * void* to this struct, so FFmpeg types stay out of the header.
 */
struct AudioState {
    /* Source file and the audio stream selected inside it. */
    CString filename = "";
    int streamIndex = -1;

    /* FFmpeg objects; every pointer starts out null. */
    AVCodecContext *pCodecCtx = nullptr;
    AVStream *pStream = nullptr;
    AVFrame *pFrame = nullptr;
    AVFormatContext *pFormatCtx = nullptr;
    AVIOContext *pIOCtx = nullptr;
    SwrContext *pSwrCtx = nullptr;

    /* Packet most recently read from the container, plus timing bookkeeping
     * for the most recently decoded frame. */
    AVPacket lastFramePacket;
    int64_t lastFramePts = NOPTS_VALUE;
    AVRational lastFrameTimeBase = { 1, TIME_BASE };

    /* Input format the resampler was last configured for. */
    uint64_t lastFrameChannelLayout = DEFAULT_CHANNEL_LAYOUT;
    int lastFrameSampleFormat = DEFAULT_SAMPLE_FORMAT;
    int lastFrameSampleRate = DEFAULT_SAMPLE_RATE;

    /* Scratch buffer receiving resampled audio, and its capacity. */
    uint8_t *pResampleBuffer = nullptr;
    size_t resampleBufferSize = 0;
};
static int AVIO_RageFile_ReadPacket(void *file, uint8_t *buf, int buf_size)
{
return static_cast<RageFile *>(file)->Read(buf, buf_size);
}
static int64_t AVIO_RageFile_Seek(void *file, int64_t offset, int whence)
{
switch (whence)
{
case AVSEEK_SIZE:
return static_cast<RageFile *>(file)->GetFileSize();
case SEEK_SET:
case SEEK_CUR:
case SEEK_END:
return static_cast<RageFile *>(file)->Seek(static_cast<int>(offset), whence);
default:
LOG->Trace("Error: unsupported seek whence: %d", whence);
return -1;
}
}
}
};
using namespace avcodec;
#if defined(_MSC_VER)
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "swresample.lib")
#endif
/*
 * Creates a closed reader. The output format is stereo / signed 16-bit at
 * the rate returned by the sound manager for DEFAULT_SAMPLE_RATE.
 */
RageSoundReader_FFMpeg::RageSoundReader_FFMpeg() :
    m_pState(new AudioState()),
    m_pFrameBuffer(nullptr),
    m_frameBufferSize(0),
    m_channelLayout(DEFAULT_CHANNEL_LAYOUT),
    m_sampleFormat(DEFAULT_SAMPLE_FORMAT),
    m_sampleRate(SOUNDMAN->GetDriverSampleRate(DEFAULT_SAMPLE_RATE)),
    m_currentTime(0.f)
{
    AudioState *const pState = static_cast<AudioState *>(m_pState);
    /* Record the output rate as the "last seen" input rate. */
    pState->lastFrameSampleRate = m_sampleRate;
    av_init_packet(&pState->lastFramePacket);
}
/* Tears down the decoder, then releases the AudioState behind m_pState. */
RageSoundReader_FFMpeg::~RageSoundReader_FFMpeg()
{
    DestroyDecoder();
    AudioState *const pState = static_cast<AudioState *>(m_pState);
    delete pState;
}
/* Calls av_register_all() the first time it runs; later calls do nothing. */
void RageSoundReader_FFMpeg::RegisterProtocols()
{
    static bool bRegistered = false;
    if (!bRegistered) {
        av_register_all();
        bRegistered = true;
    }
}
// ReSharper disable once CppMemberFunctionMayBeConst
/*
 * Opens `filename` through RageFile, wraps it in a custom AVIOContext and
 * prepares the best audio stream for decoding.
 *
 * Returns OPEN_OK on success. On failure an error is recorded with
 * SetError() and OPEN_FATAL_ERROR or OPEN_UNKNOWN_FILE_FORMAT is returned.
 * Resources already stored in the AudioState are left for DestroyDecoder()
 * to release (Open() calls it on failure); resources not yet reachable from
 * the AudioState are released here before returning.
 */
SoundReader_FileReader::OpenResult RageSoundReader_FFMpeg::CreateDecoder(const CString &filename)
{
    RegisterProtocols();
    AudioState &state = *static_cast<AudioState *>(m_pState);
    state.filename = filename;

    RageFile *file = new RageFile();
    if (false == file->Open(state.filename, RageFile::READ)) {
        SetError("Error opening \"%s\" - %s.", state.filename.c_str(), file->GetError().c_str());
        /* Nothing owns the file yet, so it must be released here. */
        delete file;
        return OPEN_FATAL_ERROR;
    }

    const size_t &bufferSize = AVIO_BUFFER_SIZE;
    unsigned char *buffer = static_cast<unsigned char *>(av_mallocz(bufferSize));
    if (nullptr == buffer) {
        SetError("Unable to allocate memory for the buffer.");
        file->Close();
        delete file;
        return OPEN_FATAL_ERROR;
    }

    state.pIOCtx = avio_alloc_context(
        buffer, bufferSize, 0, file, AVIO_RageFile_ReadPacket, nullptr, AVIO_RageFile_Seek
    );
    if (nullptr == state.pIOCtx) {
        SetError("Unable to allocate memory for AVIOContext.");
        /* DestroyDecoder() only finds the file and buffer through pIOCtx. */
        av_free(buffer);
        file->Close();
        delete file;
        return OPEN_FATAL_ERROR;
    }
    /* From here on the file and buffer are reachable through state.pIOCtx. */

    state.pFormatCtx = static_cast<AVFormatContext *>(avformat_alloc_context());
    if (nullptr == state.pFormatCtx) {
        SetError("Unable to allocate memory for AVFormatContext.");
        return OPEN_FATAL_ERROR;
    }
    state.pFormatCtx->pb = state.pIOCtx;

    if (0 > avformat_open_input(&state.pFormatCtx, state.filename.c_str(), nullptr, nullptr)) {
        SetError("AVFormat error opening \"%s\".", state.filename.c_str());
        return OPEN_FATAL_ERROR;
    }
    if (0 > avformat_find_stream_info(state.pFormatCtx, nullptr)) {
        SetError("Couldn't find codec info when opening \"%s\".", state.filename.c_str());
        return OPEN_UNKNOWN_FILE_FORMAT;
    }

    AVCodec *codec = nullptr;
    state.streamIndex = av_find_best_stream(state.pFormatCtx, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
    if (AVERROR_DECODER_NOT_FOUND == state.streamIndex) {
        /* An audio stream exists, but no decoder is available for it. */
        SetError("Couldn't find a decoder for the audio stream in \"%s\".", state.filename.c_str());
        return OPEN_UNKNOWN_FILE_FORMAT;
    }
    if (state.streamIndex < 0 ||
        static_cast<unsigned int>(state.streamIndex) >= state.pFormatCtx->nb_streams ||
        state.pFormatCtx->streams[state.streamIndex] == nullptr)
    {
        SetError("Couldn't find audio streams in \"%s\".", state.filename.c_str());
        return OPEN_UNKNOWN_FILE_FORMAT;
    }

    state.pStream = state.pFormatCtx->streams[state.streamIndex];
    state.lastFramePts = NOPTS_VALUE;
    state.lastFrameTimeBase = state.pStream->time_base;
    state.pCodecCtx = state.pStream->codec;
    if (AV_CODEC_ID_NONE == state.pCodecCtx->codec_id) {
        SetError(
            "Codec tag \"%08x\" detected in \"%s\" is not supported.",
            state.pCodecCtx->codec_tag, state.filename.c_str()
        );
        return OPEN_UNKNOWN_FILE_FORMAT;
    }

    /* Human-readable codec name for error messages. The descriptor pointer
     * may be null, so fall back to the name derived from the codec id. */
    const auto describeCodec = [&state]() -> const char * {
        const AVCodecDescriptor *codecDesc = av_codec_get_codec_descriptor(state.pCodecCtx);
        if (nullptr == codecDesc) {
            return avcodec_get_name(state.pCodecCtx->codec_id);
        }
        return codecDesc->long_name != nullptr ? codecDesc->long_name : codecDesc->name;
    };

    if (nullptr == codec) {
        SetError("Couldn't find decoder for \"%s\" when opening \"%s\".", describeCodec(), state.filename.c_str());
        return OPEN_UNKNOWN_FILE_FORMAT;
    }
    state.pCodecCtx->codec = codec;
    if (0 > avcodec_open2(state.pCodecCtx, state.pCodecCtx->codec, nullptr)) {
        SetError("Couldn't load decoder for \"%s\" when opening \"%s\".", describeCodec(), state.filename.c_str());
        return OPEN_FATAL_ERROR;
    }

    /* Configure the resampler for the format announced by the codec. */
    UpdateResamplingOpts(
        state.pCodecCtx->channel_layout, state.pCodecCtx->sample_fmt, state.pCodecCtx->sample_rate
    );

    state.pFrame = av_frame_alloc();
    if (nullptr == state.pFrame) {
        SetError("Unable to allocate memory for AVFrame.");
        return OPEN_FATAL_ERROR;
    }
    av_init_packet(&state.lastFramePacket);
    return OPEN_OK;
}
// ReSharper disable once CppMemberFunctionMayBeConst
void RageSoundReader_FFMpeg::DestroyDecoder()
{
if (m_pState == nullptr)
return;
AudioState &state = *static_cast<AudioState *>(m_pState);
if (state.pResampleBuffer) {
av_free(state.pResampleBuffer);
state.pResampleBuffer = nullptr;
}
if (state.pFrame) {
av_frame_free(&state.pFrame);
state.pFrame = nullptr;
}
if (state.pSwrCtx) {
swr_free(&state.pSwrCtx);
state.pSwrCtx = nullptr;
}
if (state.pIOCtx) {
RageFile *file = static_cast<RageFile *>(state.pIOCtx->opaque);
if (file->IsOpen())
file->Close();
delete file;
av_free(state.pIOCtx->buffer);
av_free(state.pIOCtx);
state.pIOCtx = nullptr;
}
if (state.pCodecCtx) {
avcodec_close(state.pCodecCtx);
state.pCodecCtx = nullptr;
}
if (state.pFormatCtx) {
avformat_close_input(&state.pFormatCtx);
state.pFormatCtx = nullptr;
}
av_free_packet(&state.lastFramePacket);
}
void RageSoundReader_FFMpeg::SetError(const char *fmt, ...) const
{
va_list va;
va_start(va, fmt);
const CString formatedError = vssprintf(fmt, va);
va_end(va);
SoundReader::SetError("RageSoundReader_FFMpeg: " + formatedError);
}
/*
 * Opens `filename` for decoding. When CreateDecoder() fails, everything it
 * set up is torn down again and its result code is returned unchanged.
 */
SoundReader_FileReader::OpenResult RageSoundReader_FFMpeg::Open(CString filename)
{
    const OpenResult result = CreateDecoder(filename);
    if (OPEN_OK == result) {
        return OPEN_OK;
    }
    DestroyDecoder();
    return result;
}
/* Returns the same value as GetLength_Fast(); no separate computation is done. */
int RageSoundReader_FFMpeg::GetLength() const
{
    const int length = GetLength_Fast();
    return length;
}
int RageSoundReader_FFMpeg::GetLength_Fast() const
{
AudioState &state = *static_cast<AudioState *>(m_pState);
return static_cast<int>(state.pFormatCtx->duration / 1000);
}
/*
 * Seeks to `ms` milliseconds. Currently this only performs the fast seek;
 * a negative result from SetPosition_Fast() is passed through, otherwise
 * `ms` is returned.
 */
int RageSoundReader_FFMpeg::SetPosition_Accurate(int ms)
{
    const int seekResult = SetPosition_Fast(ms);
    // TODO: Decode frames until m_currentTime matches ms
    return (seekResult < 0) ? seekResult : ms;
}
int RageSoundReader_FFMpeg::SetPosition_Fast(int ms)
{
AudioState &state = *static_cast<AudioState *>(m_pState);
int64_t timestamp = av_rescale_q(ms, { 1, 1000 }, state.pStream->time_base);
int ret = avformat_seek_file(state.pFormatCtx, state.streamIndex, INT64_MIN, timestamp, timestamp, 0);
if (ret < 0) {
return ret;
}
if (nullptr != state.lastFramePacket.data) {
av_free_packet(&state.lastFramePacket);
}
av_init_packet(&state.lastFramePacket);
state.lastFramePts = NOPTS_VALUE;
state.lastFrameTimeBase = state.pStream->time_base;
avcodec_flush_buffers(state.pCodecCtx);
m_pFrameBuffer = nullptr;
m_frameBufferSize = 0;
state.lastFrameChannelLayout = m_channelLayout;
state.lastFrameSampleFormat = m_sampleFormat;
state.lastFrameSampleRate = m_sampleRate;
UpdateResamplingOpts(
state.pCodecCtx->channel_layout, state.pCodecCtx->sample_fmt, state.pCodecCtx->sample_rate
);
return ms;
}
size_t RageSoundReader_FFMpeg::ReadFromLastFrame(char* buf, size_t len)
{
if (m_frameBufferSize == 0) {
return 0;
}
size_t remainingFrameData = min(m_frameBufferSize, len);
memcpy(buf, m_pFrameBuffer, remainingFrameData);
m_pFrameBuffer += remainingFrameData;
m_frameBufferSize -= remainingFrameData;
if (m_frameBufferSize == 0) {
AudioState &state = *static_cast<AudioState *>(m_pState);
av_free_packet(&state.lastFramePacket);
m_pFrameBuffer = nullptr;
}
return remainingFrameData;
}
// ReSharper disable once CppMemberFunctionMayBeConst
/*
 * Keeps the resampler in sync with the format of the incoming audio.
 *
 * channelLayout / sampleFormat / sampleRate describe the decoder's output.
 * The existing SwrContext is discarded when resampling is no longer needed
 * or when the input format differs from the one seen last time; a new one
 * is created whenever resampling is needed and none exists.
 *
 * Returns true when the input format differs from the reader's output
 * format, i.e. when decoded audio has to go through the resampler.
 */
bool RageSoundReader_FFMpeg::UpdateResamplingOpts(uint64_t channelLayout, int sampleFormat, int sampleRate)
{
    AudioState &state = *static_cast<AudioState *>(m_pState);
    const bool needsResampling = channelLayout != m_channelLayout ||
        sampleFormat != m_sampleFormat ||
        sampleRate != m_sampleRate;
    const bool updateOpts = channelLayout != state.lastFrameChannelLayout ||
        sampleFormat != state.lastFrameSampleFormat ||
        sampleRate != state.lastFrameSampleRate;
    const bool freeCtx = !needsResampling || updateOpts;

    if (freeCtx && nullptr != state.pSwrCtx) {
        swr_free(&state.pSwrCtx);
        state.pSwrCtx = nullptr;
    }
    if (needsResampling && nullptr == state.pSwrCtx) {
        state.pSwrCtx = swr_alloc_set_opts(
            nullptr, m_channelLayout, static_cast<AVSampleFormat>(m_sampleFormat), m_sampleRate,
            channelLayout, static_cast<AVSampleFormat>(sampleFormat), sampleRate, 0, nullptr
        );
        ASSERT(nullptr != state.pSwrCtx);
        /* swr_init() returns 0 on success and a negative error code on
         * failure. The call is kept outside the assertion so it does not
         * depend on how the macro evaluates its argument. */
        const int initResult = swr_init(state.pSwrCtx);
        ASSERT(initResult >= 0);
    }
    if (updateOpts) {
        state.lastFrameChannelLayout = channelLayout;
        state.lastFrameSampleFormat = sampleFormat;
        state.lastFrameSampleRate = sampleRate;
    }
    return needsResampling;
}
int RageSoundReader_FFMpeg::DecodeFrame()
{
AudioState &state = *static_cast<AudioState *>(m_pState);
AVFrame &frame = *state.pFrame;
AVPacket tmpPacket = state.lastFramePacket;
while (tmpPacket.size > 0) {
int gotFrame = 0;
int readEncodedBytes = avcodec_decode_audio4(state.pCodecCtx, &frame, &gotFrame, &tmpPacket);
if (readEncodedBytes < 0) {
tmpPacket.size = 0;
break;
}
tmpPacket.dts = tmpPacket.pts = NOPTS_VALUE;
tmpPacket.data += readEncodedBytes;
tmpPacket.size -= readEncodedBytes;
if (tmpPacket.data && tmpPacket.size <= 0 || !tmpPacket.data && !gotFrame) {
tmpPacket.size = 0;
}
if (!gotFrame) {
continue;
}
AVRational timeBase = { 1, frame.sample_rate };
if (frame.pts != NOPTS_VALUE) {
frame.pts = av_rescale_q(frame.pts, state.pCodecCtx->time_base, timeBase);
}
else if (frame.pkt_pts != NOPTS_VALUE) {
frame.pts = av_rescale_q(frame.pkt_pts, state.pStream->time_base, timeBase);
}
else if (state.lastFramePts != NOPTS_VALUE) {
frame.pts = av_rescale_q(state.lastFramePts, state.lastFrameTimeBase, timeBase);
}
if (frame.pts != NOPTS_VALUE) {
state.lastFramePts = frame.pts + frame.nb_samples;
}
state.lastFrameTimeBase = timeBase;
int numChannels = av_frame_get_channels(&frame);
int decodedSize = av_samples_get_buffer_size(
nullptr, numChannels, frame.nb_samples, static_cast<AVSampleFormat>(frame.format), 1
);
if (decodedSize < 0) {
return decodedSize;
}
uint64_t channelLayout =
frame.channel_layout && numChannels == av_get_channel_layout_nb_channels(frame.channel_layout)
? frame.channel_layout
: av_get_default_channel_layout(numChannels);
bool needsResampling = UpdateResamplingOpts(channelLayout, frame.format, frame.sample_rate);
if (needsResampling) {
int expectedSamples = static_cast<int64_t>(frame.nb_samples) * m_sampleRate / frame.sample_rate + 256;
int expectedBufferSize = av_samples_get_buffer_size(
nullptr, DEFAULT_CHANNELS, expectedSamples, static_cast<AVSampleFormat>(m_sampleFormat), 0
);
if (expectedBufferSize < 0) {
return expectedBufferSize;
}
av_fast_malloc(&state.pResampleBuffer, &state.resampleBufferSize, expectedBufferSize);
if (nullptr == state.pResampleBuffer) {
return -ENOMEM;
}
int samples = swr_convert(
state.pSwrCtx, &state.pResampleBuffer, state.resampleBufferSize,
const_cast<const uint8_t **>(frame.extended_data), frame.nb_samples
);
ASSERT(samples >= 0 && samples < expectedSamples);
m_pFrameBuffer = state.pResampleBuffer;
m_frameBufferSize = samples * DEFAULT_CHANNELS * av_get_bytes_per_sample(static_cast<AVSampleFormat>(m_sampleFormat));
}
else {
m_pFrameBuffer = frame.data[0];
m_frameBufferSize = decodedSize;
}
if (frame.pts != NOPTS_VALUE) {
m_currentTime = 1000.f * frame.pts * static_cast<float>(av_q2d(timeBase)) +
static_cast<float>(frame.nb_samples) / frame.sample_rate;
}
else {
m_currentTime = NAN;
}
return m_frameBufferSize;
}
if (tmpPacket.data) {
av_free_packet(&tmpPacket);
}
return 0;
}
int RageSoundReader_FFMpeg::Read(char *buf, unsigned len)
{
AudioState &state = *static_cast<AudioState *>(m_pState);
int bytesRead = 0;
for (;;) {
size_t remainingFrameData = ReadFromLastFrame(buf, len);
if (remainingFrameData > 0) {
bytesRead += remainingFrameData;
buf += remainingFrameData;
len -= remainingFrameData;
if (len == 0)
return bytesRead;
}
if (0 != av_read_frame(state.pFormatCtx, &state.lastFramePacket)) {
break; // EOF
}
if (state.lastFramePacket.stream_index != state.streamIndex) {
av_free_packet(&state.lastFramePacket);
continue;
}
int ret = DecodeFrame();
if (ret < 0) {
return ret;
}
}
return bytesRead;
}
/*
 * Returns a newly allocated reader on which Open() has been called with
 * this reader's filename. The result of that Open() call is not checked.
 */
SoundReader * RageSoundReader_FFMpeg::Copy() const
{
    const AudioState *const pState = static_cast<AudioState *>(m_pState);
    RageSoundReader_FFMpeg *const pCopy = new RageSoundReader_FFMpeg;
    pCopy->Open(pState->filename);
    return pCopy;
}
/*
* StepMania AMX is (c) 2008-2017 Aldo Fregoso "Aldo_MX".
* All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
/*
* RageSoundReader_FFMpeg - Sound reader for any format supported by FFMpeg
*/
#ifndef RAGE_SOUND_READER_FFMPEG
#define RAGE_SOUND_READER_FFMPEG
#include "RageSoundReader_FileReader.h"
/*
 * Sound reader that decodes through FFmpeg and always delivers
 * DEFAULT_CHANNELS channels at GetSampleRate() Hz.
 *
 * FFmpeg types are kept out of this header: all FFmpeg state lives in an
 * implementation-private struct reached through m_pState.
 */
class RageSoundReader_FFMpeg : public SoundReader_FileReader
{
    static const unsigned DEFAULT_CHANNELS = 2;

    void *m_pState;            /* owned; allocated in the constructor, deleted in the destructor */
    uint8_t *m_pFrameBuffer;   /* read cursor into the decoded audio not yet handed out */
    size_t m_frameBufferSize;  /* bytes remaining at m_pFrameBuffer */
    uint64_t m_channelLayout;  /* output channel layout */
    int m_sampleFormat;        /* output sample format */
    int m_sampleRate;          /* output sample rate in Hz */
    float m_currentTime;       /* time associated with the last decoded frame */

    /* m_pState is an owning raw pointer, so an implicit copy would end in a
     * double delete. Use Copy() to duplicate a reader. */
    RageSoundReader_FFMpeg(const RageSoundReader_FFMpeg &) = delete;
    RageSoundReader_FFMpeg &operator=(const RageSoundReader_FFMpeg &) = delete;

    static void RegisterProtocols();
    OpenResult CreateDecoder(const CString &filename);
    void DestroyDecoder();
    void SetError(const char *fmt, ...) const;
    size_t ReadFromLastFrame(char *buf, size_t len);
    bool UpdateResamplingOpts(uint64_t channelLayout, int sampleFormat, int sampleRate);
    int DecodeFrame();

public:
    RageSoundReader_FFMpeg();
    ~RageSoundReader_FFMpeg();

    OpenResult Open(CString filename);
    int GetLength() const;
    int GetLength_Fast() const;
    int SetPosition_Accurate(int ms);
    int SetPosition_Fast(int ms);
    int Read(char *buf, unsigned len);
    SoundReader * Copy() const;
    int GetSampleRate() const { return m_sampleRate; }
    unsigned GetNumChannels() const { return DEFAULT_CHANNELS; }
};
#endif
/*
* StepMania AMX is (c) 2008-2017 Aldo Fregoso "Aldo_MX".
* All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment