Extract video thumbnails with FFmpeg
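Below is a single-file C++ program that decodes a video with FFmpeg (optionally hardware-accelerated, CUDA by default), seeks to evenly spaced timestamps, and writes each decoded frame out as a JPEG thumbnail via swscale and the MJPEG encoder. It targets Windows/MSVC (note the #pragma comment(lib, ...) directives and fopen_s), and it expects an output/ directory to already exist in the working directory, since the thumbnails are written to output/0.jpg, output/1.jpg, and so on.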
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <format>
#include <iostream>
#include <string_view>
#include <tuple>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/hwcontext.h>
#include <libavutil/imgutils.h>
#include <libavutil/log.h>
#include <libavutil/mem.h>
#include <libswscale/swscale.h>
}
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "swscale.lib")
#define USE_FFMPEG_HW_ACCEL
#ifdef USE_FFMPEG_HW_ACCEL
static enum AVHWDeviceType HW_DEVICE_TYPE = AV_HWDEVICE_TYPE_CUDA;
#endif
// input
static AVFormatContext *input_format_ctx;
static AVStream *in_stream;
#ifdef USE_FFMPEG_HW_ACCEL
static AVBufferRef *hw_device_ctx;
static enum AVPixelFormat hw_pix_fmt;
#endif
static enum AVPixelFormat sw_pix_fmt;
// note: avcodec_find_decoder()/avcodec_find_encoder() return const AVCodec * since FFmpeg 5.0
static const AVCodec *decoder_codec;
static AVCodecContext *decoder_ctx;
// output
static const AVCodec *encoder_codec;
static AVCodecContext *encoder_ctx;
// swscale
static SwsContext *sws_ctx = nullptr;
// frames and packets
static AVFrame *in_hw_frame;
static AVFrame *in_sw_frame;
static AVFrame *out_frame;
static AVPacket *decode_packet;
static AVPacket *encode_packet;
static uint8_t *out_frame_buf;
#define check_true(condition) do { if (!(condition)) { av_log(nullptr, AV_LOG_PANIC, "ffmpeg false condition at %s:%d\n", __FILE__, __LINE__); std::terminate(); } } while (0)
#define check_gte_0(code) do { if ((code) < 0) { av_log(nullptr, AV_LOG_PANIC, "ffmpeg error code at %s:%d\n", __FILE__, __LINE__); std::terminate(); } } while (0)
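// open the input file, pick the best video stream, and set up the (optionally hardware-accelerated) decoder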
static auto ffmpeg_open_input(std::string_view input) -> void {
input_format_ctx = avformat_alloc_context();
// note: input must be null-terminated for data() to be usable here; it comes from argv in main()
const int open_ret = avformat_open_input(&input_format_ctx, input.data(), nullptr, nullptr);
if (open_ret == AVERROR(ENOENT)) {
std::cerr << "target video does not exist: " << input << std::endl;
exit(EXIT_FAILURE);
}
check_gte_0(open_ret);
check_gte_0(avformat_find_stream_info(input_format_ctx, nullptr));
const int input_video_stream_idx = av_find_best_stream(input_format_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
check_gte_0(input_video_stream_idx);
in_stream = input_format_ctx->streams[input_video_stream_idx];
decoder_codec = avcodec_find_decoder(in_stream->codecpar->codec_id);
check_true(decoder_codec);
decoder_ctx = avcodec_alloc_context3(decoder_codec);
check_true(decoder_ctx);
check_gte_0(avcodec_parameters_to_context(decoder_ctx, in_stream->codecpar));
#ifdef USE_FFMPEG_HW_ACCEL
// initialize hardware acceleration
for (int i = 0;; ++i) {
const AVCodecHWConfig *hw_config = avcodec_get_hw_config(decoder_codec, i);
check_true(hw_config);
if ((hw_config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) && hw_config->device_type == HW_DEVICE_TYPE) {
check_gte_0(av_hwdevice_ctx_create(&hw_device_ctx, HW_DEVICE_TYPE, nullptr, nullptr, 0));
hw_pix_fmt = hw_config->pix_fmt;
// pass nullptr as the device-specific hwconfig; any software format the device supports will do
AVHWFramesConstraints *hw_constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx, nullptr);
check_true(hw_constraints);
sw_pix_fmt = hw_constraints->valid_sw_formats[0];
av_hwframe_constraints_free(&hw_constraints);
break;
}
}
// take a new reference so decoder_ctx owns its copy and our hw_device_ctx stays valid until cleanup
decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
check_true(decoder_ctx->hw_device_ctx);
#else
sw_pix_fmt = decoder_ctx->pix_fmt;
#endif
// thread_count = 0 lets ffmpeg pick the optimal thread count
decoder_ctx->thread_count = 0;
check_gte_0(avcodec_open2(decoder_ctx, decoder_codec, nullptr));
}
static auto RoundDivision(int dividend, int divisor) -> int {
return (dividend + (divisor / 2)) / divisor;
}
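// e.g., for an NTSC stream whose avg_frame_rate is 30000/1001 (~29.97 fps),
// RoundDivision(30000, 1001) == 30, so the encoder time base below becomes 1/30

// set up the MJPEG encoder that turns the scaled frames into JPEG images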
static auto init_encoder() -> void {
encoder_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
check_true(encoder_codec);
encoder_ctx = avcodec_alloc_context3(encoder_codec);
check_true(encoder_ctx);
encoder_ctx->width = decoder_ctx->width;
encoder_ctx->height = decoder_ctx->height;
// the MJPEG codec must use the "deprecated" YUVJ4xxP pixel formats (which, unlike YUV4xxP, carry full-range color)
encoder_ctx->pix_fmt = AV_PIX_FMT_YUVJ444P;
encoder_ctx->time_base = AVRational { .num = 1, .den = RoundDivision(in_stream->avg_frame_rate.num, in_stream->avg_frame_rate.den) };
// use best quality
encoder_ctx->flags = AV_CODEC_FLAG_QSCALE;
encoder_ctx->qmin = 1;
encoder_ctx->qmax = 1;
check_gte_0(avcodec_open2(encoder_ctx, encoder_codec, nullptr));
}
static auto init_swscale(enum AVPixelFormat src_pix_fmt) -> void {
sws_ctx = sws_getContext(decoder_ctx->width, decoder_ctx->height, src_pix_fmt,
encoder_ctx->width, encoder_ctx->height, encoder_ctx->pix_fmt,
SWS_LANCZOS, nullptr, nullptr, nullptr);
check_true(sws_ctx);
}
static auto init_frames() -> void {
in_hw_frame = av_frame_alloc();
check_true(in_hw_frame);
#ifdef USE_FFMPEG_HW_ACCEL
in_sw_frame = av_frame_alloc();
check_true(in_sw_frame);
#endif
out_frame = av_frame_alloc();
check_true(out_frame);
decode_packet = av_packet_alloc();
check_true(decode_packet);
encode_packet = av_packet_alloc();
check_true(encode_packet);
out_frame->format = encoder_ctx->pix_fmt;
out_frame->width = encoder_ctx->width;
out_frame->height = encoder_ctx->height;
// instead of directly sending the decoded frame to the encoder, we do a sws scale to convert the frame's pixel format and color range
// prepare a buffer for the output frame to hold the scaled image data
const int out_frame_buf_size = av_image_get_buffer_size(encoder_ctx->pix_fmt, encoder_ctx->width, encoder_ctx->height, 1);
check_gte_0(out_frame_buf_size);
out_frame_buf = reinterpret_cast<uint8_t *>(av_malloc(out_frame_buf_size));
check_true(out_frame_buf);
check_true(av_image_fill_arrays(out_frame->data, out_frame->linesize, out_frame_buf, encoder_ctx->pix_fmt, encoder_ctx->width, encoder_ctx->height, 1) == out_frame_buf_size);
}
static auto ffmpeg_init() -> void {
init_encoder();
init_swscale(sw_pix_fmt);
init_frames();
}
/*
* return: tuple of
* transcode start time, in in_stream's time base
* transcode end time, in in_stream's time base
* gap time between each transcode, in in_stream's time base
*/
static auto calculate_seek_time() -> std::tuple<int64_t, int64_t, int64_t> {
const double video_start_time = static_cast<double>(input_format_ctx->start_time) / AV_TIME_BASE;
const double video_duration = static_cast<double>(input_format_ctx->duration) / AV_TIME_BASE;
const double transcode_start_time = video_start_time;
const double transcode_end_time = video_duration;
const int thumbnails_count = 10;
const double frame_gap = (transcode_end_time - transcode_start_time) / thumbnails_count;
return {
static_cast<int64_t>(transcode_start_time * in_stream->time_base.den / in_stream->time_base.num),
static_cast<int64_t>(transcode_end_time * in_stream->time_base.den / in_stream->time_base.num),
static_cast<int64_t>(frame_gap * in_stream->time_base.den / in_stream->time_base.num),
};
}
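// worked example with made-up numbers: a 100-second video starting at 0, time_base = 1/90000
// and thumbnails_count = 10 yield {0, 9000000, 900000}, i.e. one thumbnail every 10 seconds of stream time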
/*
* return: whether seeking to the specified time succeeded.
*/
static auto seek_to_time(int64_t time_in_tb) -> bool {
/*
without AVSEEK_FLAG_ANY, av_seek_frame() always seeks to a keyframe near the specified time
the default is the keyframe AFTER the time
with AVSEEK_FLAG_BACKWARD, it seeks to the keyframe BEFORE the time
with AVSEEK_FLAG_ANY, it seeks to the frame precisely at the specified time
unfortunately, if that frame is not a keyframe, it cannot be fully decoded
instead, we use AVSEEK_FLAG_BACKWARD to reach the nearest preceding keyframe, then keep reading frames from the decoder
until we reach the specified time (by comparing each frame's pts)
note: AVSEEK_FLAG_FRAME is not supported by most demuxers
*/
const int seek_ret = av_seek_frame(input_format_ctx, in_stream->index, time_in_tb, AVSEEK_FLAG_BACKWARD);
if (seek_ret >= 0) {
// must flush after seeking to make the new frame available
avcodec_flush_buffers(decoder_ctx);
}
return seek_ret >= 0;
}
/*
* return: whether more frames can be read from the input.
*/
static auto read_frame_until_time(int64_t time_in_tb) -> bool {
while (true) {
if (av_read_frame(input_format_ctx, decode_packet) < 0) {
return false;
}
if (decode_packet->stream_index != in_stream->index) {
goto cleanup;
}
if (avcodec_send_packet(decoder_ctx, decode_packet) < 0) {
goto cleanup;
}
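// a negative return below is usually AVERROR(EAGAIN): the decoder needs more packets before it can emit a frame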
if (avcodec_receive_frame(decoder_ctx, in_hw_frame) < 0) {
goto cleanup;
}
#ifdef USE_FFMPEG_HW_ACCEL
check_true(in_hw_frame->format == hw_pix_fmt);
#endif
if (in_hw_frame->best_effort_timestamp >= time_in_tb) {
break;
}
cleanup:
av_frame_unref(in_hw_frame);
av_packet_unref(decode_packet);
}
return true;
}
/*
* return: whether transcoding succeeded.
*/
static auto transcode_frame() -> bool {
#ifdef USE_FFMPEG_HW_ACCEL
check_gte_0(av_hwframe_transfer_data(in_sw_frame, in_hw_frame, 0));
#else
in_sw_frame = in_hw_frame;
#endif
check_true(sws_scale(sws_ctx, in_sw_frame->data, in_sw_frame->linesize, 0, in_sw_frame->height, out_frame->data, out_frame->linesize) == out_frame->height);
av_frame_unref(in_sw_frame);
check_gte_0(avcodec_send_frame(encoder_ctx, out_frame));
check_gte_0(avcodec_receive_packet(encoder_ctx, encode_packet));
FILE *out_file;
static int out_file_idx = 0;
check_true(fopen_s(&out_file, std::format(R"(output/{}.jpg)", out_file_idx++).c_str(), "wb") == 0);
fwrite(encode_packet->data, 1, encode_packet->size, out_file);
fclose(out_file);
av_packet_unref(encode_packet);
return true;
}
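// main loop: seek to each target timestamp, decode the frame there, and write it out as a JPEG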
static auto ffmpeg_transcode() -> void {
auto [curr_timestamp_in_tb, transcode_end_in_tb, frame_gap_in_tb] = calculate_seek_time();
bool do_seek = true;
while (true) {
if (do_seek && !seek_to_time(curr_timestamp_in_tb)) {
break;
}
if (!read_frame_until_time(curr_timestamp_in_tb)) {
break;
}
do_seek = transcode_frame();
av_packet_unref(decode_packet);
if (do_seek) {
curr_timestamp_in_tb += frame_gap_in_tb;
if (curr_timestamp_in_tb >= transcode_end_in_tb) {
break;
}
}
}
// signal end of stream to both codecs (their remaining output is not drained here)
avcodec_send_frame(encoder_ctx, nullptr);
avcodec_send_packet(decoder_ctx, nullptr);
}
static auto ffmpeg_cleanup() -> void {
av_packet_free(&encode_packet);
av_packet_free(&decode_packet);
av_frame_free(&out_frame);
#ifdef USE_FFMPEG_HW_ACCEL
av_frame_free(&in_sw_frame);
av_buffer_unref(&hw_device_ctx);
#endif
// in the software path in_sw_frame merely aliases in_hw_frame, so free only the latter
av_frame_free(&in_hw_frame);
av_free(out_frame_buf);
sws_freeContext(sws_ctx);
avcodec_free_context(&encoder_ctx);
avcodec_free_context(&decoder_ctx);
avformat_close_input(&input_format_ctx);
}
auto main(int argc, char **argv) -> int {
if (argc < 2) {
std::cerr << "usage: " << argv[0] << " <input video file>" << std::endl;
return EXIT_FAILURE;
}
ffmpeg_open_input(argv[1]);
ffmpeg_init();
ffmpeg_transcode();
ffmpeg_cleanup();
return EXIT_SUCCESS;
}
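A possible way to build and run this, assuming MSVC with C++20 and an FFmpeg dev package (the paths are placeholders, not part of the original gist): compile with cl /std:c++20 /EHsc thumbnails.cpp /I<ffmpeg>\include /link /LIBPATH:<ffmpeg>\lib, create an output directory, then run thumbnails.exe input.mp4.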