Skip to content

Instantly share code, notes, and snippets.

@devymex
Created May 22, 2022 07:35
Show Gist options
  • Save devymex/0ab6fdf990e64cdbbe2e660b8c3315d3 to your computer and use it in GitHub Desktop.
Save devymex/0ab6fdf990e64cdbbe2e660b8c3315d3 to your computer and use it in GitHub Desktop.
GPU-accelerated video encoder (NVENC encoding with FFmpeg muxing)
#include "NvEncoder/NvEncoderCuda.h"
#include "NvEncoder/NvEncoderCLIOptions.h"
#include "NvEncoder/NvCodecUtils.h"
#include <cuda_runtime.h>
extern "C" {
#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}
#include <opencv2/opencv.hpp>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/cudaarithm.hpp>
#include <opencv2/cudawarping.hpp>
#include <tbb/parallel_for.h>
#include <glog/logging.h>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <memory>
#include <string>
#include <vector>
class Encoder {
private:
std::unique_ptr<NvEncoderCuda> m_pEnc;
CUcontext m_cuContext;
std::string m_strVideoFile;
std::ofstream m_OutFile;
std::vector<NvPacket> m_NvPackets;
AVCodecContext *m_pAvCtx = nullptr;
AVFormatContext *m_pOutCtx = nullptr;
AVStream *m_pAvStream = nullptr;
uint32_t m_nFPS = 0;
double m_dTimeBase = 0;
size_t m_nTotalSize = 0;
size_t m_iFrame = 0;
public:
Encoder(int32_t nGpuID, std::string strVideoFile, cv::Size imageSize, uint32_t nFPS) {
int nBitRate = 2000;
auto pOutFormat = "mp4";
auto inFormat = NV_ENC_BUFFER_FORMAT_ARGB;
m_strVideoFile = strVideoFile;
m_nFPS = nFPS;
std::string strCliParams = "-codec h264 -profile main -fps " + std::to_string(nFPS);
av_register_all();
avcodec_register_all();
auto pFmt = __FindFormat(pOutFormat);
CHECK(!(pFmt->flags & AVFMT_NOFILE));
AVCodec* pCodec = avcodec_find_encoder(pFmt->video_codec);
CHECK_NOTNULL(pCodec);
m_pAvCtx = avcodec_alloc_context3(pCodec);
CHECK_NOTNULL(m_pAvCtx);
m_pAvCtx->max_b_frames = 2;
m_pAvCtx->gop_size = 12;
m_pAvCtx->time_base = AVRational{ 1, 90000 };
m_pAvCtx->framerate = AVRational{ (int)nFPS, 1 };
if (pFmt->video_codec == AV_CODEC_ID_H264 ||
pFmt->video_codec == AV_CODEC_ID_H265) {
av_opt_set(m_pAvCtx, "preset", "ultrafast", 0);
}
m_dTimeBase = av_q2d(av_inv_q(av_mul_q(
m_pAvCtx->framerate, m_pAvCtx->time_base)));
CHECK_EQ(avformat_alloc_output_context2(&m_pOutCtx, pFmt,
nullptr, m_strVideoFile.c_str()), 0);
m_pOutCtx->oformat->flags |= AVFMT_TS_NONSTRICT;
m_pAvStream = avformat_new_stream(m_pOutCtx, nullptr);
CHECK_NOTNULL(m_pAvStream);
m_pAvStream->codecpar->codec_id = pFmt->video_codec;
m_pAvStream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
m_pAvStream->codecpar->codec_tag = 0;
m_pAvStream->codecpar->width = imageSize.width;
m_pAvStream->codecpar->height = imageSize.height;
m_pAvStream->codecpar->format = AV_PIX_FMT_YUV420P;
m_pAvStream->codecpar->bit_rate = nBitRate * 1000;
m_pAvStream->time_base = m_pAvCtx->time_base;
avcodec_parameters_to_context(m_pAvCtx, m_pAvStream->codecpar);
avcodec_parameters_from_context(m_pAvStream->codecpar, m_pAvCtx);
CHECK_EQ(avio_open(&m_pOutCtx->pb, m_strVideoFile.c_str(), AVIO_FLAG_WRITE), 0);
CHECK_EQ(avformat_write_header(m_pOutCtx, nullptr), 0);
CUdevice cuDevice = 0;
ck(cuDeviceGet(&cuDevice, nGpuID));
ck(cuCtxCreate(&m_cuContext, 0, cuDevice));
m_pEnc.reset(new NvEncoderCuda(m_cuContext,
imageSize.width, imageSize.height, inFormat));
NV_ENC_INITIALIZE_PARAMS initializeParams = { NV_ENC_INITIALIZE_PARAMS_VER };
NV_ENC_CONFIG encodeConfig = { NV_ENC_CONFIG_VER };
initializeParams.encodeConfig = &encodeConfig;
NvEncoderInitParam cliOptions(strCliParams.c_str());
m_pEnc->CreateDefaultEncoderParams(&initializeParams,
cliOptions.GetEncodeGUID(), cliOptions.GetPresetGUID(),
cliOptions.GetTuningInfo());
cliOptions.SetInitParams(&initializeParams, inFormat);
m_pEnc->CreateEncoder(&initializeParams);
}
~Encoder() {
Release();
}
void Release() {
if (m_pEnc) {
m_pEnc->EndEncode(m_NvPackets);
for (auto &nvPkt : m_NvPackets) {
if (!nvPkt.buf.empty()) {
__WriteOut(nvPkt);
}
}
m_pEnc->DestroyEncoder();
cuCtxDestroy(m_cuContext);
m_pEnc.reset(nullptr);
}
if (m_pAvCtx) {
CHECK_EQ(av_write_trailer(m_pOutCtx), 0);
CHECK_EQ(avio_close(m_pOutCtx->pb), 0);
avcodec_free_context(&m_pAvCtx);
avformat_free_context(m_pOutCtx);
m_pAvCtx = nullptr;
}
}
void EncodeFrame(cv::cuda::GpuMat image) {
CHECK_NOTNULL(m_pEnc);
const NvEncInputFrame *encoderInputFrame = m_pEnc->GetNextInputFrame();
NvEncoderCuda::CopyToDeviceFrame(m_cuContext, image.cudaPtr(), image.step,
(CUdeviceptr)encoderInputFrame->inputPtr, (int)encoderInputFrame->pitch,
m_pEnc->GetEncodeWidth(), m_pEnc->GetEncodeHeight(),
CU_MEMORYTYPE_DEVICE, encoderInputFrame->bufferFormat,
encoderInputFrame->chromaOffsets, encoderInputFrame->numChromaPlanes
);
m_pEnc->EncodeFrame(m_NvPackets);
for (auto &nvPkt : m_NvPackets) {
if (!nvPkt.buf.empty()) {
__WriteOut(nvPkt);
}
}
}
private:
AVOutputFormat* __FindFormat(const std::string &strFmtName) {
AVOutputFormat *pFmt = nullptr;
while (true) {
pFmt = av_oformat_next(pFmt);
if (nullptr == pFmt || strFmtName == pFmt->name) {
break;
}
}
return pFmt;
}
uint32_t GetHeaderSize(const std::vector<uint8_t> &buf) {
CHECK_GT(buf.size(), 5);
auto *pBuf = buf.data();
uint32_t iEnd = 0;
for (; iEnd < buf.size() - 5; ++iEnd) {
auto pCur = pBuf + iEnd;
const char *pFlag = "\x00\x00\x00\x01";
if (*(uint32_t*)pCur == *(uint32_t*)pFlag) {
if (pCur[4] == 0x06 || pCur[4] == 0x65 ||
(iEnd < buf.size() - 6 && pCur[4] == 0x26 && pCur[5])) {
break;
}
}
}
CHECK_GT(iEnd, 0);
CHECK_LE(iEnd, buf.size() - 5);
return iEnd;
}
void __WriteOut(NvPacket &nvPkt) {
AVPacket pkt = { 0 };
pkt.size = nvPkt.buf.size();
CHECK_EQ(av_new_packet(&pkt, pkt.size + AV_INPUT_BUFFER_PADDING_SIZE), 0);
memcpy(pkt.data, nvPkt.buf.data(), nvPkt.buf.size());
pkt.pts = pkt.dts = m_iFrame;
if (m_iFrame % 2) {
pkt.pts += 2;
}
pkt.dts *= m_dTimeBase;
pkt.pts *= m_dTimeBase;
++m_iFrame;
av_write_frame(m_pOutCtx, &pkt);
av_packet_unref(&pkt);
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment