Skip to content

Instantly share code, notes, and snippets.

@iwalton3
Last active April 15, 2020 13:07
Show Gist options
  • Save iwalton3/a91fee4fe68a3a7983812e6d54066063 to your computer and use it in GitHub Desktop.
Save iwalton3/a91fee4fe68a3a7983812e6d54066063 to your computer and use it in GitHub Desktop.
Patch in Nvenc yuv420p10le on latest ffmpeg.

These instructions will allow you to encode yuv420p10le with ffmpeg while using nvenc. Note that other pixel formats have not been tested and may be broken. I do not suggest using this as your main ffmpeg installation. You also need to install the nv-codec-headers and other dependencies as required by ./configure when building.

git clone https://github.com/FFmpeg/FFmpeg
cd FFmpeg
git apply nvenc-yuv420p10le.patch
./configure pkg_config='pkg-config --static' --prefix=/usr/local --extra-version=ntd_20150126 --disable-shared --enable-static --enable-gpl --enable-pthreads --enable-nonfree --enable-fontconfig --enable-libfreetype --enable-libass --enable-libfdk-aac  --enable-libmp3lame --enable-libopus --enable-libtheora --enable-libvorbis --enable-libvpx --enable-libx264 --enable-filters --enable-runtime-cpudetect
make -j8
# Do not run `sudo make install`; just use the freshly built ./ffmpeg binary in place.

You can now run the ffmpeg binary built in this folder with a command like the one below. Note that I haven't gotten debanding working yet.

./ffmpeg -i test1-orig.mkv -c:a copy -vf deband=r=32:1thr=0.01:2thr=0.01:3thr=0.01:4thr=0.01 -profile:v main10 -c:v hevc_nvenc -pix_fmt yuv420p10le -preset hq -cq 27 -rc vbr -qmin 27 -qmax 27 -b:v 0 -bf 3 -tune animation test1-nvenc.mkv

This patch is based on:

diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 9a96bf2bba..65247475a9 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -41,9 +41,11 @@
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
AV_PIX_FMT_YUV420P,
+ AV_PIX_FMT_YUV420P10LE,
AV_PIX_FMT_NV12,
AV_PIX_FMT_P010,
AV_PIX_FMT_YUV444P,
+ AV_PIX_FMT_YUV444P10LE,
AV_PIX_FMT_P016, // Truncated to 10bits
AV_PIX_FMT_YUV444P16, // Truncated to 10bits
AV_PIX_FMT_0RGB32,
@@ -1100,6 +1102,12 @@ static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
break;
}
+ // force setting profile as main10 if input is AV_PIX_FMT_YUVXXXP10LE
+ if (ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE) {
+ cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+ avctx->profile = FF_PROFILE_HEVC_MAIN_10;
+ }
+
// force setting profile as main10 if input is 10 bit
if (IS_10BIT(ctx->data_pix_fmt)) {
cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
@@ -1112,9 +1120,9 @@ static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
avctx->profile = FF_PROFILE_HEVC_REXT;
}
- hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1;
+ // keep IS_YUV444() so the formats it already covers still select 4:4:4; add the new planar 10-bit 4:4:4 input
+ hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE ? 3 : 1;
- hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt) ? 2 : 0;
+ // keep IS_10BIT() so inputs already forced to Main10 above still get a 10-bit depth; add the new planar 10-bit inputs
+ hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt) || ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE ? 2 : 0;
hevc->level = ctx->level;
@@ -1297,6 +1305,8 @@ static NV_ENC_BUFFER_FORMAT nvenc_map_buffer_format(enum AVPixelFormat pix_fmt)
switch (pix_fmt) {
case AV_PIX_FMT_YUV420P:
return NV_ENC_BUFFER_FORMAT_YV12_PL;
+ case AV_PIX_FMT_YUV420P10LE:
+ return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
case AV_PIX_FMT_NV12:
return NV_ENC_BUFFER_FORMAT_NV12_PL;
case AV_PIX_FMT_P010:
@@ -1304,6 +1314,8 @@ static NV_ENC_BUFFER_FORMAT nvenc_map_buffer_format(enum AVPixelFormat pix_fmt)
return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
case AV_PIX_FMT_YUV444P:
return NV_ENC_BUFFER_FORMAT_YUV444_PL;
+ case AV_PIX_FMT_YUV444P10LE:
+ return NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
case AV_PIX_FMT_YUV444P16:
return NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
case AV_PIX_FMT_0RGB32:
@@ -1576,32 +1588,129 @@ static NvencSurface *get_free_frame(NvencContext *ctx)
return tmp_surf;
}
-static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
- NV_ENC_LOCK_INPUT_BUFFER *lock_buffer_params, const AVFrame *frame)
+/**
+ * Copy one plane of 16-bit samples, shifting every sample left by 6 bits.
+ *
+ * Each row is read and written as an array of uint16_t in host byte order.
+ * NOTE(review): the callers pass ...P10LE frames, so this presumably relies
+ * on a little-endian host - confirm.
+ *
+ * Does nothing when dst or src is NULL. Aborts through av_assert0() when
+ * either stride is smaller than width * 2 bytes.
+ *
+ * @param dst          destination plane
+ * @param dst_linesize destination stride in bytes
+ * @param src          source plane (never written)
+ * @param src_linesize source stride in bytes
+ * @param width        number of samples per row
+ * @param height       number of rows
+ */
+static void copy_single_10bit_plane(uint8_t *dst, int dst_linesize,
+                                    const uint8_t *src, int src_linesize,
+                                    int width, int height)
{
- int dst_linesize[4] = {
- lock_buffer_params->pitch,
- lock_buffer_params->pitch,
- lock_buffer_params->pitch,
- lock_buffer_params->pitch
- };
- uint8_t *dst_data[4];
- int ret;
-
- if (frame->format == AV_PIX_FMT_YUV420P)
- dst_linesize[1] = dst_linesize[2] >>= 1;
+    if (!dst || !src)
+        return;
+    /* every row must hold at least width 2-byte samples */
+    av_assert0(abs(src_linesize) >= width << 1);
+    av_assert0(abs(dst_linesize) >= width << 1);
+    for (; height > 0; height--) {
+        uint16_t *out = (uint16_t *)dst;
+        const uint16_t *in = (const uint16_t *)src; /* keep the source const-qualified */
+        for (int x = 0; x < width; x++)
+            out[x] = in[x] << 6;
+        dst += dst_linesize;
+        src += src_linesize;
+    }
+}
- ret = av_image_fill_pointers(dst_data, frame->format, nv_surface->height,
- lock_buffer_params->bufferDataPtr, dst_linesize);
- if (ret < 0)
- return ret;
+/**
+ * Interleave two planes of 16-bit samples into one destination plane,
+ * shifting every sample left by 6 bits.
+ *
+ * Each destination row receives alternating samples: src1[0], src2[0],
+ * src1[1], src2[1], ... One sample is taken from each source for every two
+ * units of width, so width / 2 (rounded up) pairs are written per row.
+ * Rows are accessed as uint16_t arrays in host byte order.
+ *
+ * Does nothing when any pointer is NULL. Aborts through av_assert0() when a
+ * source stride is smaller than width bytes or the destination stride is
+ * smaller than width * 2 bytes.
+ *
+ * @param dst           destination plane
+ * @param dst_linesize  destination stride in bytes
+ * @param src1          first source plane (never written)
+ * @param src1_linesize stride of src1 in bytes
+ * @param src2          second source plane (never written)
+ * @param src2_linesize stride of src2 in bytes
+ * @param width         row width counted in destination samples
+ * @param height        number of rows to produce
+ */
+static void interleave_10bit_planes(uint8_t *dst, int dst_linesize,
+                                    const uint8_t *src1, int src1_linesize,
+                                    const uint8_t *src2, int src2_linesize,
+                                    int width, int height)
+{
+    if (!dst || !src1 || !src2)
+        return;
+    av_assert0(abs(src1_linesize) >= width);
+    av_assert0(abs(src2_linesize) >= width);
+    av_assert0(abs(dst_linesize) >= width << 1);
+    for (; height > 0; height--) {
+        uint16_t *out = (uint16_t *)dst;
+        const uint16_t *in1 = (const uint16_t *)src1; /* keep the sources const-qualified */
+        const uint16_t *in2 = (const uint16_t *)src2;
+        /* one sample from each source per two output columns */
+        for (int w = width; w > 0; w -= 2) {
+            *out++ = *in1++ << 6;
+            *out++ = *in2++ << 6;
+        }
+        dst  += dst_linesize;
+        src1 += src1_linesize;
+        src2 += src2_linesize;
+    }
+}
- if (frame->format == AV_PIX_FMT_YUV420P)
- FFSWAP(uint8_t*, dst_data[1], dst_data[2]);
- av_image_copy(dst_data, dst_linesize,
- (const uint8_t**)frame->data, frame->linesize, frame->format,
- avctx->width, avctx->height);
+/**
+ * Copy the planes of a software frame into a locked NVENC input buffer.
+ *
+ * Planes are written back to back into bufferDataPtr; every full-size plane
+ * occupies nv_surface->height * pitch bytes.
+ *
+ * YUV420P, YUV420P10LE, NV12, YUV444P and YUV444P10LE are laid out by hand.
+ * Every other format goes through the generic av_image_fill_pointers() /
+ * av_image_copy() path, because the encoder's pixel format list still offers
+ * formats such as P010, P016, YUV444P16 and 0RGB32.
+ *
+ * @param avctx              codec context; supplies the picture width/height
+ * @param nv_surface         target surface; supplies the allocated height
+ * @param lock_buffer_params locked buffer (bufferDataPtr and pitch are read)
+ * @param frame              source frame
+ * @return 0 on success, a negative error code from av_image_fill_pointers()
+ *         when the format cannot be laid out
+ */
+static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
+                            NV_ENC_LOCK_INPUT_BUFFER *lock_buffer_params, const AVFrame *frame)
+{
+    uint8_t *buf = lock_buffer_params->bufferDataPtr;
+    const int pitch = lock_buffer_params->pitch;
+    /* size in bytes of one full-resolution plane inside the locked buffer */
+    const int off = nv_surface->height * pitch;
+    const int width = avctx->width;
+    const int height = avctx->height;
+
+    switch (frame->format) {
+    case AV_PIX_FMT_YUV420P:
+        av_image_copy_plane(buf, pitch,
+                            frame->data[0], frame->linesize[0],
+                            width, height);
+        buf += off;
+
+        /* data[2] is written before data[1]: the buffer uses the YV12 plane order */
+        av_image_copy_plane(buf, pitch >> 1,
+                            frame->data[2], frame->linesize[2],
+                            width >> 1, height >> 1);
+        buf += off >> 2;
+
+        av_image_copy_plane(buf, pitch >> 1,
+                            frame->data[1], frame->linesize[1],
+                            width >> 1, height >> 1);
+        break;
+    case AV_PIX_FMT_YUV420P10LE:
+        copy_single_10bit_plane(buf, pitch,
+                                frame->data[0], frame->linesize[0],
+                                width, height);
+        buf += off;
+
+        /* the two chroma planes are merged into one interleaved plane */
+        interleave_10bit_planes(buf, pitch,
+                                frame->data[1], frame->linesize[1],
+                                frame->data[2], frame->linesize[2],
+                                width, height >> 1);
+        break;
+    case AV_PIX_FMT_NV12:
+        av_image_copy_plane(buf, pitch,
+                            frame->data[0], frame->linesize[0],
+                            width, height);
+        buf += off;
+
+        av_image_copy_plane(buf, pitch,
+                            frame->data[1], frame->linesize[1],
+                            width, height >> 1);
+        break;
+    case AV_PIX_FMT_YUV444P:
+        for (int plane = 0; plane < 3; plane++) {
+            av_image_copy_plane(buf, pitch,
+                                frame->data[plane], frame->linesize[plane],
+                                width, height);
+            buf += off;
+        }
+        break;
+    case AV_PIX_FMT_YUV444P10LE:
+        for (int plane = 0; plane < 3; plane++) {
+            copy_single_10bit_plane(buf, pitch,
+                                    frame->data[plane], frame->linesize[plane],
+                                    width, height);
+            buf += off;
+        }
+        break;
+    default: {
+        /*
+         * Generic path (the pre-patch behaviour) for every other format the
+         * encoder still accepts, instead of failing with EINVAL.
+         */
+        int dst_linesize[4] = { pitch, pitch, pitch, pitch };
+        uint8_t *dst_data[4];
+        int ret = av_image_fill_pointers(dst_data, frame->format, nv_surface->height,
+                                         lock_buffer_params->bufferDataPtr, dst_linesize);
+        if (ret < 0)
+            return ret;
+
+        av_image_copy(dst_data, dst_linesize,
+                      (const uint8_t **)frame->data, frame->linesize, frame->format,
+                      width, height);
+        break;
+    }
+    }
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment