Skip to content

Instantly share code, notes, and snippets.

@adworacz
Last active December 17, 2015 21:19
Show Gist options
  • Save adworacz/5673394 to your computer and use it in GitHub Desktop.
Save adworacz/5673394 to your computer and use it in GitHub Desktop.
Fix Lut2 to handle input and output clips with differing bit depths.
diff --git doc/functions/lut2.rst doc/functions/lut2.rst
index 4aa2933..6ee4560 100644
--- doc/functions/lut2.rst
+++ doc/functions/lut2.rst
@@ -1,15 +1,24 @@
Lut2
=======
-.. function:: Lut2(clip[] clips, int[] lut, int[] planes)
+.. function:: Lut2(clip[] clips, int[] lut, int[] planes[, int bits])
:module: std
Applies a lut that takes the pixel values of two clips into account. The lut needs to contain 2^(clip1.bits_per_sample + clip2.bits_per_sample) entries and will be applied to the planes listed in *planes*. The other planes will simply be passed through unchanged.
+ Lut2 also takes an optional bit depth parameter, *bits*, which specifies the bit depth of the output clip. *bits* must be between 8 and 16 and defaults to the bit depth of the first input clip. The user is responsible for understanding the effects of bit depth conversion, specifically from higher bit depths to lower bit depths, as no scaling or clamping is applied.
+
How to average 2 clips::
lut = []
- for y in range(2**clipx.format.bits_per_sample):
- for x in range(2**clipy.format.bits_per_sample):
+ for y in range(2 ** clipy.format.bits_per_sample):
+ for x in range(2 ** clipx.format.bits_per_sample):
lut.append((x + y)//2)
Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2])
+
+ How to average 2 clips with a 10-bit output::
+ lut = []
+ for y in range(2 ** clipy.format.bits_per_sample):
+ for x in range(2 ** clipx.format.bits_per_sample):
+ lut.append((x + y)//2)
+ Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=10)
diff --git src/core/simplefilters.c src/core/simplefilters.c
index 3d5d494..4832f22 100644
--- src/core/simplefilters.c
+++ src/core/simplefilters.c
@@ -1997,13 +1997,26 @@ static void VS_CC lutCreate(const VSMap *in, VSMap *out, void *userData, VSCore
typedef struct {
VSNodeRef *node[2];
const VSVideoInfo *vi[2];
+ VSVideoInfo *vi_out;
void *lut;
int process[3];
} Lut2Data;
+#define LUT2_PROCESS(X_CAST, Y_CAST, DST_CAST) \
+ do { \
+ for (hl = 0; hl < h; hl++) { \
+ for (x = 0; x < w; x++) { \
+ ((DST_CAST *)dstp)[x] = lut[(((Y_CAST *)srcpy)[x] << shift) + ((X_CAST *)srcpx)[x]]; \
+ } \
+ dstp += dst_stride; \
+ srcpx += srcx_stride; \
+ srcpy += srcy_stride; \
+ } \
+ } while(0)
+
static void VS_CC lut2Init(VSMap *in, VSMap *out, void **instanceData, VSNode *node, VSCore *core, const VSAPI *vsapi) {
Lut2Data *d = (Lut2Data *) * instanceData;
- vsapi->setVideoInfo(d->vi[0], 1, node);
+ vsapi->setVideoInfo(d->vi_out, 1, node);
vsapi->clearMap(in);
}
@@ -2017,7 +2030,7 @@ static const VSFrameRef *VS_CC lut2Getframe(int n, int activationReason, void **
int plane;
const VSFrameRef *srcx = vsapi->getFrameFilter(n, d->node[0], frameCtx);
const VSFrameRef *srcy = vsapi->getFrameFilter(n, d->node[1], frameCtx);
- const VSFormat *fi = vsapi->getFrameFormat(srcx);
+ const VSFormat *fi = d->vi_out->format;
const int pl[] = {0, 1, 2};
const VSFrameRef *fr[] = {d->process[0] ? 0 : srcx, d->process[1] ? 0 : srcx, d->process[2] ? 0 : srcx};
VSFrameRef *dst = vsapi->newVideoFrame2(fi, vsapi->getFrameWidth(srcx, 0), vsapi->getFrameHeight(srcx, 0), fr, pl, srcx, core);
@@ -2025,13 +2038,14 @@ static const VSFrameRef *VS_CC lut2Getframe(int n, int activationReason, void **
for (plane = 0; plane < fi->numPlanes; plane++) {
const uint8_t *srcpx = vsapi->getReadPtr(srcx, plane);
const uint8_t *srcpy = vsapi->getReadPtr(srcy, plane);
- int src_stride = vsapi->getStride(srcx, plane);
+ int srcx_stride = vsapi->getStride(srcx, plane);
+ int srcy_stride = vsapi->getStride(srcy, plane);
uint8_t *dstp = vsapi->getWritePtr(dst, plane);
int dst_stride = vsapi->getStride(dst, plane);
int h = vsapi->getFrameHeight(srcx, plane);
if (d->process[plane]) {
- int shift = fi->bitsPerSample;
+ int shift = d->vi[0]->format->bitsPerSample;
int hl;
int w = vsapi->getFrameWidth(srcx, plane);
int x;
@@ -2039,24 +2053,26 @@ static const VSFrameRef *VS_CC lut2Getframe(int n, int activationReason, void **
if (fi->bytesPerSample == 1) {
const uint8_t *lut = (uint8_t *)d->lut;
- for (hl = 0; hl < h; hl++) {
- for (x = 0; x < w; x++)
- dstp[x] = lut[(srcpy[x] << shift) + srcpx[x]];
-
- dstp += dst_stride;
- srcpx += src_stride;
- srcpy += src_stride;
+ if (d->vi[0]->format->bitsPerSample == 8 && d->vi[1]->format->bitsPerSample == 8) {
+ LUT2_PROCESS(uint8_t, uint8_t, uint8_t);
+ } else if (d->vi[0]->format->bitsPerSample == 8 && d->vi[1]->format->bitsPerSample > 8) {
+ LUT2_PROCESS(uint8_t, uint16_t, uint8_t);
+ } else if (d->vi[0]->format->bitsPerSample > 8 && d->vi[1]->format->bitsPerSample == 8) {
+ LUT2_PROCESS(uint16_t, uint8_t, uint8_t);
+ } else {
+ LUT2_PROCESS(uint16_t, uint16_t, uint8_t);
}
} else {
const uint16_t *lut = (uint16_t *)d->lut;
- for (hl = 0; hl < h; hl++) {
- for (x = 0; x < w; x++)
- ((uint16_t *)dstp)[x] = lut[(srcpy[x] << shift) + srcpx[x]];
-
- dstp += dst_stride;
- srcpx += src_stride;
- srcpy += src_stride;
+ if (d->vi[0]->format->bitsPerSample == 8 && d->vi[1]->format->bitsPerSample == 8) {
+ LUT2_PROCESS(uint8_t, uint8_t, uint16_t);
+ } else if (d->vi[0]->format->bitsPerSample == 8 && d->vi[1]->format->bitsPerSample > 8) {
+ LUT2_PROCESS(uint8_t, uint16_t, uint16_t);
+ } else if (d->vi[0]->format->bitsPerSample > 8 && d->vi[1]->format->bitsPerSample == 8) {
+ LUT2_PROCESS(uint16_t, uint8_t, uint16_t);
+ } else {
+ LUT2_PROCESS(uint16_t, uint16_t, uint16_t);
}
}
}
@@ -2135,9 +2151,26 @@ static void VS_CC lut2Create(const VSMap *in, VSMap *out, void *userData, VSCore
RETERROR("Lut2: bad lut length");
}
- d.lut = malloc(d.vi[0]->format->bytesPerSample * n);
+ int err;
+ int bits = int64ToIntS(vsapi->propGetInt(in, "bits", 0, &err));
+ if (bits == 0) {
+ bits = d.vi[0]->format->bitsPerSample;
+ } else if (bits < 8 || bits > 16) {
+ vsapi->freeNode(d.node[0]);
+ vsapi->freeNode(d.node[1]);
+ RETERROR("Lut2: Output format must be between 8 and 16 bits.");
+ }
+
+ d.vi_out = (VSVideoInfo *)malloc(sizeof(VSVideoInfo));
+ *d.vi_out = *d.vi[0];
+ d.vi_out->format = vsapi->registerFormat(d.vi[0]->format->colorFamily, d.vi[0]->format->sampleType, bits, d.vi[0]->format->subSamplingW, d.vi[0]->format->subSamplingH, core);
+
+ if (bits == 8)
+ d.lut = malloc(sizeof(uint8_t) * n);
+ else
+ d.lut = malloc(sizeof(uint16_t) * n);
- if (d.vi[0]->format->bytesPerSample == 1) {
+ if (bits == 8) {
uint8_t *lut = d.lut;
for (i = 0; i < n; i++) {
@@ -3400,7 +3433,7 @@ void VS_CC stdlibInitialize(VSConfigPlugin configFunc, VSRegisterFunction regist
registerFunc("BlankClip", "clip:clip:opt;width:int:opt;height:int:opt;format:int:opt;length:int:opt;fpsnum:int:opt;fpsden:int:opt;color:float[]:opt;", blankClipCreate, 0, plugin);
registerFunc("AssumeFPS", "clip:clip;src:clip:opt;fpsnum:int:opt;fpsden:int:opt;", assumeFPSCreate, 0, plugin);
registerFunc("Lut", "clip:clip;lut:int[];planes:int[];", lutCreate, 0, plugin);
- registerFunc("Lut2", "clips:clip[];lut:int[];planes:int[];", lut2Create, 0, plugin);
+ registerFunc("Lut2", "clips:clip[];lut:int[];planes:int[];bits:int:opt;", lut2Create, 0, plugin);
registerFunc("SelectClip", "clips:clip[];src:clip[];selector:func;", selectClipCreate, 0, plugin);
registerFunc("ModifyFrame", "clips:clip[];selector:func;", modifyFrameCreate, 0, plugin);
registerFunc("Transpose", "clip:clip;", transposeCreate, 0, plugin);
diff --git src/core/vsapi.cpp src/core/vsapi.cpp
index de3a5c7..e649f9c 100644
--- src/core/vsapi.cpp
+++ src/core/vsapi.cpp
@@ -33,8 +33,8 @@ static const VSFormat *VS_CC getFormatPreset(int id, VSCore *core) {
return core->getFormatPreset((VSPresetFormat)id);
}
-static const VSFormat *VS_CC registerFormat(int colorFamily, int sampleType, int bytesPerSample, int subSamplingW, int subSamplingH, VSCore *core) {
- return core->registerFormat((VSColorFamily)colorFamily, (VSSampleType)sampleType, bytesPerSample, subSamplingW, subSamplingH);
+static const VSFormat *VS_CC registerFormat(int colorFamily, int sampleType, int bitsPerSample, int subSamplingW, int subSamplingH, VSCore *core) {
+ return core->registerFormat((VSColorFamily)colorFamily, (VSSampleType)sampleType, bitsPerSample, subSamplingW, subSamplingH);
}
static const VSFrameRef *VS_CC cloneFrameRef(const VSFrameRef *frame) {
diff --git test/filter_test.py test/filter_test.py
index 58cc9f4..7256d5d 100644
--- test/filter_test.py
+++ test/filter_test.py
@@ -1,15 +1,16 @@
import unittest
import vapoursynth as vs
+
class FilterTestSequence(unittest.TestCase):
def setUp(self):
self.core = vs.Core()
- def checkDifference(self, cpu, gpu):
- diff = self.core.std.PlaneDifference([cpu, gpu], 0, prop="PlaneDifference0")
- diff = self.core.std.PlaneDifference([diff, gpu], 1, prop="PlaneDifference1")
- diff = self.core.std.PlaneDifference([diff, gpu], 2, prop="PlaneDifference2")
+ def checkDifference(self, original, processed):
+ diff = self.core.std.PlaneDifference([original, processed], 0, prop="PlaneDifference0")
+ diff = self.core.std.PlaneDifference([diff, processed], 1, prop="PlaneDifference1")
+ diff = self.core.std.PlaneDifference([diff, processed], 2, prop="PlaneDifference2")
for i in range(diff.num_frames):
frame = diff.get_frame(i)
@@ -28,5 +29,89 @@ class FilterTestSequence(unittest.TestCase):
self.checkDifference(clip, ret)
+ def testLUT2_8Bit(self):
+ clipx = self.core.std.BlankClip(format=vs.YUV420P8, color=[69, 242, 115])
+ clipy = self.core.std.BlankClip(format=vs.YUV420P8, color=[115, 103, 205])
+
+ lut = []
+ for y in range(2 ** clipy.format.bits_per_sample):
+ for x in range(2 ** clipx.format.bits_per_sample):
+ lut.append(x)
+
+ ret = self.core.std.Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=8)
+ self.checkDifference(clipx, ret)
+
+ ret = self.core.std.Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=10)
+ comp = self.core.std.BlankClip(format=vs.YUV420P10, color=[69, 242, 115])
+ self.checkDifference(comp, ret)
+
+ def testLUT2_8Bit_10Bit(self):
+ # Check 8-bit, 10-bit source.
+ clipx = self.core.std.BlankClip(format=vs.YUV420P8, color=[69, 242, 115])
+ clipy = self.core.std.BlankClip(format=vs.YUV420P10, color=[15, 900, 442])
+
+ lut = []
+ for y in range(2 ** clipy.format.bits_per_sample):
+ for x in range(2 ** clipx.format.bits_per_sample):
+ lut.append(x)
+
+ ret = self.core.std.Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=8)
+ self.checkDifference(clipx, ret)
+
+ ret = self.core.std.Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=10)
+ comp = self.core.std.BlankClip(format=vs.YUV420P10, color=[69, 242, 115])
+ self.checkDifference(comp, ret)
+
+ # Check 10-bit, 8-bit source.
+ # The 10-bit clip uses color values that fit in 8 bits so that the 8-bit output can be compared exactly.
+ clipx = self.core.std.BlankClip(format=vs.YUV420P10, color=[15, 235, 115])
+ clipy = self.core.std.BlankClip(format=vs.YUV420P8, color=[69, 242, 115])
+
+ lut = []
+ for y in range(2 ** clipy.format.bits_per_sample):
+ for x in range(2 ** clipx.format.bits_per_sample):
+ lut.append(x)
+
+ ret = self.core.std.Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=8)
+ comp = self.core.std.BlankClip(format=vs.YUV420P8, color=[15, 235, 115])
+ self.checkDifference(comp, ret)
+
+ ret = self.core.std.Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=10)
+ self.checkDifference(clipx, ret)
+
+ def testLUT2_9Bit_10Bit(self):
+ # Check 9-bit, 10-bit source.
+ clipx = self.core.std.BlankClip(format=vs.YUV420P9, color=[384, 10, 500])
+ clipy = self.core.std.BlankClip(format=vs.YUV420P10, color=[15, 600, 900])
+
+ lut = []
+ for y in range(2 ** clipy.format.bits_per_sample):
+ for x in range(2 ** clipx.format.bits_per_sample):
+ lut.append(x)
+
+ ret = self.core.std.Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=9)
+ self.checkDifference(clipx, ret)
+
+ ret = self.core.std.Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=8)
+ comp = self.core.std.BlankClip(format=vs.YUV420P8, color=[128, 10, 244])
+ self.checkDifference(comp, ret)
+
+ # Check 10-bit, 9-bit source.
+ clipx = self.core.std.BlankClip(format=vs.YUV420P10, color=[384, 10, 500])
+ clipy = self.core.std.BlankClip(format=vs.YUV420P9, color=[15, 384, 511])
+
+ lut = []
+ for y in range(2 ** clipy.format.bits_per_sample):
+ for x in range(2 ** clipx.format.bits_per_sample):
+ lut.append(x)
+
+ ret = self.core.std.Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=9)
+ comp = self.core.std.BlankClip(format=vs.YUV420P9, color=[384, 10, 500])
+ self.checkDifference(comp, ret)
+
+ ret = self.core.std.Lut2(clips=[clipx, clipy], lut=lut, planes=[0, 1, 2], bits=8)
+ comp = self.core.std.BlankClip(format=vs.YUV420P8, color=[128, 10, 244])
+ self.checkDifference(comp, ret)
+
if __name__ == '__main__':
unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment