Created
December 24, 2017 18:49
-
-
Save gatoatigrado/5ed4f608f06a5e68586ad3e850478b10 to your computer and use it in GitHub Desktop.
LCMS manual function inlining
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/cmslut.c b/src/cmslut.c | |
index 3c0c245..6c7edfd 100644 | |
--- a/src/cmslut.c | |
+++ b/src/cmslut.c | |
@@ -24,6 +24,9 @@ | |
//--------------------------------------------------------------------------------- | |
// | |
+#include <stdio.h> | |
+#include <stdint-gcc.h> | |
+ | |
#include "lcms2_internal.h" | |
@@ -78,28 +81,6 @@ cmsStage* CMSEXPORT cmsStageAllocIdentity(cmsContext ContextID, cmsUInt32Number | |
NULL); | |
} | |
-// Conversion functions. From floating point to 16 bits | |
-static | |
-void FromFloatTo16(const cmsFloat32Number In[], cmsUInt16Number Out[], cmsUInt32Number n) | |
-{ | |
- cmsUInt32Number i; | |
- | |
- for (i=0; i < n; i++) { | |
- Out[i] = _cmsQuickSaturateWord(In[i] * 65535.0); | |
- } | |
-} | |
- | |
-// From 16 bits to floating point | |
-static | |
-void From16ToFloat(const cmsUInt16Number In[], cmsFloat32Number Out[], cmsUInt32Number n) | |
-{ | |
- cmsUInt32Number i; | |
- | |
- for (i=0; i < n; i++) { | |
- Out[i] = (cmsFloat32Number) In[i] / 65535.0F; | |
- } | |
-} | |
- | |
// This function is quite useful to analyze the structure of a LUT and retrieve the MPE elements | |
// that conform the LUT. It should be called with the LUT, the number of expected elements and | |
@@ -308,7 +289,6 @@ cmsStage* CMSEXPORT _cmsStageAllocIdentityCurves(cmsContext ContextID, cmsUInt32 | |
// Special care should be taken here because precision loss. A temporary cmsFloat64Number buffer is being used | |
-static | |
void EvaluateMatrix(const cmsFloat32Number In[], | |
cmsFloat32Number Out[], | |
const cmsStage *mpe) | |
@@ -940,7 +920,6 @@ cmsInt32Number CMSEXPORT cmsSliceSpaceFloat(cmsUInt32Number nInputs, const cmsUI | |
// ******************************************************************************** | |
-static | |
void EvaluateLab2XYZ(const cmsFloat32Number In[], | |
cmsFloat32Number Out[], | |
const cmsStage *mpe) | |
@@ -1316,7 +1295,6 @@ cmsBool BlessLUT(cmsPipeline* lut) | |
// Default to evaluate the LUT on 16 bit-basis. Precision is retained. | |
-static | |
void _LUTeval16(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register const void* D) | |
{ | |
cmsPipeline* lut = (cmsPipeline*) D; | |
@@ -1326,12 +1304,14 @@ void _LUTeval16(register const cmsUInt16Number In[], register cmsUInt16Number Ou | |
From16ToFloat(In, &Storage[Phase][0], lut ->InputChannels); | |
- for (mpe = lut ->Elements; | |
+ for (mpe = lut->Elements; | |
mpe != NULL; | |
- mpe = mpe ->Next) { | |
+ mpe = mpe->Next) { | |
NextPhase = Phase ^ 1; | |
- mpe ->EvalPtr(&Storage[Phase][0], &Storage[NextPhase][0], mpe); | |
+ uintptr_t ptr = (uintptr_t)(mpe->EvalPtr); | |
+ printf("Function: %lx\n", ptr); | |
+ mpe->EvalPtr(&Storage[Phase][0], &Storage[NextPhase][0], mpe); | |
Phase = NextPhase; | |
} | |
diff --git a/src/cmsxform.c b/src/cmsxform.c | |
index 6b2950e..d199365 100644 | |
--- a/src/cmsxform.c | |
+++ b/src/cmsxform.c | |
@@ -341,7 +341,7 @@ void NullFloatXFORM(_cmsTRANSFORM* p, | |
// 16 bit precision ----------------------------------------------------------------------------------------------------------- | |
-// Null transformation, only applies formatters. No caché | |
+// Null transformation, only applies formatters. No caché | |
static | |
void NullXFORM(_cmsTRANSFORM* p, | |
const void* in, | |
@@ -400,20 +400,27 @@ void PrecalculatedXFORM(_cmsTRANSFORM* p, | |
memset(wIn, 0, sizeof(wIn)); | |
memset(wOut, 0, sizeof(wOut)); | |
- for (i = 0; i < LineCount; i++) { | |
- | |
- accum = (cmsUInt8Number*)in + strideIn; | |
- output = (cmsUInt8Number*)out + strideOut; | |
- | |
- for (j = 0; j < PixelsPerLine; j++) { | |
- | |
- accum = p->FromInput(p, wIn, accum, Stride->BytesPerPlaneIn); | |
- p->Lut->Eval16Fn(wIn, wOut, p->Lut->Data); | |
- output = p->ToOutput(p, wOut, output, Stride->BytesPerPlaneOut); | |
- } | |
+// Expands to the per-line / per-pixel loop of PrecalculatedXFORM, calling | |
+// `fcn(wIn, wOut, p->Lut->Data)` for every pixel. Passing a function name | |
+// (rather than a pointer value) lets the compiler see a direct call. | |
+// The expansion reads and writes the enclosing function's locals | |
+// (i, j, accum, output, strideIn, strideOut, wIn, wOut) and uses p, Stride, | |
+// in, out, LineCount and PixelsPerLine. It expands to a bare `for` statement, | |
+// so call sites do not add a trailing semicolon. | |
+#define PRECALCULATED_LOOP_BODY(fcn) \ | |
+ for (i = 0; i < LineCount; i++) { \ | |
+ \ | |
+ accum = (cmsUInt8Number*)in + strideIn; \ | |
+ output = (cmsUInt8Number*)out + strideOut; \ | |
+ \ | |
+ for (j = 0; j < PixelsPerLine; j++) { \ | |
+ \ | |
+ accum = p->FromInput(p, wIn, accum, Stride->BytesPerPlaneIn); \ | |
+ fcn(wIn, wOut, p->Lut->Data); \ | |
+ output = p->ToOutput(p, wOut, output, Stride->BytesPerPlaneOut); \ | |
+ } \ | |
+ \ | |
+ strideIn += Stride->BytesPerLineIn; \ | |
+ strideOut += Stride->BytesPerLineOut; \ | |
+ } | |
- strideIn += Stride->BytesPerLineIn; | |
- strideOut += Stride->BytesPerLineOut; | |
+ if (p->Lut->Eval16Fn == &_LUTeval16) { | |
+ PRECALCULATED_LOOP_BODY(_LUTeval16Inline) | |
+ } else { | |
+ PRECALCULATED_LOOP_BODY(p->Lut->Eval16Fn) | |
} | |
} | |
@@ -442,7 +449,7 @@ void TransformOnePixelWithGamutCheck(_cmsTRANSFORM* p, | |
p ->Lut ->Eval16Fn(wIn, wOut, p -> Lut->Data); | |
} | |
-// Gamut check, No caché, 16 bits. | |
+// Gamut check, No caché, 16 bits. | |
static | |
void PrecalculatedXFORMGamutCheck(_cmsTRANSFORM* p, | |
const void* in, | |
@@ -481,7 +488,7 @@ void PrecalculatedXFORMGamutCheck(_cmsTRANSFORM* p, | |
} | |
-// No gamut check, Caché, 16 bits, | |
+// No gamut check, Caché, 16 bits, | |
static | |
void CachedXFORM(_cmsTRANSFORM* p, | |
const void* in, | |
@@ -839,7 +846,7 @@ _cmsTRANSFORM* AllocEmptyTransform(cmsContext ContextID, cmsPipeline* lut, | |
p ->xform = NullFloatXFORM; | |
} | |
else { | |
- // Float transforms don't use caché, always are non-NULL | |
+ // Float transforms don't use caché, always are non-NULL | |
p ->xform = FloatXFORM; | |
} | |
@@ -878,16 +885,16 @@ _cmsTRANSFORM* AllocEmptyTransform(cmsContext ContextID, cmsPipeline* lut, | |
if (*dwFlags & cmsFLAGS_NOCACHE) { | |
if (*dwFlags & cmsFLAGS_GAMUTCHECK) | |
- p ->xform = PrecalculatedXFORMGamutCheck; // Gamut check, no cach� | |
+ p ->xform = PrecalculatedXFORMGamutCheck; // Gamut check, no cach� | |
else | |
- p ->xform = PrecalculatedXFORM; // No cach�, no gamut check | |
+ p ->xform = PrecalculatedXFORM; // No cach�, no gamut check | |
} | |
else { | |
if (*dwFlags & cmsFLAGS_GAMUTCHECK) | |
- p ->xform = CachedXFORMGamutCheck; // Gamut check, cach� | |
+ p ->xform = CachedXFORMGamutCheck; // Gamut check, cach� | |
else | |
- p ->xform = CachedXFORM; // No gamut check, cach� | |
+ p ->xform = CachedXFORM; // No gamut check, cach� | |
} | |
} | |
diff --git a/src/lcms2_internal.h b/src/lcms2_internal.h | |
index 5f5270c..a0183b8 100644 | |
--- a/src/lcms2_internal.h | |
+++ b/src/lcms2_internal.h | |
@@ -875,6 +875,14 @@ struct _cmsStage_struct { | |
struct _cmsStage_struct* Next; | |
}; | |
+// For templating: these evaluators are declared here (their `static` is | |
+// dropped in cmslut.c) so that their addresses can be compared against the | |
+// Eval16Fn / EvalPtr function pointers, which is how the hand-inlined | |
+// variants defined further down in this header are selected. | |
+void _LUTeval16(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register const void* D); | |
+void EvaluateLab2XYZ(const cmsFloat32Number In[], | |
+ cmsFloat32Number Out[], | |
+ const cmsStage *mpe); | |
+void EvaluateMatrix(const cmsFloat32Number In[], | |
+ cmsFloat32Number Out[], | |
+ const cmsStage *mpe); | |
// Special Stages (cannot be saved) | |
CMSCHECKPOINT cmsStage* CMSEXPORT _cmsStageAllocLab2XYZ(cmsContext ContextID); | |
@@ -1106,6 +1114,145 @@ cmsBool _cmsAdaptationMatrix(cmsMAT3* r, const cmsMAT3* ConeMatrix, const cmsC | |
cmsBool _cmsBuildRGB2XYZtransferMatrix(cmsMAT3* r, const cmsCIExyY* WhitePoint, const cmsCIExyYTRIPLE* Primaries); | |
+// Conversion functions. From floating point to 16 bits. | |
+// Writes n values to Out; each one is In[i] * 65535.0 passed through | |
+// _cmsQuickSaturateWord. | |
+// `static` gives every translation unit that includes this header its own | |
+// internal-linkage copy, so no external definition is required and no | |
+// duplicate symbols are produced. | |
+static inline __attribute__((always_inline)) | |
+void FromFloatTo16(const cmsFloat32Number In[], cmsUInt16Number Out[], cmsUInt32Number n) | |
+{ | |
+    cmsUInt32Number i; | |
+ | |
+    for (i = 0; i < n; i++) { | |
+        Out[i] = _cmsQuickSaturateWord(In[i] * 65535.0); | |
+    } | |
+} | |
+ | |
+// From 16 bits to floating point. | |
+// Writes n values to Out; each one is In[i] divided by 65535.0F. | |
+// `static` keeps this header-defined helper at internal linkage so that it | |
+// can be included from several translation units. | |
+static inline __attribute__((always_inline)) | |
+void From16ToFloat(const cmsUInt16Number In[], cmsFloat32Number Out[], cmsUInt32Number n) | |
+{ | |
+    cmsUInt32Number i; | |
+ | |
+    for (i = 0; i < n; i++) { | |
+        Out[i] = (cmsFloat32Number) In[i] / 65535.0F; | |
+    } | |
+} | |
+ | |
+// Helper used by cmsLab2XYZInline for each of the x, y and z terms. | |
+// Returns (108/841) * (t - 16/116) when t <= 24/116, and t cubed otherwise. | |
+// `static` keeps this header-defined helper at internal linkage. | |
+static inline __attribute__((always_inline)) | |
+cmsFloat64Number f_1Inline(cmsFloat64Number t) | |
+{ | |
+    const cmsFloat64Number Limit = (24.0/116.0); | |
+ | |
+    if (t <= Limit) { | |
+        return (108.0/841.0) * (t - (16.0/116.0)); | |
+    } | |
+ | |
+    return t * t * t; | |
+} | |
+ | |
+// Converts *Lab to *xyz relative to WhitePoint. | |
+// When WhitePoint is NULL the value returned by cmsD50_XYZ() is used. | |
+// `static` keeps this header-defined helper at internal linkage. | |
+// NOTE(review): CMSEXPORT is otherwise seen here only on public API entry | |
+// points; it is probably unnecessary on a private inline helper - confirm | |
+// against its definition before dropping it. | |
+static inline __attribute__((always_inline)) | |
+void CMSEXPORT cmsLab2XYZInline(const cmsCIEXYZ* WhitePoint, cmsCIEXYZ* xyz, const cmsCIELab* Lab) | |
+{ | |
+    cmsFloat64Number x, y, z; | |
+ | |
+    if (WhitePoint == NULL) | |
+        WhitePoint = cmsD50_XYZ(); | |
+ | |
+    y = (Lab->L + 16.0) / 116.0; | |
+    x = y + 0.002 * Lab->a; | |
+    z = y - 0.005 * Lab->b; | |
+ | |
+    xyz->X = f_1Inline(x) * WhitePoint->X; | |
+    xyz->Y = f_1Inline(y) * WhitePoint->Y; | |
+    xyz->Z = f_1Inline(z) * WhitePoint->Z; | |
+ | |
+} | |
+ | |
+// Used by _LUTeval16Inline in place of an indirect call when a stage's | |
+// EvalPtr is EvaluateLab2XYZ. Reads three values from In, converts them | |
+// through cmsLab2XYZInline (NULL white point) and writes three values to Out, | |
+// each divided by MAX_ENCODEABLE_XYZ. mpe is not used. | |
+// `static` keeps this header-defined helper at internal linkage. | |
+static inline __attribute__((always_inline)) | |
+void EvaluateLab2XYZInline(const cmsFloat32Number In[], | |
+                           cmsFloat32Number Out[], | |
+                           const cmsStage *mpe) | |
+{ | |
+    cmsCIELab Lab; | |
+    cmsCIEXYZ XYZ; | |
+    const cmsFloat64Number XYZadj = MAX_ENCODEABLE_XYZ; | |
+ | |
+    // V4 rules | |
+    Lab.L = In[0] * 100.0; | |
+    Lab.a = In[1] * 255.0 - 128.0; | |
+    Lab.b = In[2] * 255.0 - 128.0; | |
+ | |
+    cmsLab2XYZInline(NULL, &XYZ, &Lab); | |
+ | |
+    // From XYZ, range 0..1.99997 to 0..1.0, note that 1.99997 comes from 0xffff | |
+    // encoded as 1.15 fixed point, so 1 + (32767.0 / 32768.0) | |
+ | |
+    Out[0] = (cmsFloat32Number) ((cmsFloat64Number) XYZ.X / XYZadj); | |
+    Out[1] = (cmsFloat32Number) ((cmsFloat64Number) XYZ.Y / XYZadj); | |
+    Out[2] = (cmsFloat32Number) ((cmsFloat64Number) XYZ.Z / XYZadj); | |
+ | |
+    // Kept as the last reachable statement; it used to sit after a `return;`. | |
+    cmsUNUSED_PARAMETER(mpe); | |
+} | |
+ | |
+// Used by _LUTeval16Inline in place of an indirect call when a stage's | |
+// EvalPtr is EvaluateMatrix. For each of mpe->OutputChannels rows it | |
+// accumulates In[j] * Double[i*InputChannels + j] in a cmsFloat64Number, | |
+// adds Offset[i] when Offset is non-NULL, and stores the sum narrowed to | |
+// cmsFloat32Number in Out[i]. | |
+// `static` keeps this header-defined helper at internal linkage. | |
+static inline __attribute__((always_inline)) | |
+void EvaluateMatrixInline(const cmsFloat32Number In[], | |
+                          cmsFloat32Number Out[], | |
+                          const cmsStage *mpe) | |
+{ | |
+    cmsUInt32Number i, j; | |
+    // The stage data is only read, so keep the const from mpe. | |
+    const _cmsStageMatrixData* Data = (const _cmsStageMatrixData*) mpe->Data; | |
+    cmsFloat64Number Tmp; | |
+ | |
+    // Input is already in 0..1.0 notation | |
+    for (i = 0; i < mpe->OutputChannels; i++) { | |
+ | |
+        Tmp = 0; | |
+        for (j = 0; j < mpe->InputChannels; j++) { | |
+            Tmp += In[j] * Data->Double[i*mpe->InputChannels + j]; | |
+        } | |
+ | |
+        if (Data->Offset != NULL) | |
+            Tmp += Data->Offset[i]; | |
+ | |
+        Out[i] = (cmsFloat32Number) Tmp; | |
+    } | |
+ | |
+ | |
+    // Output in 0..1.0 domain | |
+} | |
+ | |
+// Inlinable 16-bit pipeline evaluator; D is treated as a cmsPipeline. | |
+// Converts In to float, runs every stage in lut->Elements in order while | |
+// alternating between the two Storage buffers, then converts the last | |
+// buffer written back to 16 bits in Out. | |
+// Stages whose EvalPtr is EvaluateLab2XYZ or EvaluateMatrix are run through | |
+// the inline variants; any other stage goes through its EvalPtr. | |
+// `static` keeps this header-defined helper at internal linkage. | |
+static inline __attribute__((always_inline)) | |
+void _LUTeval16Inline( | |
+    register const cmsUInt16Number In[], | |
+    register cmsUInt16Number Out[], | |
+    register const void* D) | |
+{ | |
+    // The pipeline is only read here, so keep the const carried by D. | |
+    const cmsPipeline* lut = (const cmsPipeline*) D; | |
+    const cmsStage *mpe; | |
+    cmsFloat32Number Storage[2][MAX_STAGE_CHANNELS]; | |
+    int Phase = 0, NextPhase; | |
+ | |
+    From16ToFloat(In, &Storage[Phase][0], lut->InputChannels); | |
+ | |
+    for (mpe = lut->Elements; | |
+         mpe != NULL; | |
+         mpe = mpe->Next) { | |
+ | |
+        NextPhase = Phase ^ 1; | |
+        if (mpe->EvalPtr == &EvaluateLab2XYZ) { | |
+            EvaluateLab2XYZInline(&Storage[Phase][0], &Storage[NextPhase][0], mpe); | |
+        } else if (mpe->EvalPtr == &EvaluateMatrix) { | |
+            EvaluateMatrixInline(&Storage[Phase][0], &Storage[NextPhase][0], mpe); | |
+        } else { | |
+            mpe->EvalPtr(&Storage[Phase][0], &Storage[NextPhase][0], mpe); | |
+        } | |
+        Phase = NextPhase; | |
+    } | |
+ | |
+ | |
+    FromFloatTo16(&Storage[Phase][0], Out, lut->OutputChannels); | |
+} | |
+ | |
+ | |
#define _lcms_internal_H | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment