13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 */
24
25 // This file is available under and governed by the GNU General Public
26 // License version 2 only, as published by the Free Software Foundation.
27 // However, the following notice accompanied the original version of this
28 // file:
29 //
30 //---------------------------------------------------------------------------------
31 //
32 // Little Color Management System
33 // Copyright (c) 1998-2017 Marti Maria Saguer
34 //
35 // Permission is hereby granted, free of charge, to any person obtaining
36 // a copy of this software and associated documentation files (the "Software"),
37 // to deal in the Software without restriction, including without limitation
38 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
39 // and/or sell copies of the Software, and to permit persons to whom the Software
40 // is furnished to do so, subject to the following conditions:
41 //
42 // The above copyright notice and this permission notice shall be included in
43 // all copies or substantial portions of the Software.
44 //
45 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
46 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
47 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
48 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
49 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
50 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
51 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
52 //
53 //---------------------------------------------------------------------------------
300 // Remove V2 to V4 followed by V4 to V2
301 Opt |= _Remove2Op(Lut, cmsSigLabV2toV4, cmsSigLabV4toV2);
302
303 // Remove float pcs Lab conversions
304 Opt |= _Remove2Op(Lut, cmsSigLab2FloatPCS, cmsSigFloatPCS2Lab);
305
306 // Remove float pcs XYZ conversions
307 Opt |= _Remove2Op(Lut, cmsSigXYZ2FloatPCS, cmsSigFloatPCS2XYZ);
308
309 // Simplify matrix.
310 Opt |= _MultiplyMatrix(Lut);
311
312 if (Opt) AnyOpt = TRUE;
313
314 } while (Opt);
315
316 return AnyOpt;
317 }
318
319 static
320 void Eval16nop1D(register const cmsUInt16Number Input[],
321 register cmsUInt16Number Output[],
322 register const struct _cms_interp_struc* p)
323 {
324 Output[0] = Input[0];
325
326 cmsUNUSED_PARAMETER(p);
327 }
328
329 static
330 void PrelinEval16(register const cmsUInt16Number Input[],
331 register cmsUInt16Number Output[],
332 register const void* D)
333 {
334 Prelin16Data* p16 = (Prelin16Data*) D;
335 cmsUInt16Number StageABC[MAX_INPUT_DIMENSIONS];
336 cmsUInt16Number StageDEF[cmsMAXCHANNELS];
337 cmsUInt32Number i;
338
339 for (i=0; i < p16 ->nInputs; i++) {
340
341 p16 ->EvalCurveIn16[i](&Input[i], &StageABC[i], p16 ->ParamsCurveIn16[i]);
342 }
343
344 p16 ->EvalCLUT(StageABC, StageDEF, p16 ->CLUTparams);
345
346 for (i=0; i < p16 ->nOutputs; i++) {
347
348 p16 ->EvalCurveOut16[i](&StageDEF[i], &Output[i], p16 ->ParamsCurveOut16[i]);
349 }
350 }
351
352
418 }
419 else {
420
421 p16 ->ParamsCurveOut16[i] = Out[i] ->InterpParams;
422 p16 -> EvalCurveOut16[i] = p16 ->ParamsCurveOut16[i]->Interpolation.Lerp16;
423 }
424 }
425
426 return p16;
427 }
428
429
430
431 // Resampling ---------------------------------------------------------------------------------
432
433 #define PRELINEARIZATION_POINTS 4096
434
435 // Sampler implemented by another LUT. This is a clean way to precalculate the devicelink 3D CLUT for
436 // almost any transform. We use floating point precision and then convert from floating point to 16 bits.
437 static
438 cmsInt32Number XFormSampler16(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void* Cargo)
439 {
440 cmsPipeline* Lut = (cmsPipeline*) Cargo;
441 cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS];
442 cmsUInt32Number i;
443
444 _cmsAssert(Lut -> InputChannels < cmsMAXCHANNELS);
445 _cmsAssert(Lut -> OutputChannels < cmsMAXCHANNELS);
446
447 // From 16 bit to floating point
448 for (i=0; i < Lut ->InputChannels; i++)
449 InFloat[i] = (cmsFloat32Number) (In[i] / 65535.0);
450
451 // Evaluate in floating point
452 cmsPipelineEvalFloat(InFloat, OutFloat, Lut);
453
454 // Back to 16 bits representation
455 for (i=0; i < Lut ->OutputChannels; i++)
456 Out[i] = _cmsQuickSaturateWord(OutFloat[i] * 65535.0);
457
458 // Always succeed
656 // This function should be used on 16-bit LUTs only, as floating point loses precision when simplified
657 // -----------------------------------------------------------------------------------------------------------------------------------------------
658
659 static
660 cmsBool OptimizeByResampling(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
661 {
662 cmsPipeline* Src = NULL;
663 cmsPipeline* Dest = NULL;
664 cmsStage* mpe;
665 cmsStage* CLUT;
666 cmsStage *KeepPreLin = NULL, *KeepPostLin = NULL;
667 cmsUInt32Number nGridPoints;
668 cmsColorSpaceSignature ColorSpace, OutputColorSpace;
669 cmsStage *NewPreLin = NULL;
670 cmsStage *NewPostLin = NULL;
671 _cmsStageCLutData* DataCLUT;
672 cmsToneCurve** DataSetIn;
673 cmsToneCurve** DataSetOut;
674 Prelin16Data* p16;
675
676 // This is a lossy optimization! It does not apply in floating-point cases
677 if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
678
679 ColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
680 OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
681
682 // Color space must be specified
683 if (ColorSpace == (cmsColorSpaceSignature)0 ||
684 OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE;
685
686 nGridPoints = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
687
688 // For empty LUTs, 2 points are enough
689 if (cmsPipelineStageCount(*Lut) == 0)
690 nGridPoints = 2;
691
692 Src = *Lut;
693
694 // Named color pipelines cannot be optimized either
695 for (mpe = cmsPipelineGetPtrToFirstStage(Src);
696 mpe != NULL;
920
921 return p8;
922 }
923
924 static
925 void Prelin8free(cmsContext ContextID, void* ptr)
926 {
927 _cmsFree(ContextID, ptr);
928 }
929
930 static
931 void* Prelin8dup(cmsContext ContextID, const void* ptr)
932 {
933 return _cmsDupMem(ContextID, ptr, sizeof(Prelin8Data));
934 }
935
936
937
938 // An optimized interpolation for 8-bit input.
939 #define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
940 static
941 void PrelinEval8(register const cmsUInt16Number Input[],
942 register cmsUInt16Number Output[],
943 register const void* D)
944 {
945
946 cmsUInt8Number r, g, b;
947 cmsS15Fixed16Number rx, ry, rz;
948 cmsS15Fixed16Number c0, c1, c2, c3, Rest;
949 int OutChan;
950 register cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1;
951 Prelin8Data* p8 = (Prelin8Data*) D;
952 register const cmsInterpParams* p = p8 ->p;
953 int TotalOut = (int) p -> nOutputs;
954 const cmsUInt16Number* LutTable = (const cmsUInt16Number*) p->Table;
955
956 r = (cmsUInt8Number) (Input[0] >> 8);
957 g = (cmsUInt8Number) (Input[1] >> 8);
958 b = (cmsUInt8Number) (Input[2] >> 8);
959
960 X0 = X1 = (cmsS15Fixed16Number) p8->X0[r];
961 Y0 = Y1 = (cmsS15Fixed16Number) p8->Y0[g];
962 Z0 = Z1 = (cmsS15Fixed16Number) p8->Z0[b];
963
964 rx = p8 ->rx[r];
965 ry = p8 ->ry[g];
966 rz = p8 ->rz[b];
967
968 X1 = X0 + (cmsS15Fixed16Number)((rx == 0) ? 0 : p ->opta[2]);
969 Y1 = Y0 + (cmsS15Fixed16Number)((ry == 0) ? 0 : p ->opta[1]);
970 Z1 = Z0 + (cmsS15Fixed16Number)((rz == 0) ? 0 : p ->opta[0]);
971
972
1053 // We need xput over here
1054
1055 static
1056 cmsBool OptimizeByComputingLinearization(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
1057 {
1058 cmsPipeline* OriginalLut;
1059 cmsUInt32Number nGridPoints;
1060 cmsToneCurve *Trans[cmsMAXCHANNELS], *TransReverse[cmsMAXCHANNELS];
1061 cmsUInt32Number t, i;
1062 cmsFloat32Number v, In[cmsMAXCHANNELS], Out[cmsMAXCHANNELS];
1063 cmsBool lIsSuitable, lIsLinear;
1064 cmsPipeline* OptimizedLUT = NULL, *LutPlusCurves = NULL;
1065 cmsStage* OptimizedCLUTmpe;
1066 cmsColorSpaceSignature ColorSpace, OutputColorSpace;
1067 cmsStage* OptimizedPrelinMpe;
1068 cmsStage* mpe;
1069 cmsToneCurve** OptimizedPrelinCurves;
1070 _cmsStageCLutData* OptimizedPrelinCLUT;
1071
1072
1073 // This is a lossy optimization! It does not apply in floating-point cases
1074 if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
1075
1076 // Only on chunky RGB
1077 if (T_COLORSPACE(*InputFormat) != PT_RGB) return FALSE;
1078 if (T_PLANAR(*InputFormat)) return FALSE;
1079
1080 if (T_COLORSPACE(*OutputFormat) != PT_RGB) return FALSE;
1081 if (T_PLANAR(*OutputFormat)) return FALSE;
1082
1083 // On 16 bits, user has to specify the feature
1084 if (!_cmsFormatterIs8bit(*InputFormat)) {
1085 if (!(*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION)) return FALSE;
1086 }
1087
1088 OriginalLut = *Lut;
1089
1090 // Named color pipelines cannot be optimized either
1091 for (mpe = cmsPipelineGetPtrToFirstStage(OriginalLut);
1092 mpe != NULL;
1093 mpe = cmsStageNext(mpe)) {
1096
1097 ColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
1098 OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
1099
1100 // Color space must be specified
1101 if (ColorSpace == (cmsColorSpaceSignature)0 ||
1102 OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE;
1103
1104 nGridPoints = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
1105
1106 // Empty gamma containers
1107 memset(Trans, 0, sizeof(Trans));
1108 memset(TransReverse, 0, sizeof(TransReverse));
1109
1110 // If the last stage of the original lut are curves, and those curves are
1111 // degenerated, it is likely the transform is squeezing and clipping
1112 // the output from previous CLUT. We cannot optimize this case
1113 {
1114 cmsStage* last = cmsPipelineGetPtrToLastStage(OriginalLut);
1115
1116 if (cmsStageType(last) == cmsSigCurveSetElemType) {
1117
1118 _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*)cmsStageData(last);
1119 for (i = 0; i < Data->nCurves; i++) {
1120 if (IsDegenerated(Data->TheCurves[i]))
1121 goto Error;
1122 }
1123 }
1124 }
1125
1126 for (t = 0; t < OriginalLut ->InputChannels; t++) {
1127 Trans[t] = cmsBuildTabulatedToneCurve16(OriginalLut ->ContextID, PRELINEARIZATION_POINTS, NULL);
1128 if (Trans[t] == NULL) goto Error;
1129 }
1130
1131 // Populate the curves
1132 for (i=0; i < PRELINEARIZATION_POINTS; i++) {
1133
1134 v = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - 1));
1135
1204
1205 // Free resources
1206 for (t = 0; t < OriginalLut ->InputChannels; t++) {
1207
1208 if (Trans[t]) cmsFreeToneCurve(Trans[t]);
1209 if (TransReverse[t]) cmsFreeToneCurve(TransReverse[t]);
1210 }
1211
1212 cmsPipelineFree(LutPlusCurves);
1213
1214
1215 OptimizedPrelinCurves = _cmsStageGetPtrToCurveSet(OptimizedPrelinMpe);
1216 OptimizedPrelinCLUT = (_cmsStageCLutData*) OptimizedCLUTmpe ->Data;
1217
1218 // Set the evaluator if 8-bit
1219 if (_cmsFormatterIs8bit(*InputFormat)) {
1220
1221 Prelin8Data* p8 = PrelinOpt8alloc(OptimizedLUT ->ContextID,
1222 OptimizedPrelinCLUT ->Params,
1223 OptimizedPrelinCurves);
1224 if (p8 == NULL) {
1225 cmsPipelineFree(OptimizedLUT);
1226 return FALSE;
1227 }
1228
1229 _cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval8, (void*) p8, Prelin8free, Prelin8dup);
1230
1231 }
1232 else
1233 {
1234 Prelin16Data* p16 = PrelinOpt16alloc(OptimizedLUT ->ContextID,
1235 OptimizedPrelinCLUT ->Params,
1236 3, OptimizedPrelinCurves, 3, NULL);
1237 if (p16 == NULL) {
1238 cmsPipelineFree(OptimizedLUT);
1239 return FALSE;
1240 }
1241
1242 _cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup);
1243
1244 }
1245
1246 // Don't fix white on absolute colorimetric
1247 if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
1248 *dwFlags |= cmsFLAGS_NOWHITEONWHITEFIXUP;
1249
1250 if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) {
1251
1252 if (!FixWhiteMisalignment(OptimizedLUT, ColorSpace, OutputColorSpace)) {
1253
1254 return FALSE;
1255 }
1256 }
1257
1258 // And return the obtained LUT
1259
1260 cmsPipelineFree(OriginalLut);
1348
1349 if (nElements == 256U) {
1350
1351 for (j=0; j < nElements; j++) {
1352
1353 c16 ->Curves[i][j] = cmsEvalToneCurve16(G[i], FROM_8_TO_16(j));
1354 }
1355 }
1356 else {
1357
1358 for (j=0; j < nElements; j++) {
1359 c16 ->Curves[i][j] = cmsEvalToneCurve16(G[i], (cmsUInt16Number) j);
1360 }
1361 }
1362 }
1363
1364 return c16;
1365 }
1366
1367 static
1368 void FastEvaluateCurves8(register const cmsUInt16Number In[],
1369 register cmsUInt16Number Out[],
1370 register const void* D)
1371 {
1372 Curves16Data* Data = (Curves16Data*) D;
1373 int x;
1374 cmsUInt32Number i;
1375
1376 for (i=0; i < Data ->nCurves; i++) {
1377
1378 x = (In[i] >> 8);
1379 Out[i] = Data -> Curves[i][x];
1380 }
1381 }
1382
1383
1384 static
1385 void FastEvaluateCurves16(register const cmsUInt16Number In[],
1386 register cmsUInt16Number Out[],
1387 register const void* D)
1388 {
1389 Curves16Data* Data = (Curves16Data*) D;
1390 cmsUInt32Number i;
1391
1392 for (i=0; i < Data ->nCurves; i++) {
1393 Out[i] = Data -> Curves[i][In[i]];
1394 }
1395 }
1396
1397
1398 static
1399 void FastIdentity16(register const cmsUInt16Number In[],
1400 register cmsUInt16Number Out[],
1401 register const void* D)
1402 {
1403 cmsPipeline* Lut = (cmsPipeline*) D;
1404 cmsUInt32Number i;
1405
1406 for (i=0; i < Lut ->InputChannels; i++) {
1407 Out[i] = In[i];
1408 }
1409 }
1410
1411
1412 // If the target LUT holds only curves, the optimization procedure is to join all those
1413 // curves together. That only works on curves and does not work on matrices.
1414 static
1415 cmsBool OptimizeByJoiningCurves(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
1416 {
1417 cmsToneCurve** GammaTables = NULL;
1418 cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS];
1419 cmsUInt32Number i, j;
1420 cmsPipeline* Src = *Lut;
1421 cmsPipeline* Dest = NULL;
1422 cmsStage* mpe;
1423 cmsStage* ObtainedCurves = NULL;
1424
1425
1426 // This is a lossy optimization! It does not apply in floating-point cases
1427 if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
1428
1429 // Only curves in this LUT?
1430 for (mpe = cmsPipelineGetPtrToFirstStage(Src);
1431 mpe != NULL;
1432 mpe = cmsStageNext(mpe)) {
1433 if (cmsStageType(mpe) != cmsSigCurveSetElemType) return FALSE;
1434 }
1435
1436 // Allocate an empty LUT
1437 Dest = cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
1438 if (Dest == NULL) return FALSE;
1439
1440 // Create target curves
1441 GammaTables = (cmsToneCurve**) _cmsCalloc(Src ->ContextID, Src ->InputChannels, sizeof(cmsToneCurve*));
1442 if (GammaTables == NULL) goto Error;
1443
1444 for (i=0; i < Src ->InputChannels; i++) {
1445 GammaTables[i] = cmsBuildTabulatedToneCurve16(Src ->ContextID, PRELINEARIZATION_POINTS, NULL);
1446 if (GammaTables[i] == NULL) goto Error;
1456
1457 for (j=0; j < Src ->InputChannels; j++)
1458 GammaTables[j] -> Table16[i] = _cmsQuickSaturateWord(OutFloat[j] * 65535.0);
1459 }
1460
1461 ObtainedCurves = cmsStageAllocToneCurves(Src ->ContextID, Src ->InputChannels, GammaTables);
1462 if (ObtainedCurves == NULL) goto Error;
1463
1464 for (i=0; i < Src ->InputChannels; i++) {
1465 cmsFreeToneCurve(GammaTables[i]);
1466 GammaTables[i] = NULL;
1467 }
1468
1469 if (GammaTables != NULL) {
1470 _cmsFree(Src->ContextID, GammaTables);
1471 GammaTables = NULL;
1472 }
1473
1474 // Maybe the curves are linear at the end
1475 if (!AllCurvesAreLinear(ObtainedCurves)) {
1476
1477 if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, ObtainedCurves))
1478 goto Error;
1479
1480 // If the curves are to be applied in 8 bits, we can save memory
1481 if (_cmsFormatterIs8bit(*InputFormat)) {
1482
1483 _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*) ObtainedCurves ->Data;
1484 Curves16Data* c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, 256, Data ->TheCurves);
1485
1486 if (c16 == NULL) goto Error;
1487 *dwFlags |= cmsFLAGS_NOCACHE;
1488 _cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves8, c16, CurvesFree, CurvesDup);
1489
1490 }
1491 else {
1492
1493 _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*) cmsStageData(ObtainedCurves);
1494 Curves16Data* c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, 65536, Data ->TheCurves);
1495
1496 if (c16 == NULL) goto Error;
1497 *dwFlags |= cmsFLAGS_NOCACHE;
1498 _cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves16, c16, CurvesFree, CurvesDup);
1499 }
1500 }
1501 else {
1502
1503 // LUT optimizes to nothing. Set the identity LUT
1504 cmsStageFree(ObtainedCurves);
1505 ObtainedCurves = NULL;
1506
1507 if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageAllocIdentity(Dest ->ContextID, Src ->InputChannels)))
1508 goto Error;
1509
1510 *dwFlags |= cmsFLAGS_NOCACHE;
1511 _cmsPipelineSetOptimizationParameters(Dest, FastIdentity16, (void*) Dest, NULL, NULL);
1512 }
1513
1540 // LUT is Shaper - Matrix - Matrix - Shaper, which is very frequent when combining two matrix-shaper profiles
1541
1542
1543 static
1544 void FreeMatShaper(cmsContext ContextID, void* Data)
1545 {
1546 if (Data != NULL) _cmsFree(ContextID, Data);
1547 }
1548
1549 static
1550 void* DupMatShaper(cmsContext ContextID, const void* Data)
1551 {
1552 return _cmsDupMem(ContextID, Data, sizeof(MatShaper8Data));
1553 }
1554
1555
1556 // A fast matrix-shaper evaluator for 8 bits. This is a bit tricky since I'm using 1.14 signed fixed point
1557 // to accomplish some performance. Actually it takes 256x3 16 bits tables and 16385 x 3 tables of 8 bits,
1558 // in total about 50K, and the performance boost is huge!
1559 static
1560 void MatShaperEval16(register const cmsUInt16Number In[],
1561 register cmsUInt16Number Out[],
1562 register const void* D)
1563 {
1564 MatShaper8Data* p = (MatShaper8Data*) D;
1565 cmsS1Fixed14Number l1, l2, l3, r, g, b;
1566 cmsUInt32Number ri, gi, bi;
1567
1568 // In this case (and only in this case!) we can use this simplification since
1569 // In[] is assured to come from a 8 bit number. (a << 8 | a)
1570 ri = In[0] & 0xFFU;
1571 gi = In[1] & 0xFFU;
1572 bi = In[2] & 0xFFU;
1573
1574 // Across first shaper, which also converts to 1.14 fixed point
1575 r = p->Shaper1R[ri];
1576 g = p->Shaper1G[gi];
1577 b = p->Shaper1B[bi];
1578
1579 // Evaluate the matrix in 1.14 fixed point
1580 l1 = (p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b + p->Off[0] + 0x2000) >> 14;
1581 l2 = (p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b + p->Off[1] + 0x2000) >> 14;
1582 l3 = (p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b + p->Off[2] + 0x2000) >> 14;
1782 goto Error;
1783
1784 if (!IdentityMat) {
1785
1786 if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageAllocMatrix(Dest->ContextID, 3, 3, (const cmsFloat64Number*)&res, Offset)))
1787 goto Error;
1788 }
1789
1790 if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageDup(Curve2)))
1791 goto Error;
1792
1793 // If identity on matrix, we can further optimize the curves, so call the join curves routine
1794 if (IdentityMat) {
1795
1796 OptimizeByJoiningCurves(&Dest, Intent, InputFormat, OutputFormat, dwFlags);
1797 }
1798 else {
1799 _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(Curve1);
1800 _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(Curve2);
1801
1802 // In this particular optimization, cache does not help as it takes more time to deal with
1803 // the cache than with the pixel handling
1804 *dwFlags |= cmsFLAGS_NOCACHE;
1805
1806 // Setup the optimization routines
1807 SetMatShaper(Dest, mpeC1 ->TheCurves, &res, (cmsVEC3*) Offset, mpeC2->TheCurves, OutputFormat);
1808 }
1809
1810 cmsPipelineFree(Src);
1811 *Lut = Dest;
1812 return TRUE;
1813 Error:
1814 // Leave Src unchanged
1815 cmsPipelineFree(Dest);
1816 return FALSE;
1817 }
1818
1819
1820 // -------------------------------------------------------------------------------------------------------------------------------------
1821 // Optimization plug-ins
1822
1823 // List of optimizations
|
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 */
24
25 // This file is available under and governed by the GNU General Public
26 // License version 2 only, as published by the Free Software Foundation.
27 // However, the following notice accompanied the original version of this
28 // file:
29 //
30 //---------------------------------------------------------------------------------
31 //
32 // Little Color Management System
33 // Copyright (c) 1998-2020 Marti Maria Saguer
34 //
35 // Permission is hereby granted, free of charge, to any person obtaining
36 // a copy of this software and associated documentation files (the "Software"),
37 // to deal in the Software without restriction, including without limitation
38 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
39 // and/or sell copies of the Software, and to permit persons to whom the Software
40 // is furnished to do so, subject to the following conditions:
41 //
42 // The above copyright notice and this permission notice shall be included in
43 // all copies or substantial portions of the Software.
44 //
45 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
46 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
47 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
48 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
49 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
50 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
51 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
52 //
53 //---------------------------------------------------------------------------------
300 // Remove V2 to V4 followed by V4 to V2
301 Opt |= _Remove2Op(Lut, cmsSigLabV2toV4, cmsSigLabV4toV2);
302
303 // Remove float pcs Lab conversions
304 Opt |= _Remove2Op(Lut, cmsSigLab2FloatPCS, cmsSigFloatPCS2Lab);
305
306 // Remove float pcs XYZ conversions
307 Opt |= _Remove2Op(Lut, cmsSigXYZ2FloatPCS, cmsSigFloatPCS2XYZ);
308
309 // Simplify matrix.
310 Opt |= _MultiplyMatrix(Lut);
311
312 if (Opt) AnyOpt = TRUE;
313
314 } while (Opt);
315
316 return AnyOpt;
317 }
318
319 static
320 void Eval16nop1D(CMSREGISTER const cmsUInt16Number Input[],
321 CMSREGISTER cmsUInt16Number Output[],
322 CMSREGISTER const struct _cms_interp_struc* p)
323 {
324 Output[0] = Input[0];
325
326 cmsUNUSED_PARAMETER(p);
327 }
328
329 static
330 void PrelinEval16(CMSREGISTER const cmsUInt16Number Input[],
331 CMSREGISTER cmsUInt16Number Output[],
332 CMSREGISTER const void* D)
333 {
334 Prelin16Data* p16 = (Prelin16Data*) D;
335 cmsUInt16Number StageABC[MAX_INPUT_DIMENSIONS];
336 cmsUInt16Number StageDEF[cmsMAXCHANNELS];
337 cmsUInt32Number i;
338
339 for (i=0; i < p16 ->nInputs; i++) {
340
341 p16 ->EvalCurveIn16[i](&Input[i], &StageABC[i], p16 ->ParamsCurveIn16[i]);
342 }
343
344 p16 ->EvalCLUT(StageABC, StageDEF, p16 ->CLUTparams);
345
346 for (i=0; i < p16 ->nOutputs; i++) {
347
348 p16 ->EvalCurveOut16[i](&StageDEF[i], &Output[i], p16 ->ParamsCurveOut16[i]);
349 }
350 }
351
352
418 }
419 else {
420
421 p16 ->ParamsCurveOut16[i] = Out[i] ->InterpParams;
422 p16 -> EvalCurveOut16[i] = p16 ->ParamsCurveOut16[i]->Interpolation.Lerp16;
423 }
424 }
425
426 return p16;
427 }
428
429
430
431 // Resampling ---------------------------------------------------------------------------------
432
433 #define PRELINEARIZATION_POINTS 4096
434
435 // Sampler implemented by another LUT. This is a clean way to precalculate the devicelink 3D CLUT for
436 // almost any transform. We use floating point precision and then convert from floating point to 16 bits.
437 static
438 cmsInt32Number XFormSampler16(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
439 {
440 cmsPipeline* Lut = (cmsPipeline*) Cargo;
441 cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS];
442 cmsUInt32Number i;
443
444 _cmsAssert(Lut -> InputChannels < cmsMAXCHANNELS);
445 _cmsAssert(Lut -> OutputChannels < cmsMAXCHANNELS);
446
447 // From 16 bit to floating point
448 for (i=0; i < Lut ->InputChannels; i++)
449 InFloat[i] = (cmsFloat32Number) (In[i] / 65535.0);
450
451 // Evaluate in floating point
452 cmsPipelineEvalFloat(InFloat, OutFloat, Lut);
453
454 // Back to 16 bits representation
455 for (i=0; i < Lut ->OutputChannels; i++)
456 Out[i] = _cmsQuickSaturateWord(OutFloat[i] * 65535.0);
457
458 // Always succeed
656 // This function should be used on 16-bit LUTs only, as floating point loses precision when simplified
657 // -----------------------------------------------------------------------------------------------------------------------------------------------
658
659 static
660 cmsBool OptimizeByResampling(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
661 {
662 cmsPipeline* Src = NULL;
663 cmsPipeline* Dest = NULL;
664 cmsStage* mpe;
665 cmsStage* CLUT;
666 cmsStage *KeepPreLin = NULL, *KeepPostLin = NULL;
667 cmsUInt32Number nGridPoints;
668 cmsColorSpaceSignature ColorSpace, OutputColorSpace;
669 cmsStage *NewPreLin = NULL;
670 cmsStage *NewPostLin = NULL;
671 _cmsStageCLutData* DataCLUT;
672 cmsToneCurve** DataSetIn;
673 cmsToneCurve** DataSetOut;
674 Prelin16Data* p16;
675
676 // This is a lossy optimization! does not apply in floating-point cases
677 if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
678
679 ColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
680 OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
681
682 // Color space must be specified
683 if (ColorSpace == (cmsColorSpaceSignature)0 ||
684 OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE;
685
686 nGridPoints = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
687
688 // For empty LUTs, 2 points are enough
689 if (cmsPipelineStageCount(*Lut) == 0)
690 nGridPoints = 2;
691
692 Src = *Lut;
693
694 // Named color pipelines cannot be optimized either
695 for (mpe = cmsPipelineGetPtrToFirstStage(Src);
696 mpe != NULL;
920
921 return p8;
922 }
923
924 static
925 void Prelin8free(cmsContext ContextID, void* ptr)
926 {
927 _cmsFree(ContextID, ptr);
928 }
929
930 static
931 void* Prelin8dup(cmsContext ContextID, const void* ptr)
932 {
933 return _cmsDupMem(ContextID, ptr, sizeof(Prelin8Data));
934 }
935
936
937
938 // An optimized interpolation for 8-bit input.
939 #define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
940 static CMS_NO_SANITIZE
941 void PrelinEval8(CMSREGISTER const cmsUInt16Number Input[],
942 CMSREGISTER cmsUInt16Number Output[],
943 CMSREGISTER const void* D)
944 {
945
946 cmsUInt8Number r, g, b;
947 cmsS15Fixed16Number rx, ry, rz;
948 cmsS15Fixed16Number c0, c1, c2, c3, Rest;
949 int OutChan;
950 CMSREGISTER cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1;
951 Prelin8Data* p8 = (Prelin8Data*) D;
952 CMSREGISTER const cmsInterpParams* p = p8 ->p;
953 int TotalOut = (int) p -> nOutputs;
954 const cmsUInt16Number* LutTable = (const cmsUInt16Number*) p->Table;
955
956 r = (cmsUInt8Number) (Input[0] >> 8);
957 g = (cmsUInt8Number) (Input[1] >> 8);
958 b = (cmsUInt8Number) (Input[2] >> 8);
959
960 X0 = X1 = (cmsS15Fixed16Number) p8->X0[r];
961 Y0 = Y1 = (cmsS15Fixed16Number) p8->Y0[g];
962 Z0 = Z1 = (cmsS15Fixed16Number) p8->Z0[b];
963
964 rx = p8 ->rx[r];
965 ry = p8 ->ry[g];
966 rz = p8 ->rz[b];
967
968 X1 = X0 + (cmsS15Fixed16Number)((rx == 0) ? 0 : p ->opta[2]);
969 Y1 = Y0 + (cmsS15Fixed16Number)((ry == 0) ? 0 : p ->opta[1]);
970 Z1 = Z0 + (cmsS15Fixed16Number)((rz == 0) ? 0 : p ->opta[0]);
971
972
1053 // We need xput over here
1054
1055 static
1056 cmsBool OptimizeByComputingLinearization(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
1057 {
1058 cmsPipeline* OriginalLut;
1059 cmsUInt32Number nGridPoints;
1060 cmsToneCurve *Trans[cmsMAXCHANNELS], *TransReverse[cmsMAXCHANNELS];
1061 cmsUInt32Number t, i;
1062 cmsFloat32Number v, In[cmsMAXCHANNELS], Out[cmsMAXCHANNELS];
1063 cmsBool lIsSuitable, lIsLinear;
1064 cmsPipeline* OptimizedLUT = NULL, *LutPlusCurves = NULL;
1065 cmsStage* OptimizedCLUTmpe;
1066 cmsColorSpaceSignature ColorSpace, OutputColorSpace;
1067 cmsStage* OptimizedPrelinMpe;
1068 cmsStage* mpe;
1069 cmsToneCurve** OptimizedPrelinCurves;
1070 _cmsStageCLutData* OptimizedPrelinCLUT;
1071
1072
1073 // This is a lossy optimization! does not apply in floating-point cases
1074 if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
1075
1076 // Only on chunky RGB
1077 if (T_COLORSPACE(*InputFormat) != PT_RGB) return FALSE;
1078 if (T_PLANAR(*InputFormat)) return FALSE;
1079
1080 if (T_COLORSPACE(*OutputFormat) != PT_RGB) return FALSE;
1081 if (T_PLANAR(*OutputFormat)) return FALSE;
1082
1083 // On 16 bits, user has to specify the feature
1084 if (!_cmsFormatterIs8bit(*InputFormat)) {
1085 if (!(*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION)) return FALSE;
1086 }
1087
1088 OriginalLut = *Lut;
1089
1090 // Named color pipelines cannot be optimized either
1091 for (mpe = cmsPipelineGetPtrToFirstStage(OriginalLut);
1092 mpe != NULL;
1093 mpe = cmsStageNext(mpe)) {
1096
1097 ColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
1098 OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
1099
1100 // Color space must be specified
1101 if (ColorSpace == (cmsColorSpaceSignature)0 ||
1102 OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE;
1103
1104 nGridPoints = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
1105
1106 // Empty gamma containers
1107 memset(Trans, 0, sizeof(Trans));
1108 memset(TransReverse, 0, sizeof(TransReverse));
1109
1110 // If the last stage of the original lut are curves, and those curves are
1111 // degenerated, it is likely the transform is squeezing and clipping
1112 // the output from previous CLUT. We cannot optimize this case
1113 {
1114 cmsStage* last = cmsPipelineGetPtrToLastStage(OriginalLut);
1115
1116 if (last == NULL) goto Error;
1117 if (cmsStageType(last) == cmsSigCurveSetElemType) {
1118
1119 _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*)cmsStageData(last);
1120 for (i = 0; i < Data->nCurves; i++) {
1121 if (IsDegenerated(Data->TheCurves[i]))
1122 goto Error;
1123 }
1124 }
1125 }
1126
1127 for (t = 0; t < OriginalLut ->InputChannels; t++) {
1128 Trans[t] = cmsBuildTabulatedToneCurve16(OriginalLut ->ContextID, PRELINEARIZATION_POINTS, NULL);
1129 if (Trans[t] == NULL) goto Error;
1130 }
1131
1132 // Populate the curves
1133 for (i=0; i < PRELINEARIZATION_POINTS; i++) {
1134
1135 v = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - 1));
1136
1205
1206 // Free resources
1207 for (t = 0; t < OriginalLut ->InputChannels; t++) {
1208
1209 if (Trans[t]) cmsFreeToneCurve(Trans[t]);
1210 if (TransReverse[t]) cmsFreeToneCurve(TransReverse[t]);
1211 }
1212
1213 cmsPipelineFree(LutPlusCurves);
1214
1215
1216 OptimizedPrelinCurves = _cmsStageGetPtrToCurveSet(OptimizedPrelinMpe);
1217 OptimizedPrelinCLUT = (_cmsStageCLutData*) OptimizedCLUTmpe ->Data;
1218
1219 // Set the evaluator if 8-bit
1220 if (_cmsFormatterIs8bit(*InputFormat)) {
1221
1222 Prelin8Data* p8 = PrelinOpt8alloc(OptimizedLUT ->ContextID,
1223 OptimizedPrelinCLUT ->Params,
1224 OptimizedPrelinCurves);
1225 if (p8 == NULL) return FALSE;
1226
1227 _cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval8, (void*) p8, Prelin8free, Prelin8dup);
1228
1229 }
1230 else
1231 {
1232 Prelin16Data* p16 = PrelinOpt16alloc(OptimizedLUT ->ContextID,
1233 OptimizedPrelinCLUT ->Params,
1234 3, OptimizedPrelinCurves, 3, NULL);
1235 if (p16 == NULL) return FALSE;
1236
1237 _cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup);
1238
1239 }
1240
1241 // Don't fix white on absolute colorimetric
1242 if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
1243 *dwFlags |= cmsFLAGS_NOWHITEONWHITEFIXUP;
1244
1245 if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) {
1246
1247 if (!FixWhiteMisalignment(OptimizedLUT, ColorSpace, OutputColorSpace)) {
1248
1249 return FALSE;
1250 }
1251 }
1252
1253 // And return the obtained LUT
1254
1255 cmsPipelineFree(OriginalLut);
1343
1344 if (nElements == 256U) {
1345
1346 for (j=0; j < nElements; j++) {
1347
1348 c16 ->Curves[i][j] = cmsEvalToneCurve16(G[i], FROM_8_TO_16(j));
1349 }
1350 }
1351 else {
1352
1353 for (j=0; j < nElements; j++) {
1354 c16 ->Curves[i][j] = cmsEvalToneCurve16(G[i], (cmsUInt16Number) j);
1355 }
1356 }
1357 }
1358
1359 return c16;
1360 }
1361
1362 static
1363 void FastEvaluateCurves8(CMSREGISTER const cmsUInt16Number In[],
1364 CMSREGISTER cmsUInt16Number Out[],
1365 CMSREGISTER const void* D)
1366 {
1367 Curves16Data* Data = (Curves16Data*) D;
1368 int x;
1369 cmsUInt32Number i;
1370
1371 for (i=0; i < Data ->nCurves; i++) {
1372
1373 x = (In[i] >> 8);
1374 Out[i] = Data -> Curves[i][x];
1375 }
1376 }
1377
1378
1379 static
1380 void FastEvaluateCurves16(CMSREGISTER const cmsUInt16Number In[],
1381 CMSREGISTER cmsUInt16Number Out[],
1382 CMSREGISTER const void* D)
1383 {
1384 Curves16Data* Data = (Curves16Data*) D;
1385 cmsUInt32Number i;
1386
1387 for (i=0; i < Data ->nCurves; i++) {
1388 Out[i] = Data -> Curves[i][In[i]];
1389 }
1390 }
1391
1392
1393 static
1394 void FastIdentity16(CMSREGISTER const cmsUInt16Number In[],
1395 CMSREGISTER cmsUInt16Number Out[],
1396 CMSREGISTER const void* D)
1397 {
1398 cmsPipeline* Lut = (cmsPipeline*) D;
1399 cmsUInt32Number i;
1400
1401 for (i=0; i < Lut ->InputChannels; i++) {
1402 Out[i] = In[i];
1403 }
1404 }
1405
1406
1407 // If the target LUT holds only curves, the optimization procedure is to join all those
1408 // curves together. That only works on curves and does not work on matrices.
1409 static
1410 cmsBool OptimizeByJoiningCurves(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
1411 {
1412 cmsToneCurve** GammaTables = NULL;
1413 cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS];
1414 cmsUInt32Number i, j;
1415 cmsPipeline* Src = *Lut;
1416 cmsPipeline* Dest = NULL;
1417 cmsStage* mpe;
1418 cmsStage* ObtainedCurves = NULL;
1419
1420
1421 // This is a lossy optimization! does not apply in floating-point cases
1422 if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
1423
1424 // Only curves in this LUT?
1425 for (mpe = cmsPipelineGetPtrToFirstStage(Src);
1426 mpe != NULL;
1427 mpe = cmsStageNext(mpe)) {
1428 if (cmsStageType(mpe) != cmsSigCurveSetElemType) return FALSE;
1429 }
1430
1431 // Allocate an empty LUT
1432 Dest = cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
1433 if (Dest == NULL) return FALSE;
1434
1435 // Create target curves
1436 GammaTables = (cmsToneCurve**) _cmsCalloc(Src ->ContextID, Src ->InputChannels, sizeof(cmsToneCurve*));
1437 if (GammaTables == NULL) goto Error;
1438
1439 for (i=0; i < Src ->InputChannels; i++) {
1440 GammaTables[i] = cmsBuildTabulatedToneCurve16(Src ->ContextID, PRELINEARIZATION_POINTS, NULL);
1441 if (GammaTables[i] == NULL) goto Error;
1451
1452 for (j=0; j < Src ->InputChannels; j++)
1453 GammaTables[j] -> Table16[i] = _cmsQuickSaturateWord(OutFloat[j] * 65535.0);
1454 }
1455
1456 ObtainedCurves = cmsStageAllocToneCurves(Src ->ContextID, Src ->InputChannels, GammaTables);
1457 if (ObtainedCurves == NULL) goto Error;
1458
1459 for (i=0; i < Src ->InputChannels; i++) {
1460 cmsFreeToneCurve(GammaTables[i]);
1461 GammaTables[i] = NULL;
1462 }
1463
1464 if (GammaTables != NULL) {
1465 _cmsFree(Src->ContextID, GammaTables);
1466 GammaTables = NULL;
1467 }
1468
1469 // Maybe the curves are linear at the end
1470 if (!AllCurvesAreLinear(ObtainedCurves)) {
1471 _cmsStageToneCurvesData* Data;
1472
1473 if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, ObtainedCurves))
1474 goto Error;
1475 Data = (_cmsStageToneCurvesData*) cmsStageData(ObtainedCurves);
1476 ObtainedCurves = NULL;
1477
1478 // If the curves are to be applied in 8 bits, we can save memory
1479 if (_cmsFormatterIs8bit(*InputFormat)) {
1480 Curves16Data* c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, 256, Data ->TheCurves);
1481
1482 if (c16 == NULL) goto Error;
1483 *dwFlags |= cmsFLAGS_NOCACHE;
1484 _cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves8, c16, CurvesFree, CurvesDup);
1485
1486 }
1487 else {
1488 Curves16Data* c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, 65536, Data ->TheCurves);
1489
1490 if (c16 == NULL) goto Error;
1491 *dwFlags |= cmsFLAGS_NOCACHE;
1492 _cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves16, c16, CurvesFree, CurvesDup);
1493 }
1494 }
1495 else {
1496
1497 // LUT optimizes to nothing. Set the identity LUT
1498 cmsStageFree(ObtainedCurves);
1499 ObtainedCurves = NULL;
1500
1501 if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageAllocIdentity(Dest ->ContextID, Src ->InputChannels)))
1502 goto Error;
1503
1504 *dwFlags |= cmsFLAGS_NOCACHE;
1505 _cmsPipelineSetOptimizationParameters(Dest, FastIdentity16, (void*) Dest, NULL, NULL);
1506 }
1507
1534 // LUT is Shaper - Matrix - Matrix - Shaper, which is very frequent when combining two matrix-shaper profiles
1535
1536
1537 static
1538 void FreeMatShaper(cmsContext ContextID, void* Data)
1539 {
1540 if (Data != NULL) _cmsFree(ContextID, Data);
1541 }
1542
1543 static
1544 void* DupMatShaper(cmsContext ContextID, const void* Data)
1545 {
1546 return _cmsDupMem(ContextID, Data, sizeof(MatShaper8Data));
1547 }
1548
1549
1550 // A fast matrix-shaper evaluator for 8 bits. This is a bit tricky since I'm using 1.14 signed fixed point
1551 // to accomplish some performance. Actually it takes 256x3 16 bits tables and 16385 x 3 tables of 8 bits,
1552 // in total about 50K, and the performance boost is huge!
1553 static
1554 void MatShaperEval16(CMSREGISTER const cmsUInt16Number In[],
1555 CMSREGISTER cmsUInt16Number Out[],
1556 CMSREGISTER const void* D)
1557 {
1558 MatShaper8Data* p = (MatShaper8Data*) D;
1559 cmsS1Fixed14Number l1, l2, l3, r, g, b;
1560 cmsUInt32Number ri, gi, bi;
1561
1562 // In this case (and only in this case!) we can use this simplification since
1563 // In[] is assured to come from a 8 bit number. (a << 8 | a)
1564 ri = In[0] & 0xFFU;
1565 gi = In[1] & 0xFFU;
1566 bi = In[2] & 0xFFU;
1567
1568 // Across first shaper, which also converts to 1.14 fixed point
1569 r = p->Shaper1R[ri];
1570 g = p->Shaper1G[gi];
1571 b = p->Shaper1B[bi];
1572
1573 // Evaluate the matrix in 1.14 fixed point
1574 l1 = (p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b + p->Off[0] + 0x2000) >> 14;
1575 l2 = (p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b + p->Off[1] + 0x2000) >> 14;
1576 l3 = (p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b + p->Off[2] + 0x2000) >> 14;
1776 goto Error;
1777
1778 if (!IdentityMat) {
1779
1780 if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageAllocMatrix(Dest->ContextID, 3, 3, (const cmsFloat64Number*)&res, Offset)))
1781 goto Error;
1782 }
1783
1784 if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageDup(Curve2)))
1785 goto Error;
1786
1787 // If identity on matrix, we can further optimize the curves, so call the join curves routine
1788 if (IdentityMat) {
1789
1790 OptimizeByJoiningCurves(&Dest, Intent, InputFormat, OutputFormat, dwFlags);
1791 }
1792 else {
1793 _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(Curve1);
1794 _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(Curve2);
1795
1796 // In this particular optimization, cache does not help as it takes more time to deal with
1797 // the cache that with the pixel handling
1798 *dwFlags |= cmsFLAGS_NOCACHE;
1799
1800 // Setup the optimizarion routines
1801 SetMatShaper(Dest, mpeC1 ->TheCurves, &res, (cmsVEC3*) Offset, mpeC2->TheCurves, OutputFormat);
1802 }
1803
1804 cmsPipelineFree(Src);
1805 *Lut = Dest;
1806 return TRUE;
1807 Error:
1808 // Leave Src unchanged
1809 cmsPipelineFree(Dest);
1810 return FALSE;
1811 }
1812
1813
1814 // -------------------------------------------------------------------------------------------------------------------------------------
1815 // Optimization plug-ins
1816
1817 // List of optimizations
|