src/share/native/sun/java2d/cmm/lcms/cmsopt.c

Print this page




 175     pt1 = &Lut ->Elements;
 176     if (*pt1 == NULL) return AnyOpt;
 177 
 178     while (*pt1 != NULL) {
 179 
 180         pt2 = &((*pt1) -> Next);
 181         if (*pt2 == NULL) return AnyOpt;
 182 
 183         if ((*pt1) ->Implements == Op1 && (*pt2) ->Implements == Op2) {
 184             _RemoveElement(pt2);
 185             _RemoveElement(pt1);
 186             AnyOpt = TRUE;
 187         }
 188         else
 189             pt1 = &((*pt1) -> Next);
 190     }
 191 
 192     return AnyOpt;
 193 }
 194 


















































































 195 // Preoptimize just gets rif of no-ops coming paired. Conversion from v2 to v4 followed
 196 // by a v4 to v2 and vice-versa. The elements are then discarded.
 197 static
 198 cmsBool PreOptimize(cmsPipeline* Lut)
 199 {
 200     cmsBool AnyOpt = FALSE, Opt;
 201 
 202     do {
 203 
 204         Opt = FALSE;
 205 
 206         // Remove all identities
 207         Opt |= _Remove1Op(Lut, cmsSigIdentityElemType);
 208 
 209         // Remove XYZ2Lab followed by Lab2XYZ
 210         Opt |= _Remove2Op(Lut, cmsSigXYZ2LabElemType, cmsSigLab2XYZElemType);
 211 
 212         // Remove Lab2XYZ followed by XYZ2Lab
 213         Opt |= _Remove2Op(Lut, cmsSigLab2XYZElemType, cmsSigXYZ2LabElemType);
 214 
 215         // Remove V4 to V2 followed by V2 to V4
 216         Opt |= _Remove2Op(Lut, cmsSigLabV4toV2, cmsSigLabV2toV4);
 217 
 218         // Remove V2 to V4 followed by V4 to V2
 219         Opt |= _Remove2Op(Lut, cmsSigLabV2toV4, cmsSigLabV4toV2);
 220 
 221         // Remove float pcs Lab conversions
 222         Opt |= _Remove2Op(Lut, cmsSigLab2FloatPCS, cmsSigFloatPCS2Lab);
 223 
 224         // Remove float pcs Lab conversions
 225         Opt |= _Remove2Op(Lut, cmsSigXYZ2FloatPCS, cmsSigFloatPCS2XYZ);
 226 



 227         if (Opt) AnyOpt = TRUE;
 228 
 229     } while (Opt);
 230 
 231     return AnyOpt;
 232 }
 233 
 234 static
 235 void Eval16nop1D(register const cmsUInt16Number Input[],
 236                  register cmsUInt16Number Output[],
 237                  register const struct _cms_interp_struc* p)
 238 {
 239     Output[0] = Input[0];
 240 
 241     cmsUNUSED_PARAMETER(p);
 242 }
 243 
 244 static
 245 void PrelinEval16(register const cmsUInt16Number Input[],
 246                   register cmsUInt16Number Output[],


 263         p16 ->EvalCurveOut16[i](&StageDEF[i], &Output[i], p16 ->ParamsCurveOut16[i]);
 264     }
 265 }
 266 
 267 
 268 static
 269 void PrelinOpt16free(cmsContext ContextID, void* ptr)
 270 {
 271     Prelin16Data* p16 = (Prelin16Data*) ptr;
 272 
 273     _cmsFree(ContextID, p16 ->EvalCurveOut16);
 274     _cmsFree(ContextID, p16 ->ParamsCurveOut16);
 275 
 276     _cmsFree(ContextID, p16);
 277 }
 278 
 279 static
 280 void* Prelin16dup(cmsContext ContextID, const void* ptr)
 281 {
 282     Prelin16Data* p16 = (Prelin16Data*) ptr;
 283     Prelin16Data* Duped = _cmsDupMem(ContextID, p16, sizeof(Prelin16Data));
 284 
 285     if (Duped == NULL) return NULL;
 286 
 287     Duped ->EvalCurveOut16   = _cmsDupMem(ContextID, p16 ->EvalCurveOut16, p16 ->nOutputs * sizeof(_cmsInterpFn16));
 288     Duped ->ParamsCurveOut16 = _cmsDupMem(ContextID, p16 ->ParamsCurveOut16, p16 ->nOutputs * sizeof(cmsInterpParams* ));
 289 
 290     return Duped;
 291 }
 292 
 293 
 294 static
 295 Prelin16Data* PrelinOpt16alloc(cmsContext ContextID,
 296                                const cmsInterpParams* ColorMap,
 297                                int nInputs, cmsToneCurve** In,
 298                                int nOutputs, cmsToneCurve** Out )
 299 {
 300     int i;
 301     Prelin16Data* p16 = _cmsMallocZero(ContextID, sizeof(Prelin16Data));
 302     if (p16 == NULL) return NULL;
 303 
 304     p16 ->nInputs = nInputs;
 305     p16 -> nOutputs = nOutputs;
 306 
 307 
 308     for (i=0; i < nInputs; i++) {
 309 
 310         if (In == NULL) {
 311             p16 -> ParamsCurveIn16[i] = NULL;
 312             p16 -> EvalCurveIn16[i] = Eval16nop1D;
 313 
 314         }
 315         else {
 316             p16 -> ParamsCurveIn16[i] = In[i] ->InterpParams;
 317             p16 -> EvalCurveIn16[i] = p16 ->ParamsCurveIn16[i]->Interpolation.Lerp16;
 318         }
 319     }
 320 
 321     p16 ->CLUTparams = ColorMap;


 770 
 771     // Compute slope and offset for the end
 772     Val   = g ->Table16[AtEnd];
 773     Slope = (EndVal - Val) / AtBegin;   // AtBegin holds the X interval, which is same in both cases
 774     beta  = Val - Slope * AtEnd;
 775 
 776     for (i = AtEnd; i < (int) g ->nEntries; i++)
 777         g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta);
 778 }
 779 
 780 
 781 // Precomputes tables for 8-bit on input devicelink.
 782 static
 783 Prelin8Data* PrelinOpt8alloc(cmsContext ContextID, const cmsInterpParams* p, cmsToneCurve* G[3])
 784 {
 785     int i;
 786     cmsUInt16Number Input[3];
 787     cmsS15Fixed16Number v1, v2, v3;
 788     Prelin8Data* p8;
 789 
 790     p8 = _cmsMallocZero(ContextID, sizeof(Prelin8Data));
 791     if (p8 == NULL) return NULL;
 792 
 793     // Since this only works for 8 bit input, values comes always as x * 257,
 794     // we can safely take msb byte (x << 8 + x)
 795 
 796     for (i=0; i < 256; i++) {
 797 
 798         if (G != NULL) {
 799 
 800             // Get 16-bit representation
 801             Input[0] = cmsEvalToneCurve16(G[0], FROM_8_TO_16(i));
 802             Input[1] = cmsEvalToneCurve16(G[1], FROM_8_TO_16(i));
 803             Input[2] = cmsEvalToneCurve16(G[2], FROM_8_TO_16(i));
 804         }
 805         else {
 806             Input[0] = FROM_8_TO_16(i);
 807             Input[1] = FROM_8_TO_16(i);
 808             Input[2] = FROM_8_TO_16(i);
 809         }
 810 


 844 }
 845 
 846 
 847 
 848 // An optimized interpolation for 8-bit input.
 849 #define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
 850 static
 851 void PrelinEval8(register const cmsUInt16Number Input[],
 852                   register cmsUInt16Number Output[],
 853                   register const void* D)
 854 {
 855 
 856     cmsUInt8Number         r, g, b;
 857     cmsS15Fixed16Number    rx, ry, rz;
 858     cmsS15Fixed16Number    c0, c1, c2, c3, Rest;
 859     int                    OutChan;
 860     register cmsS15Fixed16Number    X0, X1, Y0, Y1, Z0, Z1;
 861     Prelin8Data* p8 = (Prelin8Data*) D;
 862     register const cmsInterpParams* p = p8 ->p;
 863     int                    TotalOut = p -> nOutputs;
 864     const cmsUInt16Number* LutTable = p -> Table;
 865 
 866     r = Input[0] >> 8;
 867     g = Input[1] >> 8;
 868     b = Input[2] >> 8;
 869 
 870     X0 = X1 = p8->X0[r];
 871     Y0 = Y1 = p8->Y0[g];
 872     Z0 = Z1 = p8->Z0[b];
 873 
 874     rx = p8 ->rx[r];
 875     ry = p8 ->ry[g];
 876     rz = p8 ->rz[b];
 877 
 878     X1 = X0 + ((rx == 0) ? 0 : p ->opta[2]);
 879     Y1 = Y0 + ((ry == 0) ? 0 : p ->opta[1]);
 880     Z1 = Z0 + ((rz == 0) ? 0 : p ->opta[0]);
 881 
 882 
 883     // These are the 6 Tetrahedral
 884     for (OutChan=0; OutChan < TotalOut; OutChan++) {


1163 // Curves optimizer ------------------------------------------------------------------------------------------------------------------
1164 
1165 static
1166 void CurvesFree(cmsContext ContextID, void* ptr)
1167 {
1168      Curves16Data* Data = (Curves16Data*) ptr;
1169      int i;
1170 
1171      for (i=0; i < Data -> nCurves; i++) {
1172 
1173          _cmsFree(ContextID, Data ->Curves[i]);
1174      }
1175 
1176      _cmsFree(ContextID, Data ->Curves);
1177      _cmsFree(ContextID, ptr);
1178 }
1179 
1180 static
1181 void* CurvesDup(cmsContext ContextID, const void* ptr)
1182 {
1183     Curves16Data* Data = _cmsDupMem(ContextID, ptr, sizeof(Curves16Data));
1184     int i;
1185 
1186     if (Data == NULL) return NULL;
1187 
1188     Data ->Curves = _cmsDupMem(ContextID, Data ->Curves, Data ->nCurves * sizeof(cmsUInt16Number*));
1189 
1190     for (i=0; i < Data -> nCurves; i++) {
1191         Data ->Curves[i] = _cmsDupMem(ContextID, Data ->Curves[i], Data -> nElements * sizeof(cmsUInt16Number));
1192     }
1193 
1194     return (void*) Data;
1195 }
1196 
1197 // Precomputes tables for 8-bit on input devicelink.
1198 static
1199 Curves16Data* CurvesAlloc(cmsContext ContextID, int nCurves, int nElements, cmsToneCurve** G)
1200 {
1201     int i, j;
1202     Curves16Data* c16;
1203 
1204     c16 = _cmsMallocZero(ContextID, sizeof(Curves16Data));
1205     if (c16 == NULL) return NULL;
1206 
1207     c16 ->nCurves = nCurves;
1208     c16 ->nElements = nElements;
1209 
1210     c16 ->Curves = _cmsCalloc(ContextID, nCurves, sizeof(cmsUInt16Number*));
1211     if (c16 ->Curves == NULL) return NULL;
1212 
1213     for (i=0; i < nCurves; i++) {
1214 
1215         c16->Curves[i] = _cmsCalloc(ContextID, nElements, sizeof(cmsUInt16Number));
1216 
1217         if (c16->Curves[i] == NULL) {
1218 
1219             for (j=0; j < i; j++) {
1220                 _cmsFree(ContextID, c16->Curves[j]);
1221             }
1222             _cmsFree(ContextID, c16->Curves);
1223             _cmsFree(ContextID, c16);
1224             return NULL;
1225         }
1226 
1227         if (nElements == 256) {
1228 
1229             for (j=0; j < nElements; j++) {
1230 
1231                 c16 ->Curves[i][j] = cmsEvalToneCurve16(G[i], FROM_8_TO_16(j));
1232             }
1233         }
1234         else {
1235 


1543     for (i=0; i < 3; i++) {
1544 
1545         if (Off == NULL) {
1546             p ->Off[i] = 0;
1547         }
1548         else {
1549             p ->Off[i] = DOUBLE_TO_1FIXED14(Off->n[i]);
1550         }
1551     }
1552 
1553     // Mark as optimized for faster formatter
1554     if (Is8Bits)
1555         *OutputFormat |= OPTIMIZED_SH(1);
1556 
1557     // Fill function pointers
1558     _cmsPipelineSetOptimizationParameters(Dest, MatShaperEval16, (void*) p, FreeMatShaper, DupMatShaper);
1559     return TRUE;
1560 }
1561 
1562 //  8 bits on input allows matrix-shaper boot up to 25 Mpixels per second on RGB. That's fast!
1563 // TODO: Allow a third matrix for abs. colorimetric
1564 static
1565 cmsBool OptimizeMatrixShaper(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
1566 {
1567     cmsStage* Curve1, *Curve2;
1568     cmsStage* Matrix1, *Matrix2;
1569     _cmsStageMatrixData* Data1;
1570     _cmsStageMatrixData* Data2;
1571     cmsMAT3 res;
1572     cmsBool IdentityMat;
1573     cmsPipeline* Dest, *Src;

1574 
1575     // Only works on RGB to RGB
1576     if (T_CHANNELS(*InputFormat) != 3 || T_CHANNELS(*OutputFormat) != 3) return FALSE;
1577 
1578     // Only works on 8 bit input
1579     if (!_cmsFormatterIs8bit(*InputFormat)) return FALSE;
1580 
1581     // Seems suitable, proceed
1582     Src = *Lut;
1583 
1584     // Check for shaper-matrix-matrix-shaper structure, that is what this optimizer stands for
1585     if (!cmsPipelineCheckAndRetreiveStages(Src, 4,








1586         cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
1587         &Curve1, &Matrix1, &Matrix2, &Curve2)) return FALSE;
1588 
1589     // Get both matrices
1590     Data1 = (_cmsStageMatrixData*) cmsStageData(Matrix1);
1591     Data2 = (_cmsStageMatrixData*) cmsStageData(Matrix2);
1592 
1593     // Input offset should be zero
1594     if (Data1 ->Offset != NULL) return FALSE;
1595 
1596     // Multiply both matrices to get the result
1597     _cmsMAT3per(&res, (cmsMAT3*) Data2 ->Double, (cmsMAT3*) Data1 ->Double);



1598 
1599     // Now the result is in res + Data2 -> Offset. Maybe is a plain identity?
1600     IdentityMat = FALSE;
1601     if (_cmsMAT3isIdentity(&res) && Data2 ->Offset == NULL) {




















1602 
1603         // We can get rid of full matrix
1604         IdentityMat = TRUE;
1605     }





1606 
1607       // Allocate an empty LUT
1608     Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
1609     if (!Dest) return FALSE;
1610 
1611     // Assamble the new LUT
1612     if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageDup(Curve1)))
1613         goto Error;
1614 
1615     if (!IdentityMat)
1616         if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageAllocMatrix(Dest ->ContextID, 3, 3, (const cmsFloat64Number*) &res, Data2 ->Offset)))

1617             goto Error;


1618     if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageDup(Curve2)))
1619         goto Error;
1620 
1621     // If identity on matrix, we can further optimize the curves, so call the join curves routine
1622     if (IdentityMat) {
1623 
1624         OptimizeByJoiningCurves(&Dest, Intent, InputFormat, OutputFormat, dwFlags);
1625     }
1626     else {
1627         _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(Curve1);
1628         _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(Curve2);
1629 
1630         // In this particular optimization, caché does not help as it takes more time to deal with
1631         // the caché that with the pixel handling
1632         *dwFlags |= cmsFLAGS_NOCACHE;
1633 
1634         // Setup the optimizarion routines
1635         SetMatShaper(Dest, mpeC1 ->TheCurves, &res, (cmsVEC3*) Data2 ->Offset, mpeC2->TheCurves, OutputFormat);
1636     }
1637 
1638     cmsPipelineFree(Src);
1639     *Lut = Dest;
1640     return TRUE;
1641 Error:
1642     // Leave Src unchanged
1643     cmsPipelineFree(Dest);
1644     return FALSE;
1645 }
1646 
1647 
1648 // -------------------------------------------------------------------------------------------------------------------------------------
1649 // Optimization plug-ins
1650 
1651 // List of optimizations
1652 typedef struct _cmsOptimizationCollection_st {
1653 
1654     _cmsOPToptimizeFn  OptimizePtr;
1655 




 175     pt1 = &Lut ->Elements;
 176     if (*pt1 == NULL) return AnyOpt;
 177 
 178     while (*pt1 != NULL) {
 179 
 180         pt2 = &((*pt1) -> Next);
 181         if (*pt2 == NULL) return AnyOpt;
 182 
 183         if ((*pt1) ->Implements == Op1 && (*pt2) ->Implements == Op2) {
 184             _RemoveElement(pt2);
 185             _RemoveElement(pt1);
 186             AnyOpt = TRUE;
 187         }
 188         else
 189             pt1 = &((*pt1) -> Next);
 190     }
 191 
 192     return AnyOpt;
 193 }
 194 
 195 
 196 static
 197 cmsBool CloseEnoughFloat(cmsFloat64Number a, cmsFloat64Number b)
 198 {
 199        return fabs(b - a) < 0.00001f;
 200 }
 201 
 202 static
 203 cmsBool  isFloatMatrixIdentity(const cmsMAT3* a)
 204 {
 205        cmsMAT3 Identity;
 206        int i, j;
 207 
 208        _cmsMAT3identity(&Identity);
 209 
 210        for (i = 0; i < 3; i++)
 211               for (j = 0; j < 3; j++)
 212                      if (!CloseEnoughFloat(a->v[i].n[j], Identity.v[i].n[j])) return FALSE;
 213 
 214        return TRUE;
 215 }
 216 // if two adjacent matrices are found, multiply them.
 217 static
 218 cmsBool _MultiplyMatrix(cmsPipeline* Lut)
 219 {
 220        cmsStage** pt1;
 221        cmsStage** pt2;
 222        cmsStage*  chain;
 223        cmsBool AnyOpt = FALSE;
 224 
 225        pt1 = &Lut->Elements;
 226        if (*pt1 == NULL) return AnyOpt;
 227 
 228        while (*pt1 != NULL) {
 229 
 230               pt2 = &((*pt1)->Next);
 231               if (*pt2 == NULL) return AnyOpt;
 232 
 233               if ((*pt1)->Implements == cmsSigMatrixElemType && (*pt2)->Implements == cmsSigMatrixElemType) {
 234 
 235                      // Get both matrices
 236                      _cmsStageMatrixData* m1 = (_cmsStageMatrixData*) cmsStageData(*pt1);
 237                      _cmsStageMatrixData* m2 = (_cmsStageMatrixData*) cmsStageData(*pt2);
 238                      cmsMAT3 res;
 239 
 240                      // Input offset and output offset should be zero to use this optimization
 241                      if (m1->Offset != NULL || m2 ->Offset != NULL ||
 242                             cmsStageInputChannels(*pt1) != 3 || cmsStageOutputChannels(*pt1) != 3 ||
 243                             cmsStageInputChannels(*pt2) != 3 || cmsStageOutputChannels(*pt2) != 3)
 244                             return FALSE;
 245 
 246                      // Multiply both matrices to get the result
 247                      _cmsMAT3per(&res, (cmsMAT3*)m2->Double, (cmsMAT3*)m1->Double);
 248 
 249                      // Get the next in chain afer the matrices
 250                      chain = (*pt2)->Next;
 251 
 252                      // Remove both matrices
 253                      _RemoveElement(pt2);
 254                      _RemoveElement(pt1);
 255 
 256                      // Now what if the result is a plain identity?
 257                      if (!isFloatMatrixIdentity(&res)) {
 258 
 259                             // We can not get rid of full matrix
 260                             cmsStage* Multmat = cmsStageAllocMatrix(Lut->ContextID, 3, 3, (const cmsFloat64Number*) &res, NULL);
 261 
 262                             // Recover the chain
 263                             Multmat->Next = chain;
 264                             *pt1 = Multmat;
 265                      }
 266 
 267                      AnyOpt = TRUE;
 268               }
 269               else
 270                      pt1 = &((*pt1)->Next);
 271        }
 272 
 273        return AnyOpt;
 274 }
 275 
 276 
 277 // Preoptimize just gets rif of no-ops coming paired. Conversion from v2 to v4 followed
 278 // by a v4 to v2 and vice-versa. The elements are then discarded.
 279 static
 280 cmsBool PreOptimize(cmsPipeline* Lut)
 281 {
 282     cmsBool AnyOpt = FALSE, Opt;
 283 
 284     do {
 285 
 286         Opt = FALSE;
 287 
 288         // Remove all identities
 289         Opt |= _Remove1Op(Lut, cmsSigIdentityElemType);
 290 
 291         // Remove XYZ2Lab followed by Lab2XYZ
 292         Opt |= _Remove2Op(Lut, cmsSigXYZ2LabElemType, cmsSigLab2XYZElemType);
 293 
 294         // Remove Lab2XYZ followed by XYZ2Lab
 295         Opt |= _Remove2Op(Lut, cmsSigLab2XYZElemType, cmsSigXYZ2LabElemType);
 296 
 297         // Remove V4 to V2 followed by V2 to V4
 298         Opt |= _Remove2Op(Lut, cmsSigLabV4toV2, cmsSigLabV2toV4);
 299 
 300         // Remove V2 to V4 followed by V4 to V2
 301         Opt |= _Remove2Op(Lut, cmsSigLabV2toV4, cmsSigLabV4toV2);
 302 
 303         // Remove float pcs Lab conversions
 304         Opt |= _Remove2Op(Lut, cmsSigLab2FloatPCS, cmsSigFloatPCS2Lab);
 305 
 306         // Remove float pcs Lab conversions
 307         Opt |= _Remove2Op(Lut, cmsSigXYZ2FloatPCS, cmsSigFloatPCS2XYZ);
 308 
 309         // Simplify matrix.
 310         Opt |= _MultiplyMatrix(Lut);
 311 
 312         if (Opt) AnyOpt = TRUE;
 313 
 314     } while (Opt);
 315 
 316     return AnyOpt;
 317 }
 318 
 319 static
 320 void Eval16nop1D(register const cmsUInt16Number Input[],
 321                  register cmsUInt16Number Output[],
 322                  register const struct _cms_interp_struc* p)
 323 {
 324     Output[0] = Input[0];
 325 
 326     cmsUNUSED_PARAMETER(p);
 327 }
 328 
 329 static
 330 void PrelinEval16(register const cmsUInt16Number Input[],
 331                   register cmsUInt16Number Output[],


 348         p16 ->EvalCurveOut16[i](&StageDEF[i], &Output[i], p16 ->ParamsCurveOut16[i]);
 349     }
 350 }
 351 
 352 
 353 static
 354 void PrelinOpt16free(cmsContext ContextID, void* ptr)
 355 {
 356     Prelin16Data* p16 = (Prelin16Data*) ptr;
 357 
 358     _cmsFree(ContextID, p16 ->EvalCurveOut16);
 359     _cmsFree(ContextID, p16 ->ParamsCurveOut16);
 360 
 361     _cmsFree(ContextID, p16);
 362 }
 363 
 364 static
 365 void* Prelin16dup(cmsContext ContextID, const void* ptr)
 366 {
 367     Prelin16Data* p16 = (Prelin16Data*) ptr;
 368     Prelin16Data* Duped = (Prelin16Data*) _cmsDupMem(ContextID, p16, sizeof(Prelin16Data));
 369 
 370     if (Duped == NULL) return NULL;
 371 
 372     Duped->EvalCurveOut16 = (_cmsInterpFn16*) _cmsDupMem(ContextID, p16->EvalCurveOut16, p16->nOutputs * sizeof(_cmsInterpFn16));
 373     Duped->ParamsCurveOut16 = (cmsInterpParams**)_cmsDupMem(ContextID, p16->ParamsCurveOut16, p16->nOutputs * sizeof(cmsInterpParams*));
 374 
 375     return Duped;
 376 }
 377 
 378 
 379 static
 380 Prelin16Data* PrelinOpt16alloc(cmsContext ContextID,
 381                                const cmsInterpParams* ColorMap,
 382                                int nInputs, cmsToneCurve** In,
 383                                int nOutputs, cmsToneCurve** Out )
 384 {
 385     int i;
 386     Prelin16Data* p16 = (Prelin16Data*)_cmsMallocZero(ContextID, sizeof(Prelin16Data));
 387     if (p16 == NULL) return NULL;
 388 
 389     p16 ->nInputs = nInputs;
 390     p16 -> nOutputs = nOutputs;
 391 
 392 
 393     for (i=0; i < nInputs; i++) {
 394 
 395         if (In == NULL) {
 396             p16 -> ParamsCurveIn16[i] = NULL;
 397             p16 -> EvalCurveIn16[i] = Eval16nop1D;
 398 
 399         }
 400         else {
 401             p16 -> ParamsCurveIn16[i] = In[i] ->InterpParams;
 402             p16 -> EvalCurveIn16[i] = p16 ->ParamsCurveIn16[i]->Interpolation.Lerp16;
 403         }
 404     }
 405 
 406     p16 ->CLUTparams = ColorMap;


 855 
 856     // Compute slope and offset for the end
 857     Val   = g ->Table16[AtEnd];
 858     Slope = (EndVal - Val) / AtBegin;   // AtBegin holds the X interval, which is same in both cases
 859     beta  = Val - Slope * AtEnd;
 860 
 861     for (i = AtEnd; i < (int) g ->nEntries; i++)
 862         g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta);
 863 }
 864 
 865 
 866 // Precomputes tables for 8-bit on input devicelink.
 867 static
 868 Prelin8Data* PrelinOpt8alloc(cmsContext ContextID, const cmsInterpParams* p, cmsToneCurve* G[3])
 869 {
 870     int i;
 871     cmsUInt16Number Input[3];
 872     cmsS15Fixed16Number v1, v2, v3;
 873     Prelin8Data* p8;
 874 
 875     p8 = (Prelin8Data*)_cmsMallocZero(ContextID, sizeof(Prelin8Data));
 876     if (p8 == NULL) return NULL;
 877 
 878     // Since this only works for 8 bit input, values comes always as x * 257,
 879     // we can safely take msb byte (x << 8 + x)
 880 
 881     for (i=0; i < 256; i++) {
 882 
 883         if (G != NULL) {
 884 
 885             // Get 16-bit representation
 886             Input[0] = cmsEvalToneCurve16(G[0], FROM_8_TO_16(i));
 887             Input[1] = cmsEvalToneCurve16(G[1], FROM_8_TO_16(i));
 888             Input[2] = cmsEvalToneCurve16(G[2], FROM_8_TO_16(i));
 889         }
 890         else {
 891             Input[0] = FROM_8_TO_16(i);
 892             Input[1] = FROM_8_TO_16(i);
 893             Input[2] = FROM_8_TO_16(i);
 894         }
 895 


 929 }
 930 
 931 
 932 
 933 // An optimized interpolation for 8-bit input.
 934 #define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
 935 static
 936 void PrelinEval8(register const cmsUInt16Number Input[],
 937                   register cmsUInt16Number Output[],
 938                   register const void* D)
 939 {
 940 
 941     cmsUInt8Number         r, g, b;
 942     cmsS15Fixed16Number    rx, ry, rz;
 943     cmsS15Fixed16Number    c0, c1, c2, c3, Rest;
 944     int                    OutChan;
 945     register cmsS15Fixed16Number    X0, X1, Y0, Y1, Z0, Z1;
 946     Prelin8Data* p8 = (Prelin8Data*) D;
 947     register const cmsInterpParams* p = p8 ->p;
 948     int                    TotalOut = p -> nOutputs;
 949     const cmsUInt16Number* LutTable = (const cmsUInt16Number*) p->Table;
 950 
 951     r = Input[0] >> 8;
 952     g = Input[1] >> 8;
 953     b = Input[2] >> 8;
 954 
 955     X0 = X1 = p8->X0[r];
 956     Y0 = Y1 = p8->Y0[g];
 957     Z0 = Z1 = p8->Z0[b];
 958 
 959     rx = p8 ->rx[r];
 960     ry = p8 ->ry[g];
 961     rz = p8 ->rz[b];
 962 
 963     X1 = X0 + ((rx == 0) ? 0 : p ->opta[2]);
 964     Y1 = Y0 + ((ry == 0) ? 0 : p ->opta[1]);
 965     Z1 = Z0 + ((rz == 0) ? 0 : p ->opta[0]);
 966 
 967 
 968     // These are the 6 Tetrahedral
 969     for (OutChan=0; OutChan < TotalOut; OutChan++) {


1248 // Curves optimizer ------------------------------------------------------------------------------------------------------------------
1249 
1250 static
1251 void CurvesFree(cmsContext ContextID, void* ptr)
1252 {
1253      Curves16Data* Data = (Curves16Data*) ptr;
1254      int i;
1255 
1256      for (i=0; i < Data -> nCurves; i++) {
1257 
1258          _cmsFree(ContextID, Data ->Curves[i]);
1259      }
1260 
1261      _cmsFree(ContextID, Data ->Curves);
1262      _cmsFree(ContextID, ptr);
1263 }
1264 
1265 static
1266 void* CurvesDup(cmsContext ContextID, const void* ptr)
1267 {
1268     Curves16Data* Data = (Curves16Data*)_cmsDupMem(ContextID, ptr, sizeof(Curves16Data));
1269     int i;
1270 
1271     if (Data == NULL) return NULL;
1272 
1273     Data->Curves = (cmsUInt16Number**) _cmsDupMem(ContextID, Data->Curves, Data->nCurves * sizeof(cmsUInt16Number*));
1274 
1275     for (i=0; i < Data -> nCurves; i++) {
1276         Data->Curves[i] = (cmsUInt16Number*) _cmsDupMem(ContextID, Data->Curves[i], Data->nElements * sizeof(cmsUInt16Number));
1277     }
1278 
1279     return (void*) Data;
1280 }
1281 
1282 // Precomputes tables for 8-bit on input devicelink.
1283 static
1284 Curves16Data* CurvesAlloc(cmsContext ContextID, int nCurves, int nElements, cmsToneCurve** G)
1285 {
1286     int i, j;
1287     Curves16Data* c16;
1288 
1289     c16 = (Curves16Data*)_cmsMallocZero(ContextID, sizeof(Curves16Data));
1290     if (c16 == NULL) return NULL;
1291 
1292     c16 ->nCurves = nCurves;
1293     c16 ->nElements = nElements;
1294 
1295     c16->Curves = (cmsUInt16Number**) _cmsCalloc(ContextID, nCurves, sizeof(cmsUInt16Number*));
1296     if (c16 ->Curves == NULL) return NULL;
1297 
1298     for (i=0; i < nCurves; i++) {
1299 
1300         c16->Curves[i] = (cmsUInt16Number*) _cmsCalloc(ContextID, nElements, sizeof(cmsUInt16Number));
1301 
1302         if (c16->Curves[i] == NULL) {
1303 
1304             for (j=0; j < i; j++) {
1305                 _cmsFree(ContextID, c16->Curves[j]);
1306             }
1307             _cmsFree(ContextID, c16->Curves);
1308             _cmsFree(ContextID, c16);
1309             return NULL;
1310         }
1311 
1312         if (nElements == 256) {
1313 
1314             for (j=0; j < nElements; j++) {
1315 
1316                 c16 ->Curves[i][j] = cmsEvalToneCurve16(G[i], FROM_8_TO_16(j));
1317             }
1318         }
1319         else {
1320 


1628     for (i=0; i < 3; i++) {
1629 
1630         if (Off == NULL) {
1631             p ->Off[i] = 0;
1632         }
1633         else {
1634             p ->Off[i] = DOUBLE_TO_1FIXED14(Off->n[i]);
1635         }
1636     }
1637 
1638     // Mark as optimized for faster formatter
1639     if (Is8Bits)
1640         *OutputFormat |= OPTIMIZED_SH(1);
1641 
1642     // Fill function pointers
1643     _cmsPipelineSetOptimizationParameters(Dest, MatShaperEval16, (void*) p, FreeMatShaper, DupMatShaper);
1644     return TRUE;
1645 }
1646 
1647 //  8 bits on input allows matrix-shaper boot up to 25 Mpixels per second on RGB. That's fast!

1648 static
1649 cmsBool OptimizeMatrixShaper(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
1650 {
1651        cmsStage* Curve1, *Curve2;
1652        cmsStage* Matrix1, *Matrix2;


1653        cmsMAT3 res;
1654        cmsBool IdentityMat;
1655        cmsPipeline* Dest, *Src;
1656        cmsFloat64Number* Offset;
1657 
1658        // Only works on RGB to RGB
1659        if (T_CHANNELS(*InputFormat) != 3 || T_CHANNELS(*OutputFormat) != 3) return FALSE;
1660 
1661        // Only works on 8 bit input
1662        if (!_cmsFormatterIs8bit(*InputFormat)) return FALSE;
1663 
1664        // Seems suitable, proceed
1665        Src = *Lut;
1666 
1667        // Check for:
1668        //
1669        //    shaper-matrix-matrix-shaper
1670        //    shaper-matrix-shaper
1671        //
1672        // Both of those constructs are possible (first because abs. colorimetric).
1673        // additionally, In the first case, the input matrix offset should be zero.
1674 
1675        IdentityMat = FALSE;
1676        if (cmsPipelineCheckAndRetreiveStages(Src, 4,
1677               cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
1678               &Curve1, &Matrix1, &Matrix2, &Curve2)) {
1679 
1680               // Get both matrices
1681               _cmsStageMatrixData* Data1 = (_cmsStageMatrixData*)cmsStageData(Matrix1);
1682               _cmsStageMatrixData* Data2 = (_cmsStageMatrixData*)cmsStageData(Matrix2);
1683 
1684               // Input offset should be zero
1685               if (Data1->Offset != NULL) return FALSE;
1686 
1687               // Multiply both matrices to get the result
1688               _cmsMAT3per(&res, (cmsMAT3*)Data2->Double, (cmsMAT3*)Data1->Double);
1689 
1690               // Only 2nd matrix has offset, or it is zero
1691               Offset = Data2->Offset;
1692 
1693               // Now the result is in res + Data2 -> Offset. Maybe is a plain identity?
1694               if (_cmsMAT3isIdentity(&res) && Offset == NULL) {
1695 
1696                      // We can get rid of full matrix
1697                      IdentityMat = TRUE;
1698               }
1699 
1700        }
1701        else {
1702 
1703               if (cmsPipelineCheckAndRetreiveStages(Src, 3,
1704                      cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
1705                      &Curve1, &Matrix1, &Curve2)) {
1706 
1707                      _cmsStageMatrixData* Data = (_cmsStageMatrixData*)cmsStageData(Matrix1);
1708 
1709                      // Copy the matrix to our result
1710                      memcpy(&res, Data->Double, sizeof(res));
1711 
1712                      // Preserve the Odffset (may be NULL as a zero offset)
1713                      Offset = Data->Offset;
1714 
1715                      if (_cmsMAT3isIdentity(&res) && Offset == NULL) {
1716 
1717                             // We can get rid of full matrix
1718                             IdentityMat = TRUE;
1719                      }
1720               }
1721               else
1722                      return FALSE; // Not optimizeable this time
1723 
1724        }
1725 
1726       // Allocate an empty LUT
1727     Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
1728     if (!Dest) return FALSE;
1729 
1730     // Assamble the new LUT
1731     if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageDup(Curve1)))
1732         goto Error;
1733 
1734     if (!IdentityMat) {
1735 
1736            if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageAllocMatrix(Dest->ContextID, 3, 3, (const cmsFloat64Number*)&res, Offset)))
1737                   goto Error;
1738     }
1739 
1740     if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageDup(Curve2)))
1741         goto Error;
1742 
1743     // If identity on matrix, we can further optimize the curves, so call the join curves routine
1744     if (IdentityMat) {
1745 
1746         OptimizeByJoiningCurves(&Dest, Intent, InputFormat, OutputFormat, dwFlags);
1747     }
1748     else {
1749         _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(Curve1);
1750         _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(Curve2);
1751 
1752         // In this particular optimization, caché does not help as it takes more time to deal with
1753         // the caché that with the pixel handling
1754         *dwFlags |= cmsFLAGS_NOCACHE;
1755 
1756         // Setup the optimizarion routines
1757         SetMatShaper(Dest, mpeC1 ->TheCurves, &res, (cmsVEC3*) Offset, mpeC2->TheCurves, OutputFormat);
1758     }
1759 
1760     cmsPipelineFree(Src);
1761     *Lut = Dest;
1762     return TRUE;
1763 Error:
1764     // Leave Src unchanged
1765     cmsPipelineFree(Dest);
1766     return FALSE;
1767 }
1768 
1769 
1770 // -------------------------------------------------------------------------------------------------------------------------------------
1771 // Optimization plug-ins
1772 
1773 // List of optimizations
1774 typedef struct _cmsOptimizationCollection_st {
1775 
1776     _cmsOPToptimizeFn  OptimizePtr;
1777