1 /*
   2  * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *      Image affine transformation with Bicubic filtering
  30  * SYNOPSIS
  31  *      mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
  32  *                                                       mlib_s32 *rightEdges,
  33  *                                                       mlib_s32 *xStarts,
  34  *                                                       mlib_s32 *yStarts,
  35  *                                                       mlib_s32 *sides,
  36  *                                                       mlib_u8  *dstData,
  37  *                                                       mlib_u8  **lineAddr,
  38  *                                                       mlib_s32 dstYStride,
  39  *                                                       mlib_s32 is_affine,
  40  *                                                       mlib_s32 srcYStride,
  41  *                                                       mlib_filter filter)
  42  *
  43  *
  44  * ARGUMENTS
  45  *      leftEdges  array[dstHeight] of xLeft coordinates
  46  *      rightEdges array[dstHeight] of xRight coordinates
  47  *      xStarts    array[dstHeight] of xStart * 65536 coordinates
  48  *      yStarts    array[dstHeight] of yStart * 65536 coordinates
  49  *      sides      output array[4]. sides[0] is yStart, sides[1] is yFinish,
  50  *                 sides[2] is dx * 65536, sides[3] is dy * 65536
  51  *      dstData    pointer to the first pixel on (yStart - 1) line
  52  *      lineAddr   array[srcHeight] of pointers to the first pixel on
  53  *                 the corresponding lines
  54  *      dstYStride stride of destination image
  55  *      is_affine  indicator (Affine - GridWarp)
  56  *      srcYStride stride of source image
  57  *      filter     type of resampling filter
  58  *
  59  * DESCRIPTION
  60  *      The functions step along the lines from xLeft to xRight and apply
  61  *      the bicubic filtering.
  62  *
  63  */
  64 
  65 #include "mlib_ImageAffine.h"
  66 
  67 #define DTYPE  mlib_u8  /* element type used for source and destination pixels below */
  68 
     /* Pastes a channel suffix (1ch, 2ch, 3ch, 4ch) into the exported function name. */
  69 #define FUN_NAME(CHAN) mlib_ImageAffine_u8_##CHAN##_bc
  70 
     /* NOTE(review): not referenced in the visible part of this file; presumably it
        feeds the FILTER_SHIFT / FILTER_MASK definitions in the header - confirm. */
  71 #define FILTER_BITS   8
  72 
  73 /***************************************************************/
  74 #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
  75 
  76 #undef  FILTER_ELEM_BITS
     /* Overrides any earlier definition for the floating-point path.
        NOTE(review): presumably 4 because one table entry is four mlib_f32
        weights (16 bytes) - confirm against the header and the tables. */
  77 #define FILTER_ELEM_BITS  4
  78 
  79 #ifdef MLIB_USE_FTOI_CLAMPING
  80 
     /*
      * SAT8(DST), clamping variant: stores (val0 - sat) converted to mlib_s32,
      * shifted right by 24 and XORed with 0x80, into DST.
      * Reads val0 and sat from the enclosing scope; val0 is not modified.
      * NOTE(review): no explicit range check here - presumably relies on the
      * double-to-int conversion saturating on this target - confirm.
      */
  81 #define SAT8(DST)                                               \
  82   DST = ((mlib_s32)(val0 - sat) >> 24) ^ 0x80
  83 
  84 #else
  85 
     /*
      * SAT8(DST), explicit variant: subtracts sat from val0 IN PLACE, then
      * stores MLIB_U8_MAX if the result is >= MLIB_S32_MAX, MLIB_U8_MIN if it
      * is <= MLIB_S32_MIN, otherwise the same shift-and-XOR value as above.
      * Reads val0 and sat from the enclosing scope. Expands to two statements
      * (an assignment followed by an if/else chain) with no trailing
      * semicolon, so it must be used as a standalone statement inside braces.
      */
  86 #define SAT8(DST)                                               \
  87   val0 -= sat;                                                  \
  88   if (val0 >= MLIB_S32_MAX)                                     \
  89     DST = MLIB_U8_MAX;                                          \
  90   else if (val0 <= MLIB_S32_MIN)                                \
  91     DST = MLIB_U8_MIN;                                          \
  92   else                                                          \
  93     DST = ((mlib_s32)val0 >> 24) ^ 0x80
  94 
  95 #endif /* MLIB_USE_FTOI_CLAMPING */
  96 
  97 /***************************************************************/
  98 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
  99 {
       /*
        * 1-channel mlib_u8 bicubic affine kernel, floating-point variant
        * (this half of the file is compiled only when __sparc is defined).
        *
        * param - transform description. NOTE(review): DECLAREVAR_BC() is
        *         defined elsewhere and presumably unpacks param into the
        *         locals used below (j, yStart, yFinish, X, Y, dX, dY, xRight,
        *         xSrc, ySrc, lineAddr, dstData, dstPixelPtr, srcPixelPtr,
        *         srcYStride, filter) - confirm.
        *
        * The only return statement in this function yields MLIB_SUCCESS.
        */
 100   DECLAREVAR_BC();
 101   DTYPE *dstLineEnd;
       /* 0x7F800000 == (128 << 24) - (1 << 23); SAT8 subtracts this bias from val0
          before its >> 24 and ^ 0x80 steps. NOTE(review): this implies val0 carries
          the pixel value scaled by 2^24 via the float tables - confirm. */
 102   mlib_d64 sat = (mlib_d64) 0x7F800000;
 103   const mlib_f32 *mlib_filters_table;
 104 
       /* MLIB_BICUBIC selects one weight table; every other filter value selects the bc2 table. */
 105   if (filter == MLIB_BICUBIC) {
 106     mlib_filters_table = mlib_filters_u8f_bc;
 107   }
 108   else {
 109     mlib_filters_table = mlib_filters_u8f_bc2;
 110   }
 111 
       /* One pass per destination row. */
 112   for (j = yStart; j <= yFinish; j++) {
 113     mlib_d64 xf0, xf1, xf2, xf3;
 114     mlib_d64 yf0, yf1, yf2, yf3;
 115     mlib_d64 c0, c1, c2, c3, val0;
 116     mlib_s32 filterpos;
 117     mlib_f32 *fptr;
 118     mlib_u8 s0, s1, s2, s3;
 119 
         /* NOTE(review): CLIP is defined elsewhere; presumably it sets X, Y, xRight
            and dstPixelPtr for row j and may skip the row - confirm. */
 120     CLIP(1);
         /* The loop below stops one element short of this address; the store
            that follows the loop writes the remaining pixel. */
 121     dstLineEnd = (DTYPE *) dstData + xRight;
 122 
         /* Prime the first pixel: filterpos is used as a byte offset into the
            table, from which four x weights and four y weights are read. */
 123     filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 124     fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 125 
 126     xf0 = fptr[0];
 127     xf1 = fptr[1];
 128     xf2 = fptr[2];
 129     xf3 = fptr[3];
 130 
 131     filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 132     fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 133 
 134     yf0 = fptr[0];
 135     yf1 = fptr[1];
 136     yf2 = fptr[2];
 137     yf3 = fptr[3];
 138 
         /* Top-left corner of the 4x4 source window: one step up and left of
            (X >> MLIB_SHIFT, Y >> MLIB_SHIFT). */
 139     xSrc = (X >> MLIB_SHIFT) - 1;
 140     ySrc = (Y >> MLIB_SHIFT) - 1;
 141 
         /* Preload the four samples of the window's first row. */
 142     srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 143     s0 = srcPixelPtr[0];
 144     s1 = srcPixelPtr[1];
 145     s2 = srcPixelPtr[2];
 146     s3 = srcPixelPtr[3];
 147 
         /* Each pass finishes the pixel whose weights and first-row samples were
            loaded beforehand, then loads those inputs for the next pixel. */
 148 #ifdef __SUNPRO_C
 149 #pragma pipeloop(0)
 150 #endif /* __SUNPRO_C */
 151     for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
           /* Advance the source coordinates to the NEXT pixel before computing the current one. */
 152       X += dX;
 153       Y += dY;
 154 
           /* Horizontal pass: row 0 from the preloaded samples, rows 1..3 by
              stepping srcPixelPtr down srcYStride bytes at a time. mlib_U82D64 is
              indexed by the u8 sample; NOTE(review): presumably a u8-to-double
              conversion table - confirm. */
 155       c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 156             mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 157       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 158       c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 159             mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 160       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 161       c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 162             mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 163       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 164       c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 165             mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 166 
           /* The x weights are no longer needed for this pixel: reload them for the next one. */
 167       filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 168       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 169 
 170       xf0 = fptr[0];
 171       xf1 = fptr[1];
 172       xf2 = fptr[2];
 173       xf3 = fptr[3];
 174 
           /* Vertical pass with the current y weights; they are reloaded only after this line. */
 175       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 176 
 177       filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 178       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 179 
 180       yf0 = fptr[0];
 181       yf1 = fptr[1];
 182       yf2 = fptr[2];
 183       yf3 = fptr[3];
 184 
           /* Convert val0 to a byte and store the current destination pixel. */
 185       SAT8(dstPixelPtr[0]);
 186 
           /* Load the first-row samples for the next pixel. */
 187       xSrc = (X >> MLIB_SHIFT) - 1;
 188       ySrc = (Y >> MLIB_SHIFT) - 1;
 189 
 190       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 191       s0 = srcPixelPtr[0];
 192       s1 = srcPixelPtr[1];
 193       s2 = srcPixelPtr[2];
 194       s3 = srcPixelPtr[3];
 195     }
 196 
         /* Remaining pixel of the row, computed from the inputs loaded last;
            nothing further is loaded afterwards. */
 197     c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 198           mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 199     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 200     c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 201           mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 202     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 203     c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 204           mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 205     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 206     c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 207           mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 208 
 209     val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 210 
 211     SAT8(dstPixelPtr[0]);
 212   }
 213 
 214   return MLIB_SUCCESS;
 215 }
 216 
 217 /***************************************************************/
 218 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 219 {
       /*
        * 2-channel mlib_u8 bicubic affine kernel, floating-point variant
        * (this half of the file is compiled only when __sparc is defined).
        * Each destination row is walked once per channel.
        *
        * param - transform description. NOTE(review): DECLAREVAR_BC() is
        *         defined elsewhere and presumably unpacks param into the
        *         locals used below (j, yStart, yFinish, X, Y, dX, dY, xRight,
        *         xSrc, ySrc, lineAddr, dstData, dstPixelPtr, srcPixelPtr,
        *         srcYStride, filter) - confirm.
        *
        * The only return statement in this function yields MLIB_SUCCESS.
        */
 220   DECLAREVAR_BC();
 221   DTYPE *dstLineEnd;
       /* 0x7F800000 == (128 << 24) - (1 << 23); SAT8 subtracts this bias from val0. */
 222   mlib_d64 sat = (mlib_d64) 0x7F800000;
 223   const mlib_f32 *mlib_filters_table;
 224 
       /* MLIB_BICUBIC selects one weight table; every other filter value selects the bc2 table. */
 225   if (filter == MLIB_BICUBIC) {
 226     mlib_filters_table = mlib_filters_u8f_bc;
 227   }
 228   else {
 229     mlib_filters_table = mlib_filters_u8f_bc2;
 230   }
 231 
       /* One pass per destination row. */
 232   for (j = yStart; j <= yFinish; j++) {
 233     mlib_d64 xf0, xf1, xf2, xf3;
 234     mlib_d64 yf0, yf1, yf2, yf3;
 235     mlib_d64 c0, c1, c2, c3, val0;
 236     mlib_s32 filterpos, k;
 237     mlib_f32 *fptr;
 238     mlib_u8 s0, s1, s2, s3;
 239 
         /* NOTE(review): CLIP is defined elsewhere; presumably it sets X, Y, xRight
            and dstPixelPtr for row j and may skip the row - confirm. */
 240     CLIP(2);
         /* Two elements per pixel, so the row end is 2 * xRight elements from dstData. */
 241     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 242 
         /* Channel loop: every pass restarts from the same X, Y via private copies
            X1, Y1, so the weights are looked up again for each channel. */
 243     for (k = 0; k < 2; k++) {
 244       mlib_s32 X1 = X;
 245       mlib_s32 Y1 = Y;
 246       DTYPE *dPtr = dstPixelPtr + k;
 247 
           /* Prime the first pixel: filterpos is a byte offset into the table. */
 248       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 249       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 250 
 251       xf0 = fptr[0];
 252       xf1 = fptr[1];
 253       xf2 = fptr[2];
 254       xf3 = fptr[3];
 255 
 256       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 257       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 258 
 259       yf0 = fptr[0];
 260       yf1 = fptr[1];
 261       yf2 = fptr[2];
 262       yf3 = fptr[3];
 263 
           /* Top-left corner of the 4x4 source window. */
 264       xSrc = (X1 >> MLIB_SHIFT) - 1;
 265       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 266 
           /* Interleaved layout: horizontal neighbours of channel k are 2 elements apart. */
 267       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 268       s0 = srcPixelPtr[0];
 269       s1 = srcPixelPtr[2];
 270       s2 = srcPixelPtr[4];
 271       s3 = srcPixelPtr[6];
 272 
           /* Each pass finishes the pixel whose inputs were loaded beforehand,
              then loads the inputs for the next pixel of this channel. */
 273 #ifdef __SUNPRO_C
 274 #pragma pipeloop(0)
 275 #endif /* __SUNPRO_C */
 276       for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
             /* Advance the source coordinates to the NEXT pixel first. */
 277         X1 += dX;
 278         Y1 += dY;
 279 
             /* Horizontal pass over the four window rows (srcYStride bytes apart). */
 280         c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 281               mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 282         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 283         c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 284               mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 285         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 286         c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 287               mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 288         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 289         c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 290               mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 291 
             /* Reload the x weights for the next pixel. */
 292         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 293         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 294 
 295         xf0 = fptr[0];
 296         xf1 = fptr[1];
 297         xf2 = fptr[2];
 298         xf3 = fptr[3];
 299 
             /* Vertical pass with the current y weights; they are reloaded only after this line. */
 300         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 301 
 302         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 303         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 304 
 305         yf0 = fptr[0];
 306         yf1 = fptr[1];
 307         yf2 = fptr[2];
 308         yf3 = fptr[3];
 309 
             /* Convert val0 to a byte and store this channel of the current pixel. */
 310         SAT8(dPtr[0]);
 311 
             /* Load the first-row samples for the next pixel. */
 312         xSrc = (X1 >> MLIB_SHIFT) - 1;
 313         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 314 
 315         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 316         s0 = srcPixelPtr[0];
 317         s1 = srcPixelPtr[2];
 318         s2 = srcPixelPtr[4];
 319         s3 = srcPixelPtr[6];
 320       }
 321 
           /* Remaining pixel of this channel, computed from the inputs loaded last. */
 322       c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 323             mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 324       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 325       c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 326             mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 327       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 328       c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 329             mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 330       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 331       c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 332             mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 333 
 334       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 335 
 336       SAT8(dPtr[0]);
 337     }
 338   }
 339 
 340   return MLIB_SUCCESS;
 341 }
 342 
 343 /***************************************************************/
 344 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 345 {
       /*
        * 3-channel mlib_u8 bicubic affine kernel, floating-point variant
        * (this half of the file is compiled only when __sparc is defined).
        * Each destination row is walked once per channel.
        *
        * param - transform description. NOTE(review): DECLAREVAR_BC() is
        *         defined elsewhere and presumably unpacks param into the
        *         locals used below (j, yStart, yFinish, X, Y, dX, dY, xRight,
        *         xSrc, ySrc, lineAddr, dstData, dstPixelPtr, srcPixelPtr,
        *         srcYStride, filter) - confirm.
        *
        * The only return statement in this function yields MLIB_SUCCESS.
        */
 346   DECLAREVAR_BC();
 347   DTYPE *dstLineEnd;
       /* 0x7F800000 == (128 << 24) - (1 << 23); SAT8 subtracts this bias from val0. */
 348   mlib_d64 sat = (mlib_d64) 0x7F800000;
 349   const mlib_f32 *mlib_filters_table;
 350 
       /* MLIB_BICUBIC selects one weight table; every other filter value selects the bc2 table. */
 351   if (filter == MLIB_BICUBIC) {
 352     mlib_filters_table = mlib_filters_u8f_bc;
 353   }
 354   else {
 355     mlib_filters_table = mlib_filters_u8f_bc2;
 356   }
 357 
       /* One pass per destination row. */
 358   for (j = yStart; j <= yFinish; j++) {
 359     mlib_d64 xf0, xf1, xf2, xf3;
 360     mlib_d64 yf0, yf1, yf2, yf3;
 361     mlib_d64 c0, c1, c2, c3, val0;
 362     mlib_s32 filterpos, k;
 363     mlib_f32 *fptr;
 364     mlib_u8 s0, s1, s2, s3;
 365 
         /* NOTE(review): CLIP is defined elsewhere; presumably it sets X, Y, xRight
            and dstPixelPtr for row j and may skip the row - confirm. */
 366     CLIP(3);
         /* Three elements per pixel, so the row end is 3 * xRight elements from dstData. */
 367     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 368 
         /* Channel loop: every pass restarts from the same X, Y via private copies
            X1, Y1, so the weights are looked up again for each channel. */
 369     for (k = 0; k < 3; k++) {
 370       mlib_s32 X1 = X;
 371       mlib_s32 Y1 = Y;
 372       DTYPE *dPtr = dstPixelPtr + k;
 373 
           /* Prime the first pixel: filterpos is a byte offset into the table. */
 374       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 375       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 376 
 377       xf0 = fptr[0];
 378       xf1 = fptr[1];
 379       xf2 = fptr[2];
 380       xf3 = fptr[3];
 381 
 382       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 383       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 384 
 385       yf0 = fptr[0];
 386       yf1 = fptr[1];
 387       yf2 = fptr[2];
 388       yf3 = fptr[3];
 389 
           /* Top-left corner of the 4x4 source window. */
 390       xSrc = (X1 >> MLIB_SHIFT) - 1;
 391       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 392 
           /* Interleaved layout: horizontal neighbours of channel k are 3 elements apart. */
 393       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 394       s0 = srcPixelPtr[0];
 395       s1 = srcPixelPtr[3];
 396       s2 = srcPixelPtr[6];
 397       s3 = srcPixelPtr[9];
 398 
           /* Each pass finishes the pixel whose inputs were loaded beforehand,
              then loads the inputs for the next pixel of this channel. */
 399 #ifdef __SUNPRO_C
 400 #pragma pipeloop(0)
 401 #endif /* __SUNPRO_C */
 402       for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
             /* Advance the source coordinates to the NEXT pixel first. */
 403         X1 += dX;
 404         Y1 += dY;
 405 
             /* Horizontal pass over the four window rows (srcYStride bytes apart). */
 406         c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 407               mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 408         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 409         c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 410               mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 411         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 412         c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 413               mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 414         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 415         c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 416               mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 417 
             /* Reload the x weights for the next pixel. */
 418         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 419         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 420 
 421         xf0 = fptr[0];
 422         xf1 = fptr[1];
 423         xf2 = fptr[2];
 424         xf3 = fptr[3];
 425 
             /* Vertical pass with the current y weights; they are reloaded only after this line. */
 426         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 427 
 428         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 429         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 430 
 431         yf0 = fptr[0];
 432         yf1 = fptr[1];
 433         yf2 = fptr[2];
 434         yf3 = fptr[3];
 435 
             /* Convert val0 to a byte and store this channel of the current pixel. */
 436         SAT8(dPtr[0]);
 437 
             /* Load the first-row samples for the next pixel. */
 438         xSrc = (X1 >> MLIB_SHIFT) - 1;
 439         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 440 
 441         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 442         s0 = srcPixelPtr[0];
 443         s1 = srcPixelPtr[3];
 444         s2 = srcPixelPtr[6];
 445         s3 = srcPixelPtr[9];
 446       }
 447 
           /* Remaining pixel of this channel, computed from the inputs loaded last. */
 448       c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 449             mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 450       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 451       c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 452             mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 453       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 454       c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 455             mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 456       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 457       c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 458             mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 459 
 460       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 461 
 462       SAT8(dPtr[0]);
 463     }
 464   }
 465 
 466   return MLIB_SUCCESS;
 467 }
 468 
 469 /***************************************************************/
 470 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 471 {
       /*
        * 4-channel mlib_u8 bicubic affine kernel, floating-point variant
        * (this half of the file is compiled only when __sparc is defined).
        * Each destination row is walked once per channel.
        *
        * param - transform description. NOTE(review): DECLAREVAR_BC() is
        *         defined elsewhere and presumably unpacks param into the
        *         locals used below (j, yStart, yFinish, X, Y, dX, dY, xRight,
        *         xSrc, ySrc, lineAddr, dstData, dstPixelPtr, srcPixelPtr,
        *         srcYStride, filter) - confirm.
        *
        * The only return statement in this function yields MLIB_SUCCESS.
        */
 472   DECLAREVAR_BC();
 473   DTYPE *dstLineEnd;
       /* 0x7F800000 == (128 << 24) - (1 << 23); SAT8 subtracts this bias from val0. */
 474   mlib_d64 sat = (mlib_d64) 0x7F800000;
 475   const mlib_f32 *mlib_filters_table;
 476 
       /* MLIB_BICUBIC selects one weight table; every other filter value selects the bc2 table. */
 477   if (filter == MLIB_BICUBIC) {
 478     mlib_filters_table = mlib_filters_u8f_bc;
 479   }
 480   else {
 481     mlib_filters_table = mlib_filters_u8f_bc2;
 482   }
 483 
       /* One pass per destination row. */
 484   for (j = yStart; j <= yFinish; j++) {
 485     mlib_d64 xf0, xf1, xf2, xf3;
 486     mlib_d64 yf0, yf1, yf2, yf3;
 487     mlib_d64 c0, c1, c2, c3, val0;
 488     mlib_s32 filterpos, k;
 489     mlib_f32 *fptr;
 490     mlib_u8 s0, s1, s2, s3;
 491 
         /* NOTE(review): CLIP is defined elsewhere; presumably it sets X, Y, xRight
            and dstPixelPtr for row j and may skip the row - confirm. */
 492     CLIP(4);
         /* Four elements per pixel, so the row end is 4 * xRight elements from dstData. */
 493     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
 494 
         /* Channel loop: every pass restarts from the same X, Y via private copies
            X1, Y1, so the weights are looked up again for each channel. */
 495     for (k = 0; k < 4; k++) {
 496       mlib_s32 X1 = X;
 497       mlib_s32 Y1 = Y;
 498       DTYPE *dPtr = dstPixelPtr + k;
 499 
           /* Prime the first pixel: filterpos is a byte offset into the table. */
 500       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 501       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 502 
 503       xf0 = fptr[0];
 504       xf1 = fptr[1];
 505       xf2 = fptr[2];
 506       xf3 = fptr[3];
 507 
 508       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 509       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 510 
 511       yf0 = fptr[0];
 512       yf1 = fptr[1];
 513       yf2 = fptr[2];
 514       yf3 = fptr[3];
 515 
           /* Top-left corner of the 4x4 source window. */
 516       xSrc = (X1 >> MLIB_SHIFT) - 1;
 517       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 518 
           /* Interleaved layout: horizontal neighbours of channel k are 4 elements apart. */
 519       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 520       s0 = srcPixelPtr[0];
 521       s1 = srcPixelPtr[4];
 522       s2 = srcPixelPtr[8];
 523       s3 = srcPixelPtr[12];
 524 
           /* Each pass finishes the pixel whose inputs were loaded beforehand,
              then loads the inputs for the next pixel of this channel. */
 525 #ifdef __SUNPRO_C
 526 #pragma pipeloop(0)
 527 #endif /* __SUNPRO_C */
 528       for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
             /* Advance the source coordinates to the NEXT pixel first. */
 529         X1 += dX;
 530         Y1 += dY;
 531 
             /* Horizontal pass over the four window rows (srcYStride bytes apart). */
 532         c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 533               mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 534         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 535         c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 536               mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 537         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 538         c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 539               mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 540         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 541         c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 542               mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 543 
             /* Reload the x weights for the next pixel. */
 544         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 545         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 546 
 547         xf0 = fptr[0];
 548         xf1 = fptr[1];
 549         xf2 = fptr[2];
 550         xf3 = fptr[3];
 551 
             /* Vertical pass with the current y weights; they are reloaded only after this line. */
 552         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 553 
 554         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 555         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 556 
 557         yf0 = fptr[0];
 558         yf1 = fptr[1];
 559         yf2 = fptr[2];
 560         yf3 = fptr[3];
 561 
             /* Convert val0 to a byte and store this channel of the current pixel. */
 562         SAT8(dPtr[0]);
 563 
             /* Load the first-row samples for the next pixel. */
 564         xSrc = (X1 >> MLIB_SHIFT) - 1;
 565         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 566 
 567         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 568         s0 = srcPixelPtr[0];
 569         s1 = srcPixelPtr[4];
 570         s2 = srcPixelPtr[8];
 571         s3 = srcPixelPtr[12];
 572       }
 573 
           /* Remaining pixel of this channel, computed from the inputs loaded last. */
 574       c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 575             mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 576       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 577       c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 578             mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 579       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 580       c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 581             mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 582       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 583       c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 584             mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 585 
 586       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 587 
 588       SAT8(dPtr[0]);
 589     }
 590   }
 591 
 592   return MLIB_SUCCESS;
 593 }
 594 
 595 #else       /* for x86, using integer multiplies is faster */
 596 
 597 #define SHIFT_X  12  /* right shift applied to each horizontal (x) weighted sum */
     /* Term added before the x shift; currently 0, with the half-unit value left commented out. */
 598 #define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */
 599 
     /* Right shift applied after the vertical (y) pass. NOTE(review): 14 + 14
        presumably reflects x and y weights each scaled by 2^14 - confirm
        against the integer filter tables. */
 600 #define SHIFT_Y  (14 + 14 - SHIFT_X)
 601 #define ROUND_Y  (1 << (SHIFT_Y - 1))  /* half of (1 << SHIFT_Y), added before the y shift */
 602 
 603 /***************************************************************/
 604 /* Test for the presence of any "1" bit in bits
 605    8 to 31 of val. If present, then val is either
 606    negative or >255. If over/underflows of 8 bits
 607    are uncommon, then this technique can be a win,
 608    since only a single test, rather than two, is
 609    necessary to determine if clamping is needed.
 610    On the other hand, if over/underflows are common,
 611    it adds an extra test.
 612 */
     /*
      * S32_TO_U8_SAT(DST): reads val0 from the enclosing scope (the "val" of the
      * comment above) and stores it in DST, replaced by MLIB_U8_MIN when it is
      * below MLIB_U8_MIN and by MLIB_U8_MAX when any other bit above bit 7 is set.
      * val0 itself is not modified. The macro expands to a braced if/else
      * statement rather than an expression, so use it only as a standalone
      * statement.
      */
 613 #define S32_TO_U8_SAT(DST)                                      \
 614   if (val0 & 0xffffff00) {                                      \
 615     if (val0 < MLIB_U8_MIN)                                     \
 616       DST = MLIB_U8_MIN;                                        \
 617     else                                                        \
 618       DST = MLIB_U8_MAX;                                        \
 619   } else {                                                      \
 620     DST = (mlib_u8)val0;                                        \
 621   }
 622 
 623 /***************************************************************/
 624 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
 625 {
       /*
        * 1-channel mlib_u8 bicubic affine kernel, integer variant (this half
        * of the file is compiled when __sparc is NOT defined).
        *
        * param - transform description. NOTE(review): DECLAREVAR_BC() is
        *         defined elsewhere and presumably unpacks param into the
        *         locals used below (j, yStart, yFinish, X, Y, dX, dY, xRight,
        *         xSrc, ySrc, lineAddr, dstData, dstPixelPtr, srcPixelPtr,
        *         srcYStride, filter) - confirm.
        *
        * The only return statement in this function yields MLIB_SUCCESS.
        */
 626   DECLAREVAR_BC();
 627   DTYPE *dstLineEnd;
 628   const mlib_s16 *mlib_filters_table;
 629 
       /* MLIB_BICUBIC selects one weight table; every other filter value selects
          the bc2 table. Both are accessed as arrays of mlib_s16. */
 630   if (filter == MLIB_BICUBIC) {
 631     mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
 632   }
 633   else {
 634     mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
 635   }
 636 
       /* One pass per destination row. */
 637   for (j = yStart; j <= yFinish; j++) {
 638     mlib_s32 xf0, xf1, xf2, xf3;
 639     mlib_s32 yf0, yf1, yf2, yf3;
 640     mlib_s32 c0, c1, c2, c3, val0;
 641     mlib_s32 filterpos;
 642     mlib_s16 *fptr;
 643     mlib_u8 s0, s1, s2, s3;
 644 
         /* NOTE(review): CLIP is defined elsewhere; presumably it sets X, Y, xRight
            and dstPixelPtr for row j and may skip the row - confirm. */
 645     CLIP(1);
         /* The loop below stops one element short of this address; the store
            that follows the loop writes the remaining pixel. */
 646     dstLineEnd = (DTYPE *) dstData + xRight;
 647 
         /* Prime the first pixel: filterpos is used as a byte offset into the
            table, from which four x weights and four y weights are read. */
 648     filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 649     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 650 
 651     xf0 = fptr[0];
 652     xf1 = fptr[1];
 653     xf2 = fptr[2];
 654     xf3 = fptr[3];
 655 
 656     filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 657     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 658 
 659     yf0 = fptr[0];
 660     yf1 = fptr[1];
 661     yf2 = fptr[2];
 662     yf3 = fptr[3];
 663 
         /* Top-left corner of the 4x4 source window: one step up and left of
            (X >> MLIB_SHIFT, Y >> MLIB_SHIFT). */
 664     xSrc = (X >> MLIB_SHIFT) - 1;
 665     ySrc = (Y >> MLIB_SHIFT) - 1;
 666 
         /* Preload the four samples of the window's first row. */
 667     srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 668     s0 = srcPixelPtr[0];
 669     s1 = srcPixelPtr[1];
 670     s2 = srcPixelPtr[2];
 671     s3 = srcPixelPtr[3];
 672 
         /* Each pass finishes the pixel whose weights and first-row samples were
            loaded beforehand, then loads those inputs for the next pixel. */
 673 #ifdef __SUNPRO_C
 674 #pragma pipeloop(0)
 675 #endif /* __SUNPRO_C */
 676     for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
           /* Advance the source coordinates to the NEXT pixel before computing the current one. */
 677       X += dX;
 678       Y += dY;
 679 
           /* Horizontal pass: each row sum is reduced by SHIFT_X bits (ROUND_X is 0,
              so no rounding term is added). Rows 1..3 are reached by stepping
              srcPixelPtr down srcYStride bytes at a time. */
 680       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 681       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 682       c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 683             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 684       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 685       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 686             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 687       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 688       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 689             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 690 
           /* The x weights are no longer needed for this pixel: reload them for the next one. */
 691       filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 692       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 693 
 694       xf0 = fptr[0];
 695       xf1 = fptr[1];
 696       xf2 = fptr[2];
 697       xf3 = fptr[3];
 698 
           /* Vertical pass with the current y weights, rounded via ROUND_Y and
              reduced by SHIFT_Y bits; the y weights are reloaded only after this line. */
 699       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 700 
 701       filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 702       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 703 
 704       yf0 = fptr[0];
 705       yf1 = fptr[1];
 706       yf2 = fptr[2];
 707       yf3 = fptr[3];
 708 
           /* Clamp val0 to the u8 range and store the current destination pixel. */
 709       S32_TO_U8_SAT(dstPixelPtr[0]);
 710 
           /* Load the first-row samples for the next pixel. */
 711       xSrc = (X >> MLIB_SHIFT) - 1;
 712       ySrc = (Y >> MLIB_SHIFT) - 1;
 713 
 714       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 715       s0 = srcPixelPtr[0];
 716       s1 = srcPixelPtr[1];
 717       s2 = srcPixelPtr[2];
 718       s3 = srcPixelPtr[3];
 719     }
 720 
         /* Remaining pixel of the row, computed from the inputs loaded last;
            nothing further is loaded afterwards. */
 721     c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 722     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 723     c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 724           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 725     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 726     c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 727           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 728     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 729     c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 730           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 731 
 732     val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 733 
 734     S32_TO_U8_SAT(dstPixelPtr[0]);
 735   }
 736 
 737   return MLIB_SUCCESS;
 738 }
 739 
 740 /***************************************************************/
 741 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 742 {
       /*
        * 2-channel mlib_u8 bicubic affine kernel, integer variant (this half
        * of the file is compiled when __sparc is NOT defined).
        * Each destination row is walked once per channel.
        *
        * param - transform description. NOTE(review): DECLAREVAR_BC() is
        *         defined elsewhere and presumably unpacks param into the
        *         locals used below (j, yStart, yFinish, X, Y, dX, dY, xRight,
        *         xSrc, ySrc, lineAddr, dstData, dstPixelPtr, srcPixelPtr,
        *         srcYStride, filter) - confirm.
        *
        * The only return statement in this function yields MLIB_SUCCESS.
        */
 743   DECLAREVAR_BC();
 744   DTYPE *dstLineEnd;
 745   const mlib_s16 *mlib_filters_table;
 746 
       /* MLIB_BICUBIC selects one weight table; every other filter value selects
          the bc2 table. Both are accessed as arrays of mlib_s16. */
 747   if (filter == MLIB_BICUBIC) {
 748     mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
 749   }
 750   else {
 751     mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
 752   }
 753 
       /* One pass per destination row. */
 754   for (j = yStart; j <= yFinish; j++) {
 755     mlib_s32 xf0, xf1, xf2, xf3;
 756     mlib_s32 yf0, yf1, yf2, yf3;
 757     mlib_s32 c0, c1, c2, c3, val0;
 758     mlib_s32 filterpos, k;
 759     mlib_s16 *fptr;
 760     mlib_u8 s0, s1, s2, s3;
 761 
         /* NOTE(review): CLIP is defined elsewhere; presumably it sets X, Y, xRight
            and dstPixelPtr for row j and may skip the row - confirm. */
 762     CLIP(2);
         /* Two elements per pixel, so the row end is 2 * xRight elements from dstData. */
 763     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 764 
         /* Channel loop: every pass restarts from the same X, Y via private copies
            X1, Y1, so the weights are looked up again for each channel. */
 765     for (k = 0; k < 2; k++) {
 766       mlib_s32 X1 = X;
 767       mlib_s32 Y1 = Y;
 768       DTYPE *dPtr = dstPixelPtr + k;
 769 
           /* Prime the first pixel: filterpos is a byte offset into the table. */
 770       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 771       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 772 
 773       xf0 = fptr[0];
 774       xf1 = fptr[1];
 775       xf2 = fptr[2];
 776       xf3 = fptr[3];
 777 
 778       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 779       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 780 
 781       yf0 = fptr[0];
 782       yf1 = fptr[1];
 783       yf2 = fptr[2];
 784       yf3 = fptr[3];
 785 
           /* Top-left corner of the 4x4 source window. */
 786       xSrc = (X1 >> MLIB_SHIFT) - 1;
 787       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 788 
           /* Interleaved layout: horizontal neighbours of channel k are 2 elements apart. */
 789       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 790       s0 = srcPixelPtr[0];
 791       s1 = srcPixelPtr[2];
 792       s2 = srcPixelPtr[4];
 793       s3 = srcPixelPtr[6];
 794 
           /* Each pass finishes the pixel whose inputs were loaded beforehand,
              then loads the inputs for the next pixel of this channel. */
 795 #ifdef __SUNPRO_C
 796 #pragma pipeloop(0)
 797 #endif /* __SUNPRO_C */
 798       for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
             /* Advance the source coordinates to the NEXT pixel first. */
 799         X1 += dX;
 800         Y1 += dY;
 801 
             /* Horizontal pass: each row sum is reduced by SHIFT_X bits (ROUND_X is 0);
                rows are srcYStride bytes apart. */
 802         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 803         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 804         c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 805               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 806         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 807         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 808               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 809         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 810         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 811               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 812 
             /* Reload the x weights for the next pixel. */
 813         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 814         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 815 
 816         xf0 = fptr[0];
 817         xf1 = fptr[1];
 818         xf2 = fptr[2];
 819         xf3 = fptr[3];
 820 
             /* Vertical pass with the current y weights, rounded via ROUND_Y and
                reduced by SHIFT_Y bits; the y weights are reloaded only after this line. */
 821         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 822 
 823         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 824         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 825 
 826         yf0 = fptr[0];
 827         yf1 = fptr[1];
 828         yf2 = fptr[2];
 829         yf3 = fptr[3];
 830 
             /* Clamp val0 to the u8 range and store this channel of the current pixel. */
 831         S32_TO_U8_SAT(dPtr[0]);
 832 
             /* Load the first-row samples for the next pixel. */
 833         xSrc = (X1 >> MLIB_SHIFT) - 1;
 834         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 835 
 836         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 837         s0 = srcPixelPtr[0];
 838         s1 = srcPixelPtr[2];
 839         s2 = srcPixelPtr[4];
 840         s3 = srcPixelPtr[6];
 841       }
 842 
           /* Remaining pixel of this channel, computed from the inputs loaded last. */
 843       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 844       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 845       c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 846             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 847       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 848       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 849             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 850       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 851       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 852             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 853 
 854       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 855 
 856       S32_TO_U8_SAT(dPtr[0]);
 857     }
 858   }
 859 
 860   return MLIB_SUCCESS;
 861 }
 862 
 863 /***************************************************************/
     /*
      * Bicubic affine transform, variant for 3 interleaved channels.
      *
      * For every destination row j in [yStart, yFinish] and for every
      * channel k in [0, 3), the row is walked once and each output sample
      * is computed from a 4x4 window of source samples: four horizontal
      * 4-tap sums (one per source row) followed by one vertical 4-tap sum.
      *
      * param - consumed by DECLAREVAR_BC(), a macro defined elsewhere.
      *         NOTE(review): presumably it unpacks param into the locals
      *         used below (filter, lineAddr, dstData, X, Y, dX, dY,
      *         srcYStride, ...) - confirm against the macro.
      *
      * The only return statement visible in this body yields MLIB_SUCCESS.
      */
 864 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 865 {
 866   DECLAREVAR_BC();
 867   DTYPE *dstLineEnd;
 868   const mlib_s16 *mlib_filters_table;
 869 
       /* Select the coefficient table that matches the requested filter. */
 870   if (filter == MLIB_BICUBIC) {
 871     mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
 872   }
 873   else {
 874     mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
 875   }
 876 
 877   for (j = yStart; j <= yFinish; j++) {
 878     mlib_s32 xf0, xf1, xf2, xf3;
 879     mlib_s32 yf0, yf1, yf2, yf3;
 880     mlib_s32 c0, c1, c2, c3, val0;
 881     mlib_s32 filterpos, k;
 882     mlib_s16 *fptr;
 883     mlib_u8 s0, s1, s2, s3;
 884 
         /*
          * CLIP is a macro defined elsewhere.
          * NOTE(review): presumably it loads this row's edges and start
          * coordinates (xRight, X, Y, dstPixelPtr, ...) - confirm.
          */
 885     CLIP(3);
 886     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 887 
         /* One pass over the row per channel; X1/Y1 restart from X/Y. */
 888     for (k = 0; k < 3; k++) {
 889       mlib_s32 X1 = X;
 890       mlib_s32 Y1 = Y;
 891       DTYPE *dPtr = dstPixelPtr + k;
 892 
           /*
            * Horizontal coefficients. filterpos is applied as a byte offset
            * into the table (note the mlib_u8 cast); four consecutive
            * mlib_s16 values are then read.
            */
 893       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 894       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 895 
 896       xf0 = fptr[0];
 897       xf1 = fptr[1];
 898       xf2 = fptr[2];
 899       xf3 = fptr[3];
 900 
           /* Vertical coefficients, looked up the same way from Y1. */
 901       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 902       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 903 
 904       yf0 = fptr[0];
 905       yf1 = fptr[1];
 906       yf2 = fptr[2];
 907       yf3 = fptr[3];
 908 
           /*
            * Shifted coordinates minus one: the first of the four sample
            * columns and the first of the four sample rows read below.
            */
 909       xSrc = (X1 >> MLIB_SHIFT) - 1;
 910       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 911 
           /*
            * Preload the first window row for channel k; horizontally
            * adjacent samples of one channel are 3 elements apart.
            */
 912       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 913       s0 = srcPixelPtr[0];
 914       s1 = srcPixelPtr[3];
 915       s2 = srcPixelPtr[6];
 916       s3 = srcPixelPtr[9];
 917 
           /*
            * Each iteration finishes the pixel whose first-row samples
            * (s0..s3) and coefficients were loaded beforehand, then loads
            * the same data for the next position. The loop stops before
            * dstLineEnd, so the final position is completed by the code
            * that follows the loop.
            */
 918 #ifdef __SUNPRO_C
 919 #pragma pipeloop(0)
 920 #endif /* __SUNPRO_C */
 921       for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
             /* Source coordinates of the next destination pixel. */
 922         X1 += dX;
 923         Y1 += dY;
 924 
             /*
              * Horizontal pass over four consecutive source rows. The row
              * pointer is advanced with integer address arithmetic, so
              * srcYStride is applied as a raw address offset.
              */
 925         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 926         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 927         c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 928               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 929         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 930         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 931               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 932         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 933         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 934               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 935 
             /* Horizontal coefficients for the next pixel (X1 already advanced). */
 936         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 937         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 938 
 939         xf0 = fptr[0];
 940         xf1 = fptr[1];
 941         xf2 = fptr[2];
 942         xf3 = fptr[3];
 943 
             /*
              * Vertical pass for the current pixel; it must run before the
              * yf coefficients are overwritten just below.
              */
 944         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 945 
             /* Vertical coefficients for the next pixel. */
 946         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 947         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 948 
 949         yf0 = fptr[0];
 950         yf1 = fptr[1];
 951         yf2 = fptr[2];
 952         yf3 = fptr[3];
 953 
             /*
              * S32_TO_U8_SAT is defined elsewhere and receives only the
              * destination. NOTE(review): presumably it stores val0 clamped
              * to the mlib_u8 range - confirm against the macro.
              */
 954         S32_TO_U8_SAT(dPtr[0]);
 955 
             /* Preload the first window row for the next pixel. */
 956         xSrc = (X1 >> MLIB_SHIFT) - 1;
 957         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 958 
 959         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 960         s0 = srcPixelPtr[0];
 961         s1 = srcPixelPtr[3];
 962         s2 = srcPixelPtr[6];
 963         s3 = srcPixelPtr[9];
 964       }
 965 
           /* Epilogue: finish the pixel prepared by the most recent preload. */
 966       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 967       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 968       c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 969             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 970       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 971       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 972             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 973       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 974       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 975             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 976 
 977       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 978 
 979       S32_TO_U8_SAT(dPtr[0]);
 980     }
 981   }
 982 
 983   return MLIB_SUCCESS;
 984 }
 985 
 986 /***************************************************************/
     /*
      * Bicubic affine transform, variant for 4 interleaved channels.
      *
      * For every destination row j in [yStart, yFinish] and for every
      * channel k in [0, 4), the row is walked once and each output sample
      * is computed from a 4x4 window of source samples: four horizontal
      * 4-tap sums (one per source row) followed by one vertical 4-tap sum.
      *
      * param - consumed by DECLAREVAR_BC(), a macro defined elsewhere.
      *         NOTE(review): presumably it unpacks param into the locals
      *         used below (filter, lineAddr, dstData, X, Y, dX, dY,
      *         srcYStride, ...) - confirm against the macro.
      *
      * The only return statement visible in this body yields MLIB_SUCCESS.
      */
 987 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 988 {
 989   DECLAREVAR_BC();
 990   DTYPE *dstLineEnd;
 991   const mlib_s16 *mlib_filters_table;
 992 
       /* Select the coefficient table that matches the requested filter. */
 993   if (filter == MLIB_BICUBIC) {
 994     mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
 995   }
 996   else {
 997     mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
 998   }
 999 
1000   for (j = yStart; j <= yFinish; j++) {
1001     mlib_s32 xf0, xf1, xf2, xf3;
1002     mlib_s32 yf0, yf1, yf2, yf3;
1003     mlib_s32 c0, c1, c2, c3, val0;
1004     mlib_s32 filterpos, k;
1005     mlib_s16 *fptr;
1006     mlib_u8 s0, s1, s2, s3;
1007 
         /*
          * CLIP is a macro defined elsewhere.
          * NOTE(review): presumably it loads this row's edges and start
          * coordinates (xRight, X, Y, dstPixelPtr, ...) - confirm.
          */
1008     CLIP(4);
1009     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
1010 
         /* One pass over the row per channel; X1/Y1 restart from X/Y. */
1011     for (k = 0; k < 4; k++) {
1012       mlib_s32 X1 = X;
1013       mlib_s32 Y1 = Y;
1014       DTYPE *dPtr = dstPixelPtr + k;
1015 
           /*
            * Horizontal coefficients. filterpos is applied as a byte offset
            * into the table (note the mlib_u8 cast); four consecutive
            * mlib_s16 values are then read.
            */
1016       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1017       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1018 
1019       xf0 = fptr[0];
1020       xf1 = fptr[1];
1021       xf2 = fptr[2];
1022       xf3 = fptr[3];
1023 
           /* Vertical coefficients, looked up the same way from Y1. */
1024       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1025       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1026 
1027       yf0 = fptr[0];
1028       yf1 = fptr[1];
1029       yf2 = fptr[2];
1030       yf3 = fptr[3];
1031 
           /*
            * Shifted coordinates minus one: the first of the four sample
            * columns and the first of the four sample rows read below.
            */
1032       xSrc = (X1 >> MLIB_SHIFT) - 1;
1033       ySrc = (Y1 >> MLIB_SHIFT) - 1;
1034 
           /*
            * Preload the first window row for channel k; horizontally
            * adjacent samples of one channel are 4 elements apart.
            */
1035       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1036       s0 = srcPixelPtr[0];
1037       s1 = srcPixelPtr[4];
1038       s2 = srcPixelPtr[8];
1039       s3 = srcPixelPtr[12];
1040 
           /*
            * Each iteration finishes the pixel whose first-row samples
            * (s0..s3) and coefficients were loaded beforehand, then loads
            * the same data for the next position. The loop stops before
            * dstLineEnd, so the final position is completed by the code
            * that follows the loop.
            */
1041 #ifdef __SUNPRO_C
1042 #pragma pipeloop(0)
1043 #endif /* __SUNPRO_C */
1044       for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
             /* Source coordinates of the next destination pixel. */
1045         X1 += dX;
1046         Y1 += dY;
1047 
             /*
              * Horizontal pass over four consecutive source rows. The row
              * pointer is advanced with integer address arithmetic, so
              * srcYStride is applied as a raw address offset.
              */
1048         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1049         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1050         c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1051               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1052         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1053         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1054               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1055         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1056         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1057               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1058 
             /* Horizontal coefficients for the next pixel (X1 already advanced). */
1059         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1060         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1061 
1062         xf0 = fptr[0];
1063         xf1 = fptr[1];
1064         xf2 = fptr[2];
1065         xf3 = fptr[3];
1066 
             /*
              * Vertical pass for the current pixel; it must run before the
              * yf coefficients are overwritten just below.
              */
1067         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1068 
             /* Vertical coefficients for the next pixel. */
1069         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1070         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1071 
1072         yf0 = fptr[0];
1073         yf1 = fptr[1];
1074         yf2 = fptr[2];
1075         yf3 = fptr[3];
1076 
             /*
              * S32_TO_U8_SAT is defined elsewhere and receives only the
              * destination. NOTE(review): presumably it stores val0 clamped
              * to the mlib_u8 range - confirm against the macro.
              */
1077         S32_TO_U8_SAT(dPtr[0]);
1078 
             /* Preload the first window row for the next pixel. */
1079         xSrc = (X1 >> MLIB_SHIFT) - 1;
1080         ySrc = (Y1 >> MLIB_SHIFT) - 1;
1081 
1082         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1083         s0 = srcPixelPtr[0];
1084         s1 = srcPixelPtr[4];
1085         s2 = srcPixelPtr[8];
1086         s3 = srcPixelPtr[12];
1087       }
1088 
           /* Epilogue: finish the pixel prepared by the most recent preload. */
1089       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1090       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1091       c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1092             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1093       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1094       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1095             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1096       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1097       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1098             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1099 
1100       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1101 
1102       S32_TO_U8_SAT(dPtr[0]);
1103     }
1104   }
1105 
1106   return MLIB_SUCCESS;
1107 }
1108 
1109 #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
1110 
1111 /***************************************************************/