1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *      Image affine transformation with Bicubic filtering
  30  * SYNOPSIS
  31  *      mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
  32  *                                                       mlib_s32 *rightEdges,
  33  *                                                       mlib_s32 *xStarts,
  34  *                                                       mlib_s32 *yStarts,
  35  *                                                       mlib_s32 *sides,
  36  *                                                       mlib_u8  *dstData,
  37  *                                                       mlib_u8  **lineAddr,
  38  *                                                       mlib_s32 dstYStride,
  39  *                                                       mlib_s32 is_affine,
  40  *                                                       mlib_s32 srcYStride,
  41  *                                                       mlib_filter filter)
  42  *
  43  * ARGUMENTS
  44  *      leftEdges  array[dstHeight] of xLeft coordinates
  45  *      rightEdges array[dstHeight] of xRight coordinates
  46  *      xStarts    array[dstHeight] of xStart * 65536 coordinates
  47  *      yStarts    array[dstHeight] of yStart * 65536 coordinates
  48  *      sides      output array[4]. sides[0] is yStart, sides[1] is yFinish,
  49  *                 sides[2] is dx * 65536, sides[3] is dy * 65536
  50  *      dstData    pointer to the first pixel on (yStart - 1) line
  51  *      lineAddr   array[srcHeight] of pointers to the first pixel on
  52  *                 the corresponding lines
  53  *      dstYStride stride of destination image
  54  *      is_affine  indicator (Affine - GridWarp)
  55  *      srcYStride stride of source image
  56  *      filter     type of resampling filter
  57  *
  58  * DESCRIPTION
  59  *      The functions step along the lines from xLeft to xRight and apply
  60  *      the bicubic filtering.
  61  *
  62  */
  63 
  64 #include "mlib_ImageAffine.h"
  65 
  66 #define DTYPE  mlib_u16
  67 
  68 #define FUN_NAME(CHAN) mlib_ImageAffine_u16_##CHAN##_bc
  69 
  70 #define FILTER_BITS   9
  71 
  72 /***************************************************************/
  73 #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
  74 
  75 /***************************************************************/
  76 #undef  FILTER_ELEM_BITS
  77 #define FILTER_ELEM_BITS  4
  78 
  79 /***************************************************************/
  80 #ifdef MLIB_USE_FTOI_CLAMPING
  81 
  82 #define SAT_U16(DST)                                            \
  83   DST = ((mlib_s32)(val0 - (mlib_d64)0x7FFF8000) >> 16) ^ 0x8000
  84 
  85 #else
  86 
  87 #define SAT_U16(DST)                                            \
  88   if (val0 >= MLIB_U32_MAX)                                     \
  89     DST = MLIB_U16_MAX;                                         \
  90   else if (val0 <= MLIB_U32_MIN)                                \
  91     DST = MLIB_U16_MIN;                                         \
  92   else                                                          \
  93     DST = ((mlib_u32)val0) >> 16
  94 
  95 #endif /* MLIB_USE_FTOI_CLAMPING */
  96 
  97 /***************************************************************/
  98 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
  99 {
 100   DECLAREVAR_BC();
 101   DTYPE *dstLineEnd;
 102   const mlib_f32 *mlib_filters_table;
 103 
 104   if (filter == MLIB_BICUBIC) {
 105     mlib_filters_table = mlib_filters_s16f_bc;
 106   }
 107   else {
 108     mlib_filters_table = mlib_filters_s16f_bc2;
 109   }
 110 
 111   for (j = yStart; j <= yFinish; j++) {
 112     mlib_d64 xf0, xf1, xf2, xf3;
 113     mlib_d64 yf0, yf1, yf2, yf3;
 114     mlib_d64 c0, c1, c2, c3, val0;
 115     mlib_s32 filterpos;
 116     mlib_f32 *fptr;
 117     mlib_s32 s0, s1, s2, s3;
 118     mlib_s32 s4, s5, s6, s7;
 119 
 120     CLIP(1);
 121     dstLineEnd = (DTYPE *) dstData + xRight;
 122 
 123     filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 124     fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 125 
 126     xf0 = fptr[0];
 127     xf1 = fptr[1];
 128     xf2 = fptr[2];
 129     xf3 = fptr[3];
 130 
 131     filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 132     fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 133 
 134     yf0 = fptr[0];
 135     yf1 = fptr[1];
 136     yf2 = fptr[2];
 137     yf3 = fptr[3];
 138 
 139     xSrc = (X >> MLIB_SHIFT) - 1;
 140     ySrc = (Y >> MLIB_SHIFT) - 1;
 141 
 142     srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 143     s0 = srcPixelPtr[0];
 144     s1 = srcPixelPtr[1];
 145     s2 = srcPixelPtr[2];
 146     s3 = srcPixelPtr[3];
 147 
 148     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 149     s4 = srcPixelPtr[0];
 150     s5 = srcPixelPtr[1];
 151     s6 = srcPixelPtr[2];
 152     s7 = srcPixelPtr[3];
 153 
 154     for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
 155 
 156       X += dX;
 157       Y += dY;
 158 
 159       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 160       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 161       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 162       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 163             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
 164       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 165       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 166             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
 167 
 168       filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 169       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 170 
 171       xf0 = fptr[0];
 172       xf1 = fptr[1];
 173       xf2 = fptr[2];
 174       xf3 = fptr[3];
 175 
 176       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 177 
 178       filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 179       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 180 
 181       yf0 = fptr[0];
 182       yf1 = fptr[1];
 183       yf2 = fptr[2];
 184       yf3 = fptr[3];
 185 
 186       SAT_U16(dstPixelPtr[0]);
 187 
 188       xSrc = (X >> MLIB_SHIFT) - 1;
 189       ySrc = (Y >> MLIB_SHIFT) - 1;
 190 
 191       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 192       s0 = srcPixelPtr[0];
 193       s1 = srcPixelPtr[1];
 194       s2 = srcPixelPtr[2];
 195       s3 = srcPixelPtr[3];
 196 
 197       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 198       s4 = srcPixelPtr[0];
 199       s5 = srcPixelPtr[1];
 200       s6 = srcPixelPtr[2];
 201       s7 = srcPixelPtr[3];
 202     }
 203 
 204     c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 205     c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 206     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 207     c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 208           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
 209     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 210     c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 211           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
 212 
 213     val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 214     SAT_U16(dstPixelPtr[0]);
 215   }
 216 
 217   return MLIB_SUCCESS;
 218 }
 219 
 220 /***************************************************************/
 221 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 222 {
 223   DECLAREVAR_BC();
 224   DTYPE *dstLineEnd;
 225   const mlib_f32 *mlib_filters_table;
 226 
 227   if (filter == MLIB_BICUBIC) {
 228     mlib_filters_table = mlib_filters_s16f_bc;
 229   }
 230   else {
 231     mlib_filters_table = mlib_filters_s16f_bc2;
 232   }
 233 
 234   for (j = yStart; j <= yFinish; j++) {
 235     mlib_d64 xf0, xf1, xf2, xf3;
 236     mlib_d64 yf0, yf1, yf2, yf3;
 237     mlib_d64 c0, c1, c2, c3, val0;
 238     mlib_s32 filterpos, k;
 239     mlib_f32 *fptr;
 240     mlib_s32 s0, s1, s2, s3;
 241     mlib_s32 s4, s5, s6, s7;
 242 
 243     CLIP(2);
 244     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 245 
 246     for (k = 0; k < 2; k++) {
 247       mlib_s32 X1 = X;
 248       mlib_s32 Y1 = Y;
 249       DTYPE *dPtr = dstPixelPtr + k;
 250 
 251       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 252       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 253 
 254       xf0 = fptr[0];
 255       xf1 = fptr[1];
 256       xf2 = fptr[2];
 257       xf3 = fptr[3];
 258 
 259       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 260       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 261 
 262       yf0 = fptr[0];
 263       yf1 = fptr[1];
 264       yf2 = fptr[2];
 265       yf3 = fptr[3];
 266 
 267       xSrc = (X1 >> MLIB_SHIFT) - 1;
 268       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 269 
 270       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 271       s0 = srcPixelPtr[0];
 272       s1 = srcPixelPtr[2];
 273       s2 = srcPixelPtr[4];
 274       s3 = srcPixelPtr[6];
 275 
 276       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 277       s4 = srcPixelPtr[0];
 278       s5 = srcPixelPtr[2];
 279       s6 = srcPixelPtr[4];
 280       s7 = srcPixelPtr[6];
 281 
 282       for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
 283 
 284         X1 += dX;
 285         Y1 += dY;
 286 
 287         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 288         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 289         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 290         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 291               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
 292         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 293         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 294               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
 295 
 296         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 297         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 298 
 299         xf0 = fptr[0];
 300         xf1 = fptr[1];
 301         xf2 = fptr[2];
 302         xf3 = fptr[3];
 303 
 304         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 305 
 306         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 307         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 308 
 309         yf0 = fptr[0];
 310         yf1 = fptr[1];
 311         yf2 = fptr[2];
 312         yf3 = fptr[3];
 313 
 314         SAT_U16(dPtr[0]);
 315 
 316         xSrc = (X1 >> MLIB_SHIFT) - 1;
 317         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 318 
 319         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 320         s0 = srcPixelPtr[0];
 321         s1 = srcPixelPtr[2];
 322         s2 = srcPixelPtr[4];
 323         s3 = srcPixelPtr[6];
 324 
 325         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 326         s4 = srcPixelPtr[0];
 327         s5 = srcPixelPtr[2];
 328         s6 = srcPixelPtr[4];
 329         s7 = srcPixelPtr[6];
 330       }
 331 
 332       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 333       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 334       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 335       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 336             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
 337       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 338       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 339             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
 340 
 341       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 342       SAT_U16(dPtr[0]);
 343     }
 344   }
 345 
 346   return MLIB_SUCCESS;
 347 }
 348 
 349 /***************************************************************/
 350 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 351 {
 352   DECLAREVAR_BC();
 353   DTYPE *dstLineEnd;
 354   const mlib_f32 *mlib_filters_table;
 355 
 356   if (filter == MLIB_BICUBIC) {
 357     mlib_filters_table = mlib_filters_s16f_bc;
 358   }
 359   else {
 360     mlib_filters_table = mlib_filters_s16f_bc2;
 361   }
 362 
 363   for (j = yStart; j <= yFinish; j++) {
 364     mlib_d64 xf0, xf1, xf2, xf3;
 365     mlib_d64 yf0, yf1, yf2, yf3;
 366     mlib_d64 c0, c1, c2, c3, val0;
 367     mlib_s32 filterpos, k;
 368     mlib_f32 *fptr;
 369     mlib_s32 s0, s1, s2, s3;
 370     mlib_s32 s4, s5, s6, s7;
 371 
 372     CLIP(3);
 373     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 374 
 375     for (k = 0; k < 3; k++) {
 376       mlib_s32 X1 = X;
 377       mlib_s32 Y1 = Y;
 378       DTYPE *dPtr = dstPixelPtr + k;
 379 
 380       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 381       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 382 
 383       xf0 = fptr[0];
 384       xf1 = fptr[1];
 385       xf2 = fptr[2];
 386       xf3 = fptr[3];
 387 
 388       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 389       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 390 
 391       yf0 = fptr[0];
 392       yf1 = fptr[1];
 393       yf2 = fptr[2];
 394       yf3 = fptr[3];
 395 
 396       xSrc = (X1 >> MLIB_SHIFT) - 1;
 397       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 398 
 399       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 400       s0 = srcPixelPtr[0];
 401       s1 = srcPixelPtr[3];
 402       s2 = srcPixelPtr[6];
 403       s3 = srcPixelPtr[9];
 404 
 405       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 406       s4 = srcPixelPtr[0];
 407       s5 = srcPixelPtr[3];
 408       s6 = srcPixelPtr[6];
 409       s7 = srcPixelPtr[9];
 410 
 411       for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
 412 
 413         X1 += dX;
 414         Y1 += dY;
 415 
 416         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 417         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 418         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 419         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 420               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
 421         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 422         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 423               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
 424 
 425         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 426         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 427 
 428         xf0 = fptr[0];
 429         xf1 = fptr[1];
 430         xf2 = fptr[2];
 431         xf3 = fptr[3];
 432 
 433         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 434 
 435         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 436         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 437 
 438         yf0 = fptr[0];
 439         yf1 = fptr[1];
 440         yf2 = fptr[2];
 441         yf3 = fptr[3];
 442 
 443         SAT_U16(dPtr[0]);
 444 
 445         xSrc = (X1 >> MLIB_SHIFT) - 1;
 446         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 447 
 448         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 449         s0 = srcPixelPtr[0];
 450         s1 = srcPixelPtr[3];
 451         s2 = srcPixelPtr[6];
 452         s3 = srcPixelPtr[9];
 453 
 454         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 455         s4 = srcPixelPtr[0];
 456         s5 = srcPixelPtr[3];
 457         s6 = srcPixelPtr[6];
 458         s7 = srcPixelPtr[9];
 459       }
 460 
 461       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 462       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 463       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 464       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 465             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
 466       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 467       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 468             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
 469 
 470       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 471       SAT_U16(dPtr[0]);
 472     }
 473   }
 474 
 475   return MLIB_SUCCESS;
 476 }
 477 
 478 /***************************************************************/
 479 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 480 {
 481   DECLAREVAR_BC();
 482   DTYPE *dstLineEnd;
 483   const mlib_f32 *mlib_filters_table;
 484 
 485   if (filter == MLIB_BICUBIC) {
 486     mlib_filters_table = mlib_filters_s16f_bc;
 487   }
 488   else {
 489     mlib_filters_table = mlib_filters_s16f_bc2;
 490   }
 491 
 492   for (j = yStart; j <= yFinish; j++) {
 493     mlib_d64 xf0, xf1, xf2, xf3;
 494     mlib_d64 yf0, yf1, yf2, yf3;
 495     mlib_d64 c0, c1, c2, c3, val0;
 496     mlib_s32 filterpos, k;
 497     mlib_f32 *fptr;
 498     mlib_s32 s0, s1, s2, s3;
 499     mlib_s32 s4, s5, s6, s7;
 500 
 501     CLIP(4);
 502     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
 503 
 504     for (k = 0; k < 4; k++) {
 505       mlib_s32 X1 = X;
 506       mlib_s32 Y1 = Y;
 507       DTYPE *dPtr = dstPixelPtr + k;
 508 
 509       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 510       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 511 
 512       xf0 = fptr[0];
 513       xf1 = fptr[1];
 514       xf2 = fptr[2];
 515       xf3 = fptr[3];
 516 
 517       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 518       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 519 
 520       yf0 = fptr[0];
 521       yf1 = fptr[1];
 522       yf2 = fptr[2];
 523       yf3 = fptr[3];
 524 
 525       xSrc = (X1 >> MLIB_SHIFT) - 1;
 526       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 527 
 528       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 529       s0 = srcPixelPtr[0];
 530       s1 = srcPixelPtr[4];
 531       s2 = srcPixelPtr[8];
 532       s3 = srcPixelPtr[12];
 533 
 534       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 535       s4 = srcPixelPtr[0];
 536       s5 = srcPixelPtr[4];
 537       s6 = srcPixelPtr[8];
 538       s7 = srcPixelPtr[12];
 539 
 540       for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
 541 
 542         X1 += dX;
 543         Y1 += dY;
 544 
 545         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 546         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 547         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 548         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 549               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
 550         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 551         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 552               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
 553 
 554         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 555         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 556 
 557         xf0 = fptr[0];
 558         xf1 = fptr[1];
 559         xf2 = fptr[2];
 560         xf3 = fptr[3];
 561 
 562         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 563 
 564         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 565         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 566 
 567         yf0 = fptr[0];
 568         yf1 = fptr[1];
 569         yf2 = fptr[2];
 570         yf3 = fptr[3];
 571 
 572         SAT_U16(dPtr[0]);
 573 
 574         xSrc = (X1 >> MLIB_SHIFT) - 1;
 575         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 576 
 577         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 578         s0 = srcPixelPtr[0];
 579         s1 = srcPixelPtr[4];
 580         s2 = srcPixelPtr[8];
 581         s3 = srcPixelPtr[12];
 582 
 583         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 584         s4 = srcPixelPtr[0];
 585         s5 = srcPixelPtr[4];
 586         s6 = srcPixelPtr[8];
 587         s7 = srcPixelPtr[12];
 588       }
 589 
 590       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 591       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 592       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 593       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 594             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
 595       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 596       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 597             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
 598 
 599       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 600       SAT_U16(dPtr[0]);
 601     }
 602   }
 603 
 604   return MLIB_SUCCESS;
 605 }
 606 
 607 #else       /* for x86, using integer multiplies is faster */
 608 
 609 #define SHIFT_X  15
 610 #define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */
 611 
 612 #define SHIFT_Y  14
 613 #define ROUND_Y  (1 << (SHIFT_Y - 1))
 614 
 615 #define S32_TO_U16_SAT(DST)                                     \
 616   if (val0 >= MLIB_U16_MAX)                                     \
 617     DST = MLIB_U16_MAX;                                         \
 618   else if (val0 <= MLIB_U16_MIN)                                \
 619     DST = MLIB_U16_MIN;                                         \
 620   else                                                          \
 621     DST = (mlib_u16)val0
 622 
 623 /***************************************************************/
 624 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
 625 {
 626   DECLAREVAR_BC();
 627   DTYPE *dstLineEnd;
 628   const mlib_s16 *mlib_filters_table;
 629 
 630   if (filter == MLIB_BICUBIC) {
 631     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 632   }
 633   else {
 634     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 635   }
 636 
 637   for (j = yStart; j <= yFinish; j++) {
 638     mlib_s32 xf0, xf1, xf2, xf3;
 639     mlib_s32 yf0, yf1, yf2, yf3;
 640     mlib_s32 c0, c1, c2, c3, val0;
 641     mlib_s32 filterpos;
 642     mlib_s16 *fptr;
 643     mlib_s32 s0, s1, s2, s3;
 644     mlib_s32 s4, s5, s6, s7;
 645 
 646     CLIP(1);
 647     dstLineEnd = (DTYPE *) dstData + xRight;
 648 
 649     filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 650     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 651 
 652     xf0 = fptr[0] >> 1;
 653     xf1 = fptr[1] >> 1;
 654     xf2 = fptr[2] >> 1;
 655     xf3 = fptr[3] >> 1;
 656 
 657     filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 658     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 659 
 660     yf0 = fptr[0];
 661     yf1 = fptr[1];
 662     yf2 = fptr[2];
 663     yf3 = fptr[3];
 664 
 665     xSrc = (X >> MLIB_SHIFT) - 1;
 666     ySrc = (Y >> MLIB_SHIFT) - 1;
 667 
 668     srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 669     s0 = srcPixelPtr[0];
 670     s1 = srcPixelPtr[1];
 671     s2 = srcPixelPtr[2];
 672     s3 = srcPixelPtr[3];
 673 
 674     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 675     s4 = srcPixelPtr[0];
 676     s5 = srcPixelPtr[1];
 677     s6 = srcPixelPtr[2];
 678     s7 = srcPixelPtr[3];
 679 
 680     for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
 681 
 682       X += dX;
 683       Y += dY;
 684 
 685       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 686       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 687       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 688       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 689             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 690       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 691       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 692             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 693 
 694       filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 695       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 696 
 697       xf0 = fptr[0] >> 1;
 698       xf1 = fptr[1] >> 1;
 699       xf2 = fptr[2] >> 1;
 700       xf3 = fptr[3] >> 1;
 701 
 702       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 703 
 704       filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 705       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 706 
 707       yf0 = fptr[0];
 708       yf1 = fptr[1];
 709       yf2 = fptr[2];
 710       yf3 = fptr[3];
 711 
 712       S32_TO_U16_SAT(dstPixelPtr[0]);
 713 
 714       xSrc = (X >> MLIB_SHIFT) - 1;
 715       ySrc = (Y >> MLIB_SHIFT) - 1;
 716 
 717       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 718       s0 = srcPixelPtr[0];
 719       s1 = srcPixelPtr[1];
 720       s2 = srcPixelPtr[2];
 721       s3 = srcPixelPtr[3];
 722 
 723       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 724       s4 = srcPixelPtr[0];
 725       s5 = srcPixelPtr[1];
 726       s6 = srcPixelPtr[2];
 727       s7 = srcPixelPtr[3];
 728     }
 729 
 730     c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 731     c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 732     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 733     c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 734           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 735     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 736     c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 737           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 738 
 739     val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 740     S32_TO_U16_SAT(dstPixelPtr[0]);
 741   }
 742 
 743   return MLIB_SUCCESS;
 744 }
 745 
 746 /***************************************************************/
 747 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 748 {
 749   DECLAREVAR_BC();
 750   DTYPE *dstLineEnd;
 751   const mlib_s16 *mlib_filters_table;
 752 
 753   if (filter == MLIB_BICUBIC) {
 754     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 755   }
 756   else {
 757     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 758   }
 759 
 760   for (j = yStart; j <= yFinish; j++) {
 761     mlib_s32 xf0, xf1, xf2, xf3;
 762     mlib_s32 yf0, yf1, yf2, yf3;
 763     mlib_s32 c0, c1, c2, c3, val0;
 764     mlib_s32 filterpos, k;
 765     mlib_s16 *fptr;
 766     mlib_s32 s0, s1, s2, s3;
 767     mlib_s32 s4, s5, s6, s7;
 768 
 769     CLIP(2);
 770     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 771 
 772     for (k = 0; k < 2; k++) {
 773       mlib_s32 X1 = X;
 774       mlib_s32 Y1 = Y;
 775       DTYPE *dPtr = dstPixelPtr + k;
 776 
 777       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 778       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 779 
 780       xf0 = fptr[0] >> 1;
 781       xf1 = fptr[1] >> 1;
 782       xf2 = fptr[2] >> 1;
 783       xf3 = fptr[3] >> 1;
 784 
 785       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 786       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 787 
 788       yf0 = fptr[0];
 789       yf1 = fptr[1];
 790       yf2 = fptr[2];
 791       yf3 = fptr[3];
 792 
 793       xSrc = (X1 >> MLIB_SHIFT) - 1;
 794       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 795 
 796       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 797       s0 = srcPixelPtr[0];
 798       s1 = srcPixelPtr[2];
 799       s2 = srcPixelPtr[4];
 800       s3 = srcPixelPtr[6];
 801 
 802       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 803       s4 = srcPixelPtr[0];
 804       s5 = srcPixelPtr[2];
 805       s6 = srcPixelPtr[4];
 806       s7 = srcPixelPtr[6];
 807 
 808       for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
 809 
 810         X1 += dX;
 811         Y1 += dY;
 812 
 813         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 814         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 815         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 816         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 817               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 818         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 819         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 820               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 821 
 822         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 823         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 824 
 825         xf0 = fptr[0] >> 1;
 826         xf1 = fptr[1] >> 1;
 827         xf2 = fptr[2] >> 1;
 828         xf3 = fptr[3] >> 1;
 829 
 830         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 831 
 832         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 833         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 834 
 835         yf0 = fptr[0];
 836         yf1 = fptr[1];
 837         yf2 = fptr[2];
 838         yf3 = fptr[3];
 839 
 840         S32_TO_U16_SAT(dPtr[0]);
 841 
 842         xSrc = (X1 >> MLIB_SHIFT) - 1;
 843         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 844 
 845         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 846         s0 = srcPixelPtr[0];
 847         s1 = srcPixelPtr[2];
 848         s2 = srcPixelPtr[4];
 849         s3 = srcPixelPtr[6];
 850 
 851         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 852         s4 = srcPixelPtr[0];
 853         s5 = srcPixelPtr[2];
 854         s6 = srcPixelPtr[4];
 855         s7 = srcPixelPtr[6];
 856       }
 857 
 858       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 859       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 860       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 861       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 862             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 863       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 864       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 865             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 866 
 867       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 868       S32_TO_U16_SAT(dPtr[0]);
 869     }
 870   }
 871 
 872   return MLIB_SUCCESS;
 873 }
 874 
 875 /***************************************************************/
     /*
      * 3-channel variant of the bicubic affine kernel described in the
      * file header.
      *
      * For every destination row j in [yStart, yFinish] and every channel
      * k in 0..2, one output sample is built from a 4x4 block of source
      * samples of that channel: four horizontal 4-tap sums (c0..c3, one per
      * source row) are combined by a vertical 4-tap sum into val0.
      * S32_TO_U16_SAT(dPtr[0]) is then expected to store val0 into the
      * destination (val0 is not passed explicitly -- confirm in the macro
      * definition).
      *
      * Coefficients are taken from mlib_filters_s16_bc when
      * filter == MLIB_BICUBIC and from mlib_filters_s16_bc2 otherwise.
      *
      * NOTE(review): filter, j, yStart, yFinish, X, Y, dX, dY, xSrc, ySrc,
      * xRight, dstData, dstPixelPtr, srcPixelPtr, lineAddr and srcYStride
      * are not declared in this function; presumably they are declared by
      * DECLAREVAR_BC() and refreshed per row by CLIP() -- confirm against
      * the macro definitions.
      *
      * The visible exit path returns MLIB_SUCCESS after the last row.
      */
 876 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 877 {
 878   DECLAREVAR_BC();
 879   DTYPE *dstLineEnd;
 880   const mlib_s16 *mlib_filters_table;
 881 
       /* Select the coefficient table that matches the requested filter. */
 882   if (filter == MLIB_BICUBIC) {
 883     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 884   }
 885   else {
 886     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 887   }
 888 
       /* One iteration per destination row. */
 889   for (j = yStart; j <= yFinish; j++) {
         /* Horizontal (xf*) and vertical (yf*) filter coefficients. */
 890     mlib_s32 xf0, xf1, xf2, xf3;
 891     mlib_s32 yf0, yf1, yf2, yf3;
         /* c0..c3: horizontally filtered value of each of the 4 source rows. */
 892     mlib_s32 c0, c1, c2, c3, val0;
 893     mlib_s32 filterpos, k;
 894     mlib_s16 *fptr;
         /* s0..s3 / s4..s7: samples of source rows 0 and 1 of the 4x4 block. */
 895     mlib_s32 s0, s1, s2, s3;
 896     mlib_s32 s4, s5, s6, s7;
 897 
         /*
          * CLIP is defined elsewhere; presumably it loads this row's bounds
          * and start coordinates and may skip the row -- TODO confirm.
          */
 898     CLIP(3);
         /* Element offset 3 * xRight from dstData: channel 0 at column xRight. */
 899     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 900 
         /*
          * Channels are handled one at a time: each pass walks the row for
          * channel k, restarting from the row's initial X and Y.
          */
 901     for (k = 0; k < 3; k++) {
 902       mlib_s32 X1 = X;
 903       mlib_s32 Y1 = Y;
 904       DTYPE *dPtr = dstPixelPtr + k;
 905 
           /*
            * filterpos is applied as a byte offset into the table (note the
            * mlib_u8 * cast); fptr then addresses four consecutive 16-bit
            * coefficients.
            */
 906       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 907       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 908 
           /*
            * Horizontal coefficients are used at half scale; presumably this
            * leaves headroom in the 32-bit sums -- confirm against
            * SHIFT_X / ROUND_X.
            */
 909       xf0 = fptr[0] >> 1;
 910       xf1 = fptr[1] >> 1;
 911       xf2 = fptr[2] >> 1;
 912       xf3 = fptr[3] >> 1;
 913 
 914       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 915       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 916 
           /* Vertical coefficients are used at full scale. */
 917       yf0 = fptr[0];
 918       yf1 = fptr[1];
 919       yf2 = fptr[2];
 920       yf3 = fptr[3];
 921 
           /*
            * Top-left sample of the 4x4 block: one column left of
            * X1 >> MLIB_SHIFT and one row above Y1 >> MLIB_SHIFT.
            */
 922       xSrc = (X1 >> MLIB_SHIFT) - 1;
 923       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 924 
           /* Row 0: neighbouring samples of one channel are 3 elements apart. */
 925       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 926       s0 = srcPixelPtr[0];
 927       s1 = srcPixelPtr[3];
 928       s2 = srcPixelPtr[6];
 929       s3 = srcPixelPtr[9];
 930 
           /*
            * Row 1: srcYStride is added to the address as an integer
            * (mlib_addr), i.e. it is a byte count, not an element count.
            */
 931       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 932       s4 = srcPixelPtr[0];
 933       s5 = srcPixelPtr[3];
 934       s6 = srcPixelPtr[6];
 935       s7 = srcPixelPtr[9];
 936 
           /*
            * Each iteration finishes the pixel whose coefficients and first
            * two rows were loaded beforehand, then loads the same data for
            * the next pixel.  The loop runs only while dPtr is below
            * dstLineEnd; the pixel left pending when it stops is finished by
            * the code that follows the loop.
            */
 937       for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
 938 
             /* Coordinates of the next pixel; current data is already loaded. */
 939         X1 += dX;
 940         Y1 += dY;
 941 
             /*
              * Horizontal pass over the 4 rows.  Rows 2 and 3 are read
              * directly from memory instead of being cached in locals.
              */
 942         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 943         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 944         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 945         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 946               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 947         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 948         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 949               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 950 
             /* c0..c3 are done, so xf* can be reloaded for the next pixel. */
 951         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 952         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 953 
 954         xf0 = fptr[0] >> 1;
 955         xf1 = fptr[1] >> 1;
 956         xf2 = fptr[2] >> 1;
 957         xf3 = fptr[3] >> 1;
 958 
             /* Vertical pass for the current pixel; must precede the yf* reload. */
 959         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 960 
             /* yf* for the next pixel. */
 961         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 962         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 963 
 964         yf0 = fptr[0];
 965         yf1 = fptr[1];
 966         yf2 = fptr[2];
 967         yf3 = fptr[3];
 968 
             /* Store the current pixel (macro presumably saturates val0). */
 969         S32_TO_U16_SAT(dPtr[0]);
 970 
             /* Load rows 0 and 1 of the next pixel's 4x4 block. */
 971         xSrc = (X1 >> MLIB_SHIFT) - 1;
 972         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 973 
 974         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 975         s0 = srcPixelPtr[0];
 976         s1 = srcPixelPtr[3];
 977         s2 = srcPixelPtr[6];
 978         s3 = srcPixelPtr[9];
 979 
 980         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 981         s4 = srcPixelPtr[0];
 982         s5 = srcPixelPtr[3];
 983         s6 = srcPixelPtr[6];
 984         s7 = srcPixelPtr[9];
 985       }
 986 
           /*
            * Pending pixel: same horizontal and vertical passes as in the
            * loop body, without loading anything for a further pixel.
            */
 987       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 988       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 989       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 990       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 991             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 992       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 993       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 994             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 995 
 996       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 997       S32_TO_U16_SAT(dPtr[0]);
 998     }
 999   }
1000 
1001   return MLIB_SUCCESS;
1002 }
1003 
1004 /***************************************************************/
     /*
      * 4-channel variant of the bicubic affine kernel described in the
      * file header.
      *
      * For every destination row j in [yStart, yFinish] and every channel
      * k in 0..3, one output sample is built from a 4x4 block of source
      * samples of that channel: four horizontal 4-tap sums (c0..c3, one per
      * source row) are combined by a vertical 4-tap sum into val0.
      * S32_TO_U16_SAT(dPtr[0]) is then expected to store val0 into the
      * destination (val0 is not passed explicitly -- confirm in the macro
      * definition).
      *
      * Coefficients are taken from mlib_filters_s16_bc when
      * filter == MLIB_BICUBIC and from mlib_filters_s16_bc2 otherwise.
      *
      * NOTE(review): filter, j, yStart, yFinish, X, Y, dX, dY, xSrc, ySrc,
      * xRight, dstData, dstPixelPtr, srcPixelPtr, lineAddr and srcYStride
      * are not declared in this function; presumably they are declared by
      * DECLAREVAR_BC() and refreshed per row by CLIP() -- confirm against
      * the macro definitions.
      *
      * The visible exit path returns MLIB_SUCCESS after the last row.
      */
1005 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
1006 {
1007   DECLAREVAR_BC();
1008   DTYPE *dstLineEnd;
1009   const mlib_s16 *mlib_filters_table;
1010 
       /* Select the coefficient table that matches the requested filter. */
1011   if (filter == MLIB_BICUBIC) {
1012     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
1013   }
1014   else {
1015     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
1016   }
1017 
       /* One iteration per destination row. */
1018   for (j = yStart; j <= yFinish; j++) {
         /* Horizontal (xf*) and vertical (yf*) filter coefficients. */
1019     mlib_s32 xf0, xf1, xf2, xf3;
1020     mlib_s32 yf0, yf1, yf2, yf3;
         /* c0..c3: horizontally filtered value of each of the 4 source rows. */
1021     mlib_s32 c0, c1, c2, c3, val0;
1022     mlib_s32 filterpos, k;
1023     mlib_s16 *fptr;
         /* s0..s3 / s4..s7: samples of source rows 0 and 1 of the 4x4 block. */
1024     mlib_s32 s0, s1, s2, s3;
1025     mlib_s32 s4, s5, s6, s7;
1026 
         /*
          * CLIP is defined elsewhere; presumably it loads this row's bounds
          * and start coordinates and may skip the row -- TODO confirm.
          */
1027     CLIP(4);
         /* Element offset 4 * xRight from dstData: channel 0 at column xRight. */
1028     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
1029 
         /*
          * Channels are handled one at a time: each pass walks the row for
          * channel k, restarting from the row's initial X and Y.
          */
1030     for (k = 0; k < 4; k++) {
1031       mlib_s32 X1 = X;
1032       mlib_s32 Y1 = Y;
1033       DTYPE *dPtr = dstPixelPtr + k;
1034 
           /*
            * filterpos is applied as a byte offset into the table (note the
            * mlib_u8 * cast); fptr then addresses four consecutive 16-bit
            * coefficients.
            */
1035       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1036       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1037 
           /*
            * Horizontal coefficients are used at half scale; presumably this
            * leaves headroom in the 32-bit sums -- confirm against
            * SHIFT_X / ROUND_X.
            */
1038       xf0 = fptr[0] >> 1;
1039       xf1 = fptr[1] >> 1;
1040       xf2 = fptr[2] >> 1;
1041       xf3 = fptr[3] >> 1;
1042 
1043       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1044       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1045 
           /* Vertical coefficients are used at full scale. */
1046       yf0 = fptr[0];
1047       yf1 = fptr[1];
1048       yf2 = fptr[2];
1049       yf3 = fptr[3];
1050 
           /*
            * Top-left sample of the 4x4 block: one column left of
            * X1 >> MLIB_SHIFT and one row above Y1 >> MLIB_SHIFT.
            */
1051       xSrc = (X1 >> MLIB_SHIFT) - 1;
1052       ySrc = (Y1 >> MLIB_SHIFT) - 1;
1053 
           /* Row 0: neighbouring samples of one channel are 4 elements apart. */
1054       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1055       s0 = srcPixelPtr[0];
1056       s1 = srcPixelPtr[4];
1057       s2 = srcPixelPtr[8];
1058       s3 = srcPixelPtr[12];
1059 
           /*
            * Row 1: srcYStride is added to the address as an integer
            * (mlib_addr), i.e. it is a byte count, not an element count.
            */
1060       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1061       s4 = srcPixelPtr[0];
1062       s5 = srcPixelPtr[4];
1063       s6 = srcPixelPtr[8];
1064       s7 = srcPixelPtr[12];
1065 
           /*
            * Each iteration finishes the pixel whose coefficients and first
            * two rows were loaded beforehand, then loads the same data for
            * the next pixel.  The loop runs only while dPtr is below
            * dstLineEnd; the pixel left pending when it stops is finished by
            * the code that follows the loop.
            */
1066       for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
1067 
             /* Coordinates of the next pixel; current data is already loaded. */
1068         X1 += dX;
1069         Y1 += dY;
1070 
             /*
              * Horizontal pass over the 4 rows.  Rows 2 and 3 are read
              * directly from memory instead of being cached in locals.
              */
1071         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1072         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
1073         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1074         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1075               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1076         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1077         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1078               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1079 
             /* c0..c3 are done, so xf* can be reloaded for the next pixel. */
1080         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1081         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1082 
1083         xf0 = fptr[0] >> 1;
1084         xf1 = fptr[1] >> 1;
1085         xf2 = fptr[2] >> 1;
1086         xf3 = fptr[3] >> 1;
1087 
             /* Vertical pass for the current pixel; must precede the yf* reload. */
1088         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1089 
             /* yf* for the next pixel. */
1090         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1091         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1092 
1093         yf0 = fptr[0];
1094         yf1 = fptr[1];
1095         yf2 = fptr[2];
1096         yf3 = fptr[3];
1097 
             /* Store the current pixel (macro presumably saturates val0). */
1098         S32_TO_U16_SAT(dPtr[0]);
1099 
             /* Load rows 0 and 1 of the next pixel's 4x4 block. */
1100         xSrc = (X1 >> MLIB_SHIFT) - 1;
1101         ySrc = (Y1 >> MLIB_SHIFT) - 1;
1102 
1103         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1104         s0 = srcPixelPtr[0];
1105         s1 = srcPixelPtr[4];
1106         s2 = srcPixelPtr[8];
1107         s3 = srcPixelPtr[12];
1108 
1109         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1110         s4 = srcPixelPtr[0];
1111         s5 = srcPixelPtr[4];
1112         s6 = srcPixelPtr[8];
1113         s7 = srcPixelPtr[12];
1114       }
1115 
           /*
            * Pending pixel: same horizontal and vertical passes as in the
            * loop body, without loading anything for a further pixel.
            */
1116       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1117       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
1118       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1119       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1120             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1121       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1122       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1123             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1124 
1125       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1126       S32_TO_U16_SAT(dPtr[0]);
1127     }
1128   }
1129 
1130   return MLIB_SUCCESS;
1131 }
1132 
1133 #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
1134 
1135 /***************************************************************/