1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *      Image affine transformation with Bicubic filtering
  30  * SYNOPSIS
  31  *      mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
  32  *                                                       mlib_s32 *rightEdges,
  33  *                                                       mlib_s32 *xStarts,
  34  *                                                       mlib_s32 *yStarts,
  35  *                                                       mlib_s32 *sides,
  36  *                                                       mlib_u8  *dstData,
  37  *                                                       mlib_u8  **lineAddr,
  38  *                                                       mlib_s32 dstYStride,
  39  *                                                       mlib_s32 is_affine,
  40  *                                                       mlib_s32 srcYStride,
  41  *                                                       mlib_filter filter)
  42  *
  43  * ARGUMENTS
  44  *      leftEdges  array[dstHeight] of xLeft coordinates
  45  *      rightEdges array[dstHeight] of xRight coordinates
  46  *      xStarts    array[dstHeight] of xStart * 65536 coordinates
  47  *      yStarts    array[dstHeight] of yStart * 65536 coordinates
  48  *      sides      output array[4]. sides[0] is yStart, sides[1] is yFinish,
  49  *                 sides[2] is dx * 65536, sides[3] is dy * 65536
  50  *      dstData    pointer to the first pixel on (yStart - 1) line
  51  *      lineAddr   array[srcHeight] of pointers to the first pixel on
  52  *                 the corresponding lines
  53  *      dstYStride stride of destination image
  54  *      is_affine  indicator (Affine - GridWarp)
  55  *      srcYStride stride of source image
  56  *      filter     type of resampling filter
  57  *
  58  * DESCRIPTION
  59  *      The functions step along the lines from xLeft to xRight and apply
  60  *      the bicubic filtering.
  61  *
  62  */
  63 
  64 #include "mlib_ImageAffine.h"
  65 
  66 #define DTYPE           mlib_s16
  67 #define FILTER_BITS     9
  68 #define FUN_NAME(CHAN)  mlib_ImageAffine_s16_##CHAN##_bc
  69 
  70 /***************************************************************/
  71 #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
  72 
  73 #undef  FILTER_ELEM_BITS
  74 #define FILTER_ELEM_BITS  4
  75 
  76 #ifdef MLIB_USE_FTOI_CLAMPING
  77 
  78 #define SAT16(DST)                                              \
  79   DST = ((mlib_s32)val0) >> 16
  80 
  81 #else
  82 
  83 #define SAT16(DST)                                              \
  84   if (val0 >= MLIB_S32_MAX)                                     \
  85     DST = MLIB_S16_MAX;                                         \
  86   else if (val0 <= MLIB_S32_MIN)                                \
  87     DST = MLIB_S16_MIN;                                         \
  88   else                                                          \
  89     DST = ((mlib_s32)val0) >> 16
  90 
  91 #endif /* MLIB_USE_FTOI_CLAMPING */
  92 
  93 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
  94 {
  95   DECLAREVAR_BC();
  96   DTYPE *dstLineEnd;
  97   const mlib_f32 *mlib_filters_table;
  98 
  99   if (filter == MLIB_BICUBIC) {
 100     mlib_filters_table = mlib_filters_s16f_bc;
 101   }
 102   else {
 103     mlib_filters_table = mlib_filters_s16f_bc2;
 104   }
 105 
 106   for (j = yStart; j <= yFinish; j++) {
 107     mlib_d64 xf0, xf1, xf2, xf3;
 108     mlib_d64 yf0, yf1, yf2, yf3;
 109     mlib_d64 c0, c1, c2, c3, val0;
 110     mlib_s32 filterpos;
 111     mlib_f32 *fptr;
 112     mlib_s32 s0, s1, s2, s3;
 113     mlib_s32 s4, s5, s6, s7;
 114 
 115     CLIP(1);
 116     dstLineEnd = (DTYPE *) dstData + xRight;
 117 
 118     filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 119     fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 120 
 121     xf0 = fptr[0];
 122     xf1 = fptr[1];
 123     xf2 = fptr[2];
 124     xf3 = fptr[3];
 125 
 126     filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 127     fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 128 
 129     yf0 = fptr[0];
 130     yf1 = fptr[1];
 131     yf2 = fptr[2];
 132     yf3 = fptr[3];
 133 
 134     xSrc = (X >> MLIB_SHIFT) - 1;
 135     ySrc = (Y >> MLIB_SHIFT) - 1;
 136 
 137     srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 138     s0 = srcPixelPtr[0];
 139     s1 = srcPixelPtr[1];
 140     s2 = srcPixelPtr[2];
 141     s3 = srcPixelPtr[3];
 142 
 143     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 144     s4 = srcPixelPtr[0];
 145     s5 = srcPixelPtr[1];
 146     s6 = srcPixelPtr[2];
 147     s7 = srcPixelPtr[3];
 148 
 149     for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
 150 
 151       X += dX;
 152       Y += dY;
 153 
 154       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 155       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 156       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 157       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 158             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
 159       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 160       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 161             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
 162 
 163       filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 164       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 165 
 166       xf0 = fptr[0];
 167       xf1 = fptr[1];
 168       xf2 = fptr[2];
 169       xf3 = fptr[3];
 170 
 171       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 172 
 173       filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 174       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 175 
 176       yf0 = fptr[0];
 177       yf1 = fptr[1];
 178       yf2 = fptr[2];
 179       yf3 = fptr[3];
 180 
 181       SAT16(dstPixelPtr[0]);
 182 
 183       xSrc = (X >> MLIB_SHIFT) - 1;
 184       ySrc = (Y >> MLIB_SHIFT) - 1;
 185 
 186       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 187       s0 = srcPixelPtr[0];
 188       s1 = srcPixelPtr[1];
 189       s2 = srcPixelPtr[2];
 190       s3 = srcPixelPtr[3];
 191 
 192       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 193       s4 = srcPixelPtr[0];
 194       s5 = srcPixelPtr[1];
 195       s6 = srcPixelPtr[2];
 196       s7 = srcPixelPtr[3];
 197     }
 198 
 199     c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 200     c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 201     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 202     c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 203           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
 204     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 205     c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 206           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
 207 
 208     val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 209     SAT16(dstPixelPtr[0]);
 210   }
 211 
 212   return MLIB_SUCCESS;
 213 }
 214 
 215 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 216 {
 217   DECLAREVAR_BC();
 218   DTYPE *dstLineEnd;
 219   const mlib_f32 *mlib_filters_table;
 220 
 221   if (filter == MLIB_BICUBIC) {
 222     mlib_filters_table = mlib_filters_s16f_bc;
 223   }
 224   else {
 225     mlib_filters_table = mlib_filters_s16f_bc2;
 226   }
 227 
 228   for (j = yStart; j <= yFinish; j++) {
 229     mlib_d64 xf0, xf1, xf2, xf3;
 230     mlib_d64 yf0, yf1, yf2, yf3;
 231     mlib_d64 c0, c1, c2, c3, val0;
 232     mlib_s32 filterpos, k;
 233     mlib_f32 *fptr;
 234     mlib_s32 s0, s1, s2, s3;
 235     mlib_s32 s4, s5, s6, s7;
 236 
 237     CLIP(2);
 238     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 239 
 240     for (k = 0; k < 2; k++) {
 241       mlib_s32 X1 = X;
 242       mlib_s32 Y1 = Y;
 243       DTYPE *dPtr = dstPixelPtr + k;
 244 
 245       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 246       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 247 
 248       xf0 = fptr[0];
 249       xf1 = fptr[1];
 250       xf2 = fptr[2];
 251       xf3 = fptr[3];
 252 
 253       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 254       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 255 
 256       yf0 = fptr[0];
 257       yf1 = fptr[1];
 258       yf2 = fptr[2];
 259       yf3 = fptr[3];
 260 
 261       xSrc = (X1 >> MLIB_SHIFT) - 1;
 262       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 263 
 264       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 265       s0 = srcPixelPtr[0];
 266       s1 = srcPixelPtr[2];
 267       s2 = srcPixelPtr[4];
 268       s3 = srcPixelPtr[6];
 269 
 270       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 271       s4 = srcPixelPtr[0];
 272       s5 = srcPixelPtr[2];
 273       s6 = srcPixelPtr[4];
 274       s7 = srcPixelPtr[6];
 275 
 276       for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
 277 
 278         X1 += dX;
 279         Y1 += dY;
 280 
 281         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 282         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 283         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 284         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 285               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
 286         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 287         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 288               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
 289 
 290         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 291         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 292 
 293         xf0 = fptr[0];
 294         xf1 = fptr[1];
 295         xf2 = fptr[2];
 296         xf3 = fptr[3];
 297 
 298         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 299 
 300         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 301         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 302 
 303         yf0 = fptr[0];
 304         yf1 = fptr[1];
 305         yf2 = fptr[2];
 306         yf3 = fptr[3];
 307 
 308         SAT16(dPtr[0]);
 309 
 310         xSrc = (X1 >> MLIB_SHIFT) - 1;
 311         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 312 
 313         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 314         s0 = srcPixelPtr[0];
 315         s1 = srcPixelPtr[2];
 316         s2 = srcPixelPtr[4];
 317         s3 = srcPixelPtr[6];
 318 
 319         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 320         s4 = srcPixelPtr[0];
 321         s5 = srcPixelPtr[2];
 322         s6 = srcPixelPtr[4];
 323         s7 = srcPixelPtr[6];
 324       }
 325 
 326       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 327       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 328       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 329       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 330             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
 331       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 332       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 333             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
 334 
 335       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 336       SAT16(dPtr[0]);
 337     }
 338   }
 339 
 340   return MLIB_SUCCESS;
 341 }
 342 
 343 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 344 {
 345   DECLAREVAR_BC();
 346   DTYPE *dstLineEnd;
 347   const mlib_f32 *mlib_filters_table;
 348 
 349   if (filter == MLIB_BICUBIC) {
 350     mlib_filters_table = mlib_filters_s16f_bc;
 351   }
 352   else {
 353     mlib_filters_table = mlib_filters_s16f_bc2;
 354   }
 355 
 356   for (j = yStart; j <= yFinish; j++) {
 357     mlib_d64 xf0, xf1, xf2, xf3;
 358     mlib_d64 yf0, yf1, yf2, yf3;
 359     mlib_d64 c0, c1, c2, c3, val0;
 360     mlib_s32 filterpos, k;
 361     mlib_f32 *fptr;
 362     mlib_s32 s0, s1, s2, s3;
 363     mlib_s32 s4, s5, s6, s7;
 364 
 365     CLIP(3);
 366     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 367 
 368     for (k = 0; k < 3; k++) {
 369       mlib_s32 X1 = X;
 370       mlib_s32 Y1 = Y;
 371       DTYPE *dPtr = dstPixelPtr + k;
 372 
 373       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 374       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 375 
 376       xf0 = fptr[0];
 377       xf1 = fptr[1];
 378       xf2 = fptr[2];
 379       xf3 = fptr[3];
 380 
 381       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 382       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 383 
 384       yf0 = fptr[0];
 385       yf1 = fptr[1];
 386       yf2 = fptr[2];
 387       yf3 = fptr[3];
 388 
 389       xSrc = (X1 >> MLIB_SHIFT) - 1;
 390       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 391 
 392       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 393       s0 = srcPixelPtr[0];
 394       s1 = srcPixelPtr[3];
 395       s2 = srcPixelPtr[6];
 396       s3 = srcPixelPtr[9];
 397 
 398       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 399       s4 = srcPixelPtr[0];
 400       s5 = srcPixelPtr[3];
 401       s6 = srcPixelPtr[6];
 402       s7 = srcPixelPtr[9];
 403 
 404       for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
 405 
 406         X1 += dX;
 407         Y1 += dY;
 408 
 409         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 410         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 411         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 412         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 413               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
 414         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 415         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 416               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
 417 
 418         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 419         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 420 
 421         xf0 = fptr[0];
 422         xf1 = fptr[1];
 423         xf2 = fptr[2];
 424         xf3 = fptr[3];
 425 
 426         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 427 
 428         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 429         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 430 
 431         yf0 = fptr[0];
 432         yf1 = fptr[1];
 433         yf2 = fptr[2];
 434         yf3 = fptr[3];
 435 
 436         SAT16(dPtr[0]);
 437 
 438         xSrc = (X1 >> MLIB_SHIFT) - 1;
 439         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 440 
 441         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 442         s0 = srcPixelPtr[0];
 443         s1 = srcPixelPtr[3];
 444         s2 = srcPixelPtr[6];
 445         s3 = srcPixelPtr[9];
 446 
 447         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 448         s4 = srcPixelPtr[0];
 449         s5 = srcPixelPtr[3];
 450         s6 = srcPixelPtr[6];
 451         s7 = srcPixelPtr[9];
 452       }
 453 
 454       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 455       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 456       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 457       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 458             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
 459       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 460       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 461             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
 462 
 463       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 464       SAT16(dPtr[0]);
 465     }
 466   }
 467 
 468   return MLIB_SUCCESS;
 469 }
 470 
 471 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 472 {
 473   DECLAREVAR_BC();
 474   DTYPE *dstLineEnd;
 475   const mlib_f32 *mlib_filters_table;
 476 
 477   if (filter == MLIB_BICUBIC) {
 478     mlib_filters_table = mlib_filters_s16f_bc;
 479   }
 480   else {
 481     mlib_filters_table = mlib_filters_s16f_bc2;
 482   }
 483 
 484   for (j = yStart; j <= yFinish; j++) {
 485     mlib_d64 xf0, xf1, xf2, xf3;
 486     mlib_d64 yf0, yf1, yf2, yf3;
 487     mlib_d64 c0, c1, c2, c3, val0;
 488     mlib_s32 filterpos, k;
 489     mlib_f32 *fptr;
 490     mlib_s32 s0, s1, s2, s3;
 491     mlib_s32 s4, s5, s6, s7;
 492 
 493     CLIP(4);
 494     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
 495 
 496     for (k = 0; k < 4; k++) {
 497       mlib_s32 X1 = X;
 498       mlib_s32 Y1 = Y;
 499       DTYPE *dPtr = dstPixelPtr + k;
 500 
 501       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 502       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 503 
 504       xf0 = fptr[0];
 505       xf1 = fptr[1];
 506       xf2 = fptr[2];
 507       xf3 = fptr[3];
 508 
 509       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 510       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 511 
 512       yf0 = fptr[0];
 513       yf1 = fptr[1];
 514       yf2 = fptr[2];
 515       yf3 = fptr[3];
 516 
 517       xSrc = (X1 >> MLIB_SHIFT) - 1;
 518       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 519 
 520       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 521       s0 = srcPixelPtr[0];
 522       s1 = srcPixelPtr[4];
 523       s2 = srcPixelPtr[8];
 524       s3 = srcPixelPtr[12];
 525 
 526       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 527       s4 = srcPixelPtr[0];
 528       s5 = srcPixelPtr[4];
 529       s6 = srcPixelPtr[8];
 530       s7 = srcPixelPtr[12];
 531 
 532       for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
 533 
 534         X1 += dX;
 535         Y1 += dY;
 536 
 537         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 538         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 539         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 540         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 541               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
 542         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 543         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 544               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
 545 
 546         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 547         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 548 
 549         xf0 = fptr[0];
 550         xf1 = fptr[1];
 551         xf2 = fptr[2];
 552         xf3 = fptr[3];
 553 
 554         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 555 
 556         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 557         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 558 
 559         yf0 = fptr[0];
 560         yf1 = fptr[1];
 561         yf2 = fptr[2];
 562         yf3 = fptr[3];
 563 
 564         SAT16(dPtr[0]);
 565 
 566         xSrc = (X1 >> MLIB_SHIFT) - 1;
 567         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 568 
 569         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 570         s0 = srcPixelPtr[0];
 571         s1 = srcPixelPtr[4];
 572         s2 = srcPixelPtr[8];
 573         s3 = srcPixelPtr[12];
 574 
 575         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 576         s4 = srcPixelPtr[0];
 577         s5 = srcPixelPtr[4];
 578         s6 = srcPixelPtr[8];
 579         s7 = srcPixelPtr[12];
 580       }
 581 
 582       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
 583       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
 584       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 585       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 586             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
 587       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 588       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 589             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
 590 
 591       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 592       SAT16(dPtr[0]);
 593     }
 594   }
 595 
 596   return MLIB_SUCCESS;
 597 }
 598 
 599 #else       /* for x86, using integer multiplies is faster */
 600 
 601 #define SHIFT_X  15
 602 #define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */
 603 
 604 #define SHIFT_Y  (15 + 15 - SHIFT_X)
 605 #define ROUND_Y  (1 << (SHIFT_Y - 1))
 606 
 607 #define S32_TO_S16_SAT(DST)                                     \
 608   if (val0 >= MLIB_S16_MAX)                                     \
 609     DST = MLIB_S16_MAX;                                         \
 610   else if (val0 <= MLIB_S16_MIN)                                \
 611     DST = MLIB_S16_MIN;                                         \
 612   else                                                          \
 613     DST = (mlib_s16)val0
 614 
 615 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
 616 {
 617   DECLAREVAR_BC();
 618   DTYPE *dstLineEnd;
 619   const mlib_s16 *mlib_filters_table;
 620 
 621   if (filter == MLIB_BICUBIC) {
 622     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 623   }
 624   else {
 625     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 626   }
 627 
 628   for (j = yStart; j <= yFinish; j++) {
 629     mlib_s32 xf0, xf1, xf2, xf3;
 630     mlib_s32 yf0, yf1, yf2, yf3;
 631     mlib_s32 c0, c1, c2, c3, val0;
 632     mlib_s32 filterpos;
 633     mlib_s16 *fptr;
 634     mlib_s32 s0, s1, s2, s3;
 635     mlib_s32 s4, s5, s6, s7;
 636 
 637     CLIP(1);
 638     dstLineEnd = (DTYPE *) dstData + xRight;
 639 
 640     filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 641     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 642 
 643     xf0 = fptr[0];
 644     xf1 = fptr[1];
 645     xf2 = fptr[2];
 646     xf3 = fptr[3];
 647 
 648     filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 649     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 650 
 651     yf0 = fptr[0];
 652     yf1 = fptr[1];
 653     yf2 = fptr[2];
 654     yf3 = fptr[3];
 655 
 656     xSrc = (X >> MLIB_SHIFT) - 1;
 657     ySrc = (Y >> MLIB_SHIFT) - 1;
 658 
 659     srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 660     s0 = srcPixelPtr[0];
 661     s1 = srcPixelPtr[1];
 662     s2 = srcPixelPtr[2];
 663     s3 = srcPixelPtr[3];
 664 
 665     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 666     s4 = srcPixelPtr[0];
 667     s5 = srcPixelPtr[1];
 668     s6 = srcPixelPtr[2];
 669     s7 = srcPixelPtr[3];
 670 
 671     for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
 672 
 673       X += dX;
 674       Y += dY;
 675 
 676       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 677       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 678       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 679       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 680             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 681       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 682       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 683             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 684 
 685       filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 686       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 687 
 688       xf0 = fptr[0];
 689       xf1 = fptr[1];
 690       xf2 = fptr[2];
 691       xf3 = fptr[3];
 692 
 693       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 694 
 695       filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 696       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 697 
 698       yf0 = fptr[0];
 699       yf1 = fptr[1];
 700       yf2 = fptr[2];
 701       yf3 = fptr[3];
 702 
 703       S32_TO_S16_SAT(dstPixelPtr[0]);
 704 
 705       xSrc = (X >> MLIB_SHIFT) - 1;
 706       ySrc = (Y >> MLIB_SHIFT) - 1;
 707 
 708       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 709       s0 = srcPixelPtr[0];
 710       s1 = srcPixelPtr[1];
 711       s2 = srcPixelPtr[2];
 712       s3 = srcPixelPtr[3];
 713 
 714       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 715       s4 = srcPixelPtr[0];
 716       s5 = srcPixelPtr[1];
 717       s6 = srcPixelPtr[2];
 718       s7 = srcPixelPtr[3];
 719     }
 720 
 721     c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 722     c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 723     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 724     c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 725           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 726     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 727     c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 728           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 729 
 730     val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 731     S32_TO_S16_SAT(dstPixelPtr[0]);
 732   }
 733 
 734   return MLIB_SUCCESS;
 735 }
 736 
 737 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 738 {
 739   DECLAREVAR_BC();
 740   DTYPE *dstLineEnd;
 741   const mlib_s16 *mlib_filters_table;
 742 
 743   if (filter == MLIB_BICUBIC) {
 744     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 745   }
 746   else {
 747     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 748   }
 749 
 750   for (j = yStart; j <= yFinish; j++) {
 751     mlib_s32 xf0, xf1, xf2, xf3;
 752     mlib_s32 yf0, yf1, yf2, yf3;
 753     mlib_s32 c0, c1, c2, c3, val0;
 754     mlib_s32 filterpos, k;
 755     mlib_s16 *fptr;
 756     mlib_s32 s0, s1, s2, s3;
 757     mlib_s32 s4, s5, s6, s7;
 758 
 759     CLIP(2);
 760     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 761 
 762     for (k = 0; k < 2; k++) {
 763       mlib_s32 X1 = X;
 764       mlib_s32 Y1 = Y;
 765       DTYPE *dPtr = dstPixelPtr + k;
 766 
 767       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 768       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 769 
 770       xf0 = fptr[0];
 771       xf1 = fptr[1];
 772       xf2 = fptr[2];
 773       xf3 = fptr[3];
 774 
 775       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 776       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 777 
 778       yf0 = fptr[0];
 779       yf1 = fptr[1];
 780       yf2 = fptr[2];
 781       yf3 = fptr[3];
 782 
 783       xSrc = (X1 >> MLIB_SHIFT) - 1;
 784       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 785 
 786       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 787       s0 = srcPixelPtr[0];
 788       s1 = srcPixelPtr[2];
 789       s2 = srcPixelPtr[4];
 790       s3 = srcPixelPtr[6];
 791 
 792       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 793       s4 = srcPixelPtr[0];
 794       s5 = srcPixelPtr[2];
 795       s6 = srcPixelPtr[4];
 796       s7 = srcPixelPtr[6];
 797 
 798       for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
 799 
 800         X1 += dX;
 801         Y1 += dY;
 802 
 803         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 804         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 805         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 806         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 807               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 808         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 809         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 810               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 811 
 812         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 813         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 814 
 815         xf0 = fptr[0];
 816         xf1 = fptr[1];
 817         xf2 = fptr[2];
 818         xf3 = fptr[3];
 819 
 820         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 821 
 822         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 823         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 824 
 825         yf0 = fptr[0];
 826         yf1 = fptr[1];
 827         yf2 = fptr[2];
 828         yf3 = fptr[3];
 829 
 830         S32_TO_S16_SAT(dPtr[0]);
 831 
 832         xSrc = (X1 >> MLIB_SHIFT) - 1;
 833         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 834 
 835         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 836         s0 = srcPixelPtr[0];
 837         s1 = srcPixelPtr[2];
 838         s2 = srcPixelPtr[4];
 839         s3 = srcPixelPtr[6];
 840 
 841         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 842         s4 = srcPixelPtr[0];
 843         s5 = srcPixelPtr[2];
 844         s6 = srcPixelPtr[4];
 845         s7 = srcPixelPtr[6];
 846       }
 847 
 848       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 849       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 850       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 851       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 852             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 853       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 854       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 855             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 856 
 857       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 858       S32_TO_S16_SAT(dPtr[0]);
 859     }
 860   }
 861 
 862   return MLIB_SUCCESS;
 863 }
 864 
 865 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 866 {
       /*
        * Bicubic affine resampling for images with 3 interleaved channels,
        * integer-arithmetic version.
        *
        * For every row index j in [yStart, yFinish] and every channel k, the
        * destination pixels of the row are visited in order.  Each output
        * sample is built from a 4x4 window of source samples: four horizontal
        * 4-tap sums (c0..c3) followed by one vertical 4-tap sum (val0), which
        * is then handed to S32_TO_S16_SAT() for storing.
        *
        * param - affine parameter block.  It is not touched directly here;
        *         all working variables (yStart, yFinish, lineAddr, dstData,
        *         X, Y, dX, dY, srcYStride, xRight, dstPixelPtr, filter, ...)
        *         presumably come from the DECLAREVAR_BC()/CLIP() macros,
        *         which are defined outside this section -- TODO confirm.
        *
        * Returns MLIB_SUCCESS (the only return statement visible in this
        * function body).
        */
 867   DECLAREVAR_BC();
 868   DTYPE *dstLineEnd;
 869   const mlib_s16 *mlib_filters_table;
 870
       /* Coefficient table: mlib_filters_s16_bc for MLIB_BICUBIC,
        * mlib_filters_s16_bc2 for any other filter value. */
 871   if (filter == MLIB_BICUBIC) {
 872     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 873   }
 874   else {
 875     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 876   }
 877
       /* One iteration per row index j, yStart..yFinish inclusive. */
 878   for (j = yStart; j <= yFinish; j++) {
 879     mlib_s32 xf0, xf1, xf2, xf3;
 880     mlib_s32 yf0, yf1, yf2, yf3;
 881     mlib_s32 c0, c1, c2, c3, val0;
 882     mlib_s32 filterpos, k;
 883     mlib_s16 *fptr;
 884     mlib_s32 s0, s1, s2, s3;
 885     mlib_s32 s4, s5, s6, s7;
 886
         /* CLIP is defined elsewhere; presumably it sets up this row's X, Y,
          * xRight and dstPixelPtr and skips rows with nothing to draw
          * -- TODO confirm against the macro definition. */
 887     CLIP(3);
         /* Address of channel 0 of the pixel with index xRight, which is the
          * last pixel written in this row (see the loop bound below). */
 888     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 889
         /* Each channel gets its own pass over the row, restarting from the
          * row's initial coordinates X, Y. */
 890     for (k = 0; k < 3; k++) {
 891       mlib_s32 X1 = X;
 892       mlib_s32 Y1 = Y;
 893       DTYPE *dPtr = dstPixelPtr + k;
 894
           /* filterpos is a BYTE offset into the table (note the mlib_u8 *
            * cast); four consecutive mlib_s16 coefficients are read there. */
 895       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 896       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 897
 898       xf0 = fptr[0];
 899       xf1 = fptr[1];
 900       xf2 = fptr[2];
 901       xf3 = fptr[3];
 902
 903       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 904       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 905
 906       yf0 = fptr[0];
 907       yf1 = fptr[1];
 908       yf2 = fptr[2];
 909       yf3 = fptr[3];
 910
           /* Top-left corner of the 4x4 window: one column left of and one
            * row above (X1 >> MLIB_SHIFT, Y1 >> MLIB_SHIFT). */
 911       xSrc = (X1 >> MLIB_SHIFT) - 1;
 912       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 913
           /* Window row 0: channel k of four consecutive pixels (element
            * stride 3), starting from the row pointer lineAddr[ySrc]. */
 914       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 915       s0 = srcPixelPtr[0];
 916       s1 = srcPixelPtr[3];
 917       s2 = srcPixelPtr[6];
 918       s3 = srcPixelPtr[9];
 919
           /* Window row 1.  srcYStride is added to the address value (via
            * mlib_addr), not as an element count. */
 920       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 921       s4 = srcPixelPtr[0];
 922       s5 = srcPixelPtr[3];
 923       s6 = srcPixelPtr[6];
 924       s7 = srcPixelPtr[9];
 925
           /*
            * Main loop.  On entry to each iteration xf0..xf3, yf0..yf3 and
            * s0..s7 already hold the coefficients and the first two window
            * rows for the pixel at dPtr.  The body finishes that pixel and
            * then loads the same data for the next one.  It covers every
            * pixel except the last, which is completed after the loop.
            */
 926       for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
 927
             /* From here on X1, Y1 refer to the NEXT pixel. */
 928         X1 += dX;
 929         Y1 += dY;
 930
             /* Horizontal pass for the current pixel; window rows 2 and 3
              * are read straight from memory. */
 931         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 932         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 933         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 934         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 935               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 936         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 937         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 938               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 939
             /* Horizontal coefficients for the next pixel; the current
              * pixel's c0..c3 are already computed. */
 940         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 941         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 942
 943         xf0 = fptr[0];
 944         xf1 = fptr[1];
 945         xf2 = fptr[2];
 946         xf3 = fptr[3];
 947
             /* Vertical pass for the current pixel; must stay ahead of the
              * yf0..yf3 reload below. */
 948         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 949
             /* Vertical coefficients for the next pixel. */
 950         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 951         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 952
 953         yf0 = fptr[0];
 954         yf1 = fptr[1];
 955         yf2 = fptr[2];
 956         yf3 = fptr[3];
 957
             /* Macro defined elsewhere; presumably saturates val0 to the
              * 16-bit range and stores it in dPtr[0] -- TODO confirm. */
 958         S32_TO_S16_SAT(dPtr[0]);
 959
             /* First two window rows for the next pixel. */
 960         xSrc = (X1 >> MLIB_SHIFT) - 1;
 961         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 962
 963         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 964         s0 = srcPixelPtr[0];
 965         s1 = srcPixelPtr[3];
 966         s2 = srcPixelPtr[6];
 967         s3 = srcPixelPtr[9];
 968
 969         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 970         s4 = srcPixelPtr[0];
 971         s5 = srcPixelPtr[3];
 972         s6 = srcPixelPtr[6];
 973         s7 = srcPixelPtr[9];
 974       }
 975
           /* Last pixel of the row: same computation as in the loop, but
            * with no look-ahead loads for a following pixel. */
 976       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 977       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 978       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 979       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 980             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 981       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 982       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 983             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 984
 985       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 986       S32_TO_S16_SAT(dPtr[0]);
 987     }
 988   }
 989
 990   return MLIB_SUCCESS;
 991 }
 992 
 993 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 994 {
       /*
        * Bicubic affine resampling for images with 4 interleaved channels,
        * integer-arithmetic version.
        *
        * For every row index j in [yStart, yFinish] and every channel k, the
        * destination pixels of the row are visited in order.  Each output
        * sample is built from a 4x4 window of source samples: four horizontal
        * 4-tap sums (c0..c3) followed by one vertical 4-tap sum (val0), which
        * is then handed to S32_TO_S16_SAT() for storing.
        *
        * param - affine parameter block.  It is not touched directly here;
        *         all working variables (yStart, yFinish, lineAddr, dstData,
        *         X, Y, dX, dY, srcYStride, xRight, dstPixelPtr, filter, ...)
        *         presumably come from the DECLAREVAR_BC()/CLIP() macros,
        *         which are defined outside this section -- TODO confirm.
        *
        * Returns MLIB_SUCCESS (the only return statement visible in this
        * function body).
        */
 995   DECLAREVAR_BC();
 996   DTYPE *dstLineEnd;
 997   const mlib_s16 *mlib_filters_table;
 998
       /* Coefficient table: mlib_filters_s16_bc for MLIB_BICUBIC,
        * mlib_filters_s16_bc2 for any other filter value. */
 999   if (filter == MLIB_BICUBIC) {
1000     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
1001   }
1002   else {
1003     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
1004   }
1005
       /* One iteration per row index j, yStart..yFinish inclusive. */
1006   for (j = yStart; j <= yFinish; j++) {
1007     mlib_s32 xf0, xf1, xf2, xf3;
1008     mlib_s32 yf0, yf1, yf2, yf3;
1009     mlib_s32 c0, c1, c2, c3, val0;
1010     mlib_s32 filterpos, k;
1011     mlib_s16 *fptr;
1012     mlib_s32 s0, s1, s2, s3;
1013     mlib_s32 s4, s5, s6, s7;
1014
         /* CLIP is defined elsewhere; presumably it sets up this row's X, Y,
          * xRight and dstPixelPtr and skips rows with nothing to draw
          * -- TODO confirm against the macro definition. */
1015     CLIP(4);
         /* Address of channel 0 of the pixel with index xRight, which is the
          * last pixel written in this row (see the loop bound below). */
1016     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
1017
         /* Each channel gets its own pass over the row, restarting from the
          * row's initial coordinates X, Y. */
1018     for (k = 0; k < 4; k++) {
1019       mlib_s32 X1 = X;
1020       mlib_s32 Y1 = Y;
1021       DTYPE *dPtr = dstPixelPtr + k;
1022
           /* filterpos is a BYTE offset into the table (note the mlib_u8 *
            * cast); four consecutive mlib_s16 coefficients are read there. */
1023       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1024       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1025
1026       xf0 = fptr[0];
1027       xf1 = fptr[1];
1028       xf2 = fptr[2];
1029       xf3 = fptr[3];
1030
1031       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1032       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1033
1034       yf0 = fptr[0];
1035       yf1 = fptr[1];
1036       yf2 = fptr[2];
1037       yf3 = fptr[3];
1038
           /* Top-left corner of the 4x4 window: one column left of and one
            * row above (X1 >> MLIB_SHIFT, Y1 >> MLIB_SHIFT). */
1039       xSrc = (X1 >> MLIB_SHIFT) - 1;
1040       ySrc = (Y1 >> MLIB_SHIFT) - 1;
1041
           /* Window row 0: channel k of four consecutive pixels (element
            * stride 4), starting from the row pointer lineAddr[ySrc]. */
1042       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1043       s0 = srcPixelPtr[0];
1044       s1 = srcPixelPtr[4];
1045       s2 = srcPixelPtr[8];
1046       s3 = srcPixelPtr[12];
1047
           /* Window row 1.  srcYStride is added to the address value (via
            * mlib_addr), not as an element count. */
1048       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1049       s4 = srcPixelPtr[0];
1050       s5 = srcPixelPtr[4];
1051       s6 = srcPixelPtr[8];
1052       s7 = srcPixelPtr[12];
1053
           /*
            * Main loop.  On entry to each iteration xf0..xf3, yf0..yf3 and
            * s0..s7 already hold the coefficients and the first two window
            * rows for the pixel at dPtr.  The body finishes that pixel and
            * then loads the same data for the next one.  It covers every
            * pixel except the last, which is completed after the loop.
            */
1054       for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
1055
             /* From here on X1, Y1 refer to the NEXT pixel. */
1056         X1 += dX;
1057         Y1 += dY;
1058
             /* Horizontal pass for the current pixel; window rows 2 and 3
              * are read straight from memory. */
1059         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1060         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
1061         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1062         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1063               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1064         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1065         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1066               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1067
             /* Horizontal coefficients for the next pixel; the current
              * pixel's c0..c3 are already computed. */
1068         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1069         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1070
1071         xf0 = fptr[0];
1072         xf1 = fptr[1];
1073         xf2 = fptr[2];
1074         xf3 = fptr[3];
1075
             /* Vertical pass for the current pixel; must stay ahead of the
              * yf0..yf3 reload below. */
1076         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1077
             /* Vertical coefficients for the next pixel. */
1078         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1079         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1080
1081         yf0 = fptr[0];
1082         yf1 = fptr[1];
1083         yf2 = fptr[2];
1084         yf3 = fptr[3];
1085
             /* Macro defined elsewhere; presumably saturates val0 to the
              * 16-bit range and stores it in dPtr[0] -- TODO confirm. */
1086         S32_TO_S16_SAT(dPtr[0]);
1087
             /* First two window rows for the next pixel. */
1088         xSrc = (X1 >> MLIB_SHIFT) - 1;
1089         ySrc = (Y1 >> MLIB_SHIFT) - 1;
1090
1091         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1092         s0 = srcPixelPtr[0];
1093         s1 = srcPixelPtr[4];
1094         s2 = srcPixelPtr[8];
1095         s3 = srcPixelPtr[12];
1096
1097         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1098         s4 = srcPixelPtr[0];
1099         s5 = srcPixelPtr[4];
1100         s6 = srcPixelPtr[8];
1101         s7 = srcPixelPtr[12];
1102       }
1103
           /* Last pixel of the row: same computation as in the loop, but
            * with no look-ahead loads for a following pixel. */
1104       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1105       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
1106       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1107       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1108             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1109       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1110       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1111             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1112
1113       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1114       S32_TO_S16_SAT(dPtr[0]);
1115     }
1116   }
1117
1118   return MLIB_SUCCESS;
1119 }
1120 
1121 #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
1122 
1123 /***************************************************************/