1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *      Image affine transformation with Bicubic filtering
  30  * SYNOPSIS
  31  *      mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
  32  *                                                       mlib_s32 *rightEdges,
  33  *                                                       mlib_s32 *xStarts,
  34  *                                                       mlib_s32 *yStarts,
  35  *                                                       mlib_s32 *sides,
  36  *                                                       mlib_u8  *dstData,
  37  *                                                       mlib_u8  **lineAddr,
  38  *                                                       mlib_s32 dstYStride,
  39  *                                                       mlib_s32 is_affine,
  40  *                                                       mlib_s32 srcYStride,
  41  *                                                       mlib_filter filter)
  42  *
  43  * ARGUMENTS
  44  *      leftEdges  array[dstHeight] of xLeft coordinates
   45  *      rightEdges array[dstHeight] of xRight coordinates
  46  *      xStarts    array[dstHeight] of xStart * 65536 coordinates
  47  *      yStarts    array[dstHeight] of yStart * 65536 coordinates
  48  *      sides      output array[4]. sides[0] is yStart, sides[1] is yFinish,
  49  *                 sides[2] is dx * 65536, sides[3] is dy * 65536
  50  *      dstData    pointer to the first pixel on (yStart - 1) line
  51  *      lineAddr   array[srcHeight] of pointers to the first pixel on
  52  *                 the corresponding lines
  53  *      dstYStride stride of destination image
  54  *      is_affine  indicator (Affine - GridWarp)
  55  *      srcYStride stride of source image
  56  *      filter     type of resampling filter
  57  *
  58  * DESCRIPTION
  59  *      The functions step along the lines from xLeft to xRight and apply
  60  *      the bicubic filtering.
  61  *
  62  */
  63 
  64 #include "mlib_ImageAffine.h"
  65 
  66 #define DTYPE  mlib_u16
  67 
  68 #define FUN_NAME(CHAN) mlib_ImageAffine_u16_##CHAN##_bc
  69 
  70 #define FILTER_BITS   9
  71 
  72 /***************************************************************/
  73 /* for x86, using integer multiplies is faster */
  74 
  75 #define SHIFT_X  15
  76 #define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */
  77 
  78 #define SHIFT_Y  14
  79 #define ROUND_Y  (1 << (SHIFT_Y - 1))
  80 
  81 #define S32_TO_U16_SAT(DST)                                     \
  82   if (val0 >= MLIB_U16_MAX)                                     \
  83     DST = MLIB_U16_MAX;                                         \
  84   else if (val0 <= MLIB_U16_MIN)                                \
  85     DST = MLIB_U16_MIN;                                         \
  86   else                                                          \
  87     DST = (mlib_u16)val0
  88 
  89 /***************************************************************/
  90 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
  91 {
  92   DECLAREVAR_BC();
  93   DTYPE *dstLineEnd;
  94   const mlib_s16 *mlib_filters_table;
  95 
  96   if (filter == MLIB_BICUBIC) {
  97     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
  98   }
  99   else {
 100     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 101   }
 102 
 103   for (j = yStart; j <= yFinish; j++) {
 104     mlib_s32 xf0, xf1, xf2, xf3;
 105     mlib_s32 yf0, yf1, yf2, yf3;
 106     mlib_s32 c0, c1, c2, c3, val0;
 107     mlib_s32 filterpos;
 108     mlib_s16 *fptr;
 109     mlib_s32 s0, s1, s2, s3;
 110     mlib_s32 s4, s5, s6, s7;
 111 
 112     CLIP(1);
 113     dstLineEnd = (DTYPE *) dstData + xRight;
 114 
 115     filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 116     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 117 
 118     xf0 = fptr[0] >> 1;
 119     xf1 = fptr[1] >> 1;
 120     xf2 = fptr[2] >> 1;
 121     xf3 = fptr[3] >> 1;
 122 
 123     filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 124     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 125 
 126     yf0 = fptr[0];
 127     yf1 = fptr[1];
 128     yf2 = fptr[2];
 129     yf3 = fptr[3];
 130 
 131     xSrc = (X >> MLIB_SHIFT) - 1;
 132     ySrc = (Y >> MLIB_SHIFT) - 1;
 133 
 134     srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 135     s0 = srcPixelPtr[0];
 136     s1 = srcPixelPtr[1];
 137     s2 = srcPixelPtr[2];
 138     s3 = srcPixelPtr[3];
 139 
 140     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 141     s4 = srcPixelPtr[0];
 142     s5 = srcPixelPtr[1];
 143     s6 = srcPixelPtr[2];
 144     s7 = srcPixelPtr[3];
 145 
 146     for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
 147 
 148       X += dX;
 149       Y += dY;
 150 
 151       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 152       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 153       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 154       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 155             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 156       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 157       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 158             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 159 
 160       filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 161       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 162 
 163       xf0 = fptr[0] >> 1;
 164       xf1 = fptr[1] >> 1;
 165       xf2 = fptr[2] >> 1;
 166       xf3 = fptr[3] >> 1;
 167 
 168       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 169 
 170       filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 171       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 172 
 173       yf0 = fptr[0];
 174       yf1 = fptr[1];
 175       yf2 = fptr[2];
 176       yf3 = fptr[3];
 177 
 178       S32_TO_U16_SAT(dstPixelPtr[0]);
 179 
 180       xSrc = (X >> MLIB_SHIFT) - 1;
 181       ySrc = (Y >> MLIB_SHIFT) - 1;
 182 
 183       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 184       s0 = srcPixelPtr[0];
 185       s1 = srcPixelPtr[1];
 186       s2 = srcPixelPtr[2];
 187       s3 = srcPixelPtr[3];
 188 
 189       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 190       s4 = srcPixelPtr[0];
 191       s5 = srcPixelPtr[1];
 192       s6 = srcPixelPtr[2];
 193       s7 = srcPixelPtr[3];
 194     }
 195 
 196     c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 197     c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 198     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 199     c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 200           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 201     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 202     c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 203           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 204 
 205     val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 206     S32_TO_U16_SAT(dstPixelPtr[0]);
 207   }
 208 
 209   return MLIB_SUCCESS;
 210 }
 211 
 212 /***************************************************************/
 213 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 214 {
 215   DECLAREVAR_BC();
 216   DTYPE *dstLineEnd;
 217   const mlib_s16 *mlib_filters_table;
 218 
 219   if (filter == MLIB_BICUBIC) {
 220     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 221   }
 222   else {
 223     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 224   }
 225 
 226   for (j = yStart; j <= yFinish; j++) {
 227     mlib_s32 xf0, xf1, xf2, xf3;
 228     mlib_s32 yf0, yf1, yf2, yf3;
 229     mlib_s32 c0, c1, c2, c3, val0;
 230     mlib_s32 filterpos, k;
 231     mlib_s16 *fptr;
 232     mlib_s32 s0, s1, s2, s3;
 233     mlib_s32 s4, s5, s6, s7;
 234 
 235     CLIP(2);
 236     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 237 
 238     for (k = 0; k < 2; k++) {
 239       mlib_s32 X1 = X;
 240       mlib_s32 Y1 = Y;
 241       DTYPE *dPtr = dstPixelPtr + k;
 242 
 243       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 244       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 245 
 246       xf0 = fptr[0] >> 1;
 247       xf1 = fptr[1] >> 1;
 248       xf2 = fptr[2] >> 1;
 249       xf3 = fptr[3] >> 1;
 250 
 251       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 252       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 253 
 254       yf0 = fptr[0];
 255       yf1 = fptr[1];
 256       yf2 = fptr[2];
 257       yf3 = fptr[3];
 258 
 259       xSrc = (X1 >> MLIB_SHIFT) - 1;
 260       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 261 
 262       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 263       s0 = srcPixelPtr[0];
 264       s1 = srcPixelPtr[2];
 265       s2 = srcPixelPtr[4];
 266       s3 = srcPixelPtr[6];
 267 
 268       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 269       s4 = srcPixelPtr[0];
 270       s5 = srcPixelPtr[2];
 271       s6 = srcPixelPtr[4];
 272       s7 = srcPixelPtr[6];
 273 
 274       for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
 275 
 276         X1 += dX;
 277         Y1 += dY;
 278 
 279         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 280         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 281         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 282         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 283               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 284         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 285         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 286               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 287 
 288         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 289         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 290 
 291         xf0 = fptr[0] >> 1;
 292         xf1 = fptr[1] >> 1;
 293         xf2 = fptr[2] >> 1;
 294         xf3 = fptr[3] >> 1;
 295 
 296         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 297 
 298         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 299         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 300 
 301         yf0 = fptr[0];
 302         yf1 = fptr[1];
 303         yf2 = fptr[2];
 304         yf3 = fptr[3];
 305 
 306         S32_TO_U16_SAT(dPtr[0]);
 307 
 308         xSrc = (X1 >> MLIB_SHIFT) - 1;
 309         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 310 
 311         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 312         s0 = srcPixelPtr[0];
 313         s1 = srcPixelPtr[2];
 314         s2 = srcPixelPtr[4];
 315         s3 = srcPixelPtr[6];
 316 
 317         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 318         s4 = srcPixelPtr[0];
 319         s5 = srcPixelPtr[2];
 320         s6 = srcPixelPtr[4];
 321         s7 = srcPixelPtr[6];
 322       }
 323 
 324       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 325       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 326       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 327       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 328             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 329       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 330       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 331             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 332 
 333       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 334       S32_TO_U16_SAT(dPtr[0]);
 335     }
 336   }
 337 
 338   return MLIB_SUCCESS;
 339 }
 340 
 341 /***************************************************************/
 342 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 343 {
 344   DECLAREVAR_BC();
 345   DTYPE *dstLineEnd;
 346   const mlib_s16 *mlib_filters_table;
 347 
 348   if (filter == MLIB_BICUBIC) {
 349     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 350   }
 351   else {
 352     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 353   }
 354 
 355   for (j = yStart; j <= yFinish; j++) {
 356     mlib_s32 xf0, xf1, xf2, xf3;
 357     mlib_s32 yf0, yf1, yf2, yf3;
 358     mlib_s32 c0, c1, c2, c3, val0;
 359     mlib_s32 filterpos, k;
 360     mlib_s16 *fptr;
 361     mlib_s32 s0, s1, s2, s3;
 362     mlib_s32 s4, s5, s6, s7;
 363 
 364     CLIP(3);
 365     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 366 
 367     for (k = 0; k < 3; k++) {
 368       mlib_s32 X1 = X;
 369       mlib_s32 Y1 = Y;
 370       DTYPE *dPtr = dstPixelPtr + k;
 371 
 372       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 373       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 374 
 375       xf0 = fptr[0] >> 1;
 376       xf1 = fptr[1] >> 1;
 377       xf2 = fptr[2] >> 1;
 378       xf3 = fptr[3] >> 1;
 379 
 380       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 381       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 382 
 383       yf0 = fptr[0];
 384       yf1 = fptr[1];
 385       yf2 = fptr[2];
 386       yf3 = fptr[3];
 387 
 388       xSrc = (X1 >> MLIB_SHIFT) - 1;
 389       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 390 
 391       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 392       s0 = srcPixelPtr[0];
 393       s1 = srcPixelPtr[3];
 394       s2 = srcPixelPtr[6];
 395       s3 = srcPixelPtr[9];
 396 
 397       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 398       s4 = srcPixelPtr[0];
 399       s5 = srcPixelPtr[3];
 400       s6 = srcPixelPtr[6];
 401       s7 = srcPixelPtr[9];
 402 
 403       for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
 404 
 405         X1 += dX;
 406         Y1 += dY;
 407 
 408         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 409         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 410         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 411         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 412               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 413         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 414         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 415               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 416 
 417         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 418         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 419 
 420         xf0 = fptr[0] >> 1;
 421         xf1 = fptr[1] >> 1;
 422         xf2 = fptr[2] >> 1;
 423         xf3 = fptr[3] >> 1;
 424 
 425         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 426 
 427         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 428         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 429 
 430         yf0 = fptr[0];
 431         yf1 = fptr[1];
 432         yf2 = fptr[2];
 433         yf3 = fptr[3];
 434 
 435         S32_TO_U16_SAT(dPtr[0]);
 436 
 437         xSrc = (X1 >> MLIB_SHIFT) - 1;
 438         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 439 
 440         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 441         s0 = srcPixelPtr[0];
 442         s1 = srcPixelPtr[3];
 443         s2 = srcPixelPtr[6];
 444         s3 = srcPixelPtr[9];
 445 
 446         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 447         s4 = srcPixelPtr[0];
 448         s5 = srcPixelPtr[3];
 449         s6 = srcPixelPtr[6];
 450         s7 = srcPixelPtr[9];
 451       }
 452 
 453       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 454       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 455       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 456       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 457             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 458       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 459       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 460             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 461 
 462       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 463       S32_TO_U16_SAT(dPtr[0]);
 464     }
 465   }
 466 
 467   return MLIB_SUCCESS;
 468 }
 469 
 470 /***************************************************************/
 471 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 472 {
 473   DECLAREVAR_BC();
 474   DTYPE *dstLineEnd;
 475   const mlib_s16 *mlib_filters_table;
 476 
 477   if (filter == MLIB_BICUBIC) {
 478     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 479   }
 480   else {
 481     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 482   }
 483 
 484   for (j = yStart; j <= yFinish; j++) {
 485     mlib_s32 xf0, xf1, xf2, xf3;
 486     mlib_s32 yf0, yf1, yf2, yf3;
 487     mlib_s32 c0, c1, c2, c3, val0;
 488     mlib_s32 filterpos, k;
 489     mlib_s16 *fptr;
 490     mlib_s32 s0, s1, s2, s3;
 491     mlib_s32 s4, s5, s6, s7;
 492 
 493     CLIP(4);
 494     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
 495 
 496     for (k = 0; k < 4; k++) {
 497       mlib_s32 X1 = X;
 498       mlib_s32 Y1 = Y;
 499       DTYPE *dPtr = dstPixelPtr + k;
 500 
 501       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 502       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 503 
 504       xf0 = fptr[0] >> 1;
 505       xf1 = fptr[1] >> 1;
 506       xf2 = fptr[2] >> 1;
 507       xf3 = fptr[3] >> 1;
 508 
 509       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 510       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 511 
 512       yf0 = fptr[0];
 513       yf1 = fptr[1];
 514       yf2 = fptr[2];
 515       yf3 = fptr[3];
 516 
 517       xSrc = (X1 >> MLIB_SHIFT) - 1;
 518       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 519 
 520       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 521       s0 = srcPixelPtr[0];
 522       s1 = srcPixelPtr[4];
 523       s2 = srcPixelPtr[8];
 524       s3 = srcPixelPtr[12];
 525 
 526       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 527       s4 = srcPixelPtr[0];
 528       s5 = srcPixelPtr[4];
 529       s6 = srcPixelPtr[8];
 530       s7 = srcPixelPtr[12];
 531 
 532       for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
 533 
 534         X1 += dX;
 535         Y1 += dY;
 536 
 537         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 538         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 539         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 540         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 541               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 542         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 543         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 544               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 545 
 546         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 547         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 548 
 549         xf0 = fptr[0] >> 1;
 550         xf1 = fptr[1] >> 1;
 551         xf2 = fptr[2] >> 1;
 552         xf3 = fptr[3] >> 1;
 553 
 554         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 555 
 556         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 557         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 558 
 559         yf0 = fptr[0];
 560         yf1 = fptr[1];
 561         yf2 = fptr[2];
 562         yf3 = fptr[3];
 563 
 564         S32_TO_U16_SAT(dPtr[0]);
 565 
 566         xSrc = (X1 >> MLIB_SHIFT) - 1;
 567         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 568 
 569         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 570         s0 = srcPixelPtr[0];
 571         s1 = srcPixelPtr[4];
 572         s2 = srcPixelPtr[8];
 573         s3 = srcPixelPtr[12];
 574 
 575         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 576         s4 = srcPixelPtr[0];
 577         s5 = srcPixelPtr[4];
 578         s6 = srcPixelPtr[8];
 579         s7 = srcPixelPtr[12];
 580       }
 581 
 582       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 583       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 584       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 585       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 586             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 587       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 588       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 589             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 590 
 591       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 592       S32_TO_U16_SAT(dPtr[0]);
 593     }
 594   }
 595 
 596   return MLIB_SUCCESS;
 597 }
 598 
 599 /***************************************************************/