1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *      Image affine transformation with Bicubic filtering
  30  * SYNOPSIS
  31  *      mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
  32  *                                                       mlib_s32 *rightEdges,
  33  *                                                       mlib_s32 *xStarts,
  34  *                                                       mlib_s32 *yStarts,
  35  *                                                       mlib_s32 *sides,
  36  *                                                       mlib_u8  *dstData,
  37  *                                                       mlib_u8  **lineAddr,
  38  *                                                       mlib_s32 dstYStride,
  39  *                                                       mlib_s32 is_affine,
  40  *                                                       mlib_s32 srcYStride,
  41  *                                                       mlib_filter filter)
  42  *
  43  * ARGUMENTS
  44  *      leftEdges  array[dstHeight] of xLeft coordinates
   45  *      rightEdges array[dstHeight] of xRight coordinates
  46  *      xStarts    array[dstHeight] of xStart * 65536 coordinates
  47  *      yStarts    array[dstHeight] of yStart * 65536 coordinates
  48  *      sides      output array[4]. sides[0] is yStart, sides[1] is yFinish,
  49  *                 sides[2] is dx * 65536, sides[3] is dy * 65536
  50  *      dstData    pointer to the first pixel on (yStart - 1) line
  51  *      lineAddr   array[srcHeight] of pointers to the first pixel on
  52  *                 the corresponding lines
  53  *      dstYStride stride of destination image
  54  *      is_affine  indicator (Affine - GridWarp)
  55  *      srcYStride stride of source image
  56  *      filter     type of resampling filter
  57  *
  58  * DESCRIPTION
  59  *      The functions step along the lines from xLeft to xRight and apply
  60  *      the bicubic filtering.
  61  *
  62  */
  63 
  64 #include "mlib_ImageAffine.h"
  65 
  66 #define DTYPE           mlib_s16
  67 #define FILTER_BITS     9
  68 #define FUN_NAME(CHAN)  mlib_ImageAffine_s16_##CHAN##_bc
  69 
  70 /***************************************************************/
  71 /* for x86, using integer multiplies is faster */
  72 
  73 #define SHIFT_X  15
  74 #define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */
  75 
  76 #define SHIFT_Y  (15 + 15 - SHIFT_X)
  77 #define ROUND_Y  (1 << (SHIFT_Y - 1))
  78 
  79 #define S32_TO_S16_SAT(DST)                                     \
  80   if (val0 >= MLIB_S16_MAX)                                     \
  81     DST = MLIB_S16_MAX;                                         \
  82   else if (val0 <= MLIB_S16_MIN)                                \
  83     DST = MLIB_S16_MIN;                                         \
  84   else                                                          \
  85     DST = (mlib_s16)val0
  86 
  87 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
  88 {
  89   DECLAREVAR_BC();
  90   DTYPE *dstLineEnd;
  91   const mlib_s16 *mlib_filters_table;
  92 
  93   if (filter == MLIB_BICUBIC) {
  94     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
  95   }
  96   else {
  97     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
  98   }
  99 
 100   for (j = yStart; j <= yFinish; j++) {
 101     mlib_s32 xf0, xf1, xf2, xf3;
 102     mlib_s32 yf0, yf1, yf2, yf3;
 103     mlib_s32 c0, c1, c2, c3, val0;
 104     mlib_s32 filterpos;
 105     mlib_s16 *fptr;
 106     mlib_s32 s0, s1, s2, s3;
 107     mlib_s32 s4, s5, s6, s7;
 108 
 109     CLIP(1);
 110     dstLineEnd = (DTYPE *) dstData + xRight;
 111 
 112     filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 113     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 114 
 115     xf0 = fptr[0];
 116     xf1 = fptr[1];
 117     xf2 = fptr[2];
 118     xf3 = fptr[3];
 119 
 120     filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 121     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 122 
 123     yf0 = fptr[0];
 124     yf1 = fptr[1];
 125     yf2 = fptr[2];
 126     yf3 = fptr[3];
 127 
 128     xSrc = (X >> MLIB_SHIFT) - 1;
 129     ySrc = (Y >> MLIB_SHIFT) - 1;
 130 
 131     srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 132     s0 = srcPixelPtr[0];
 133     s1 = srcPixelPtr[1];
 134     s2 = srcPixelPtr[2];
 135     s3 = srcPixelPtr[3];
 136 
 137     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 138     s4 = srcPixelPtr[0];
 139     s5 = srcPixelPtr[1];
 140     s6 = srcPixelPtr[2];
 141     s7 = srcPixelPtr[3];
 142 
 143     for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
 144 
 145       X += dX;
 146       Y += dY;
 147 
 148       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 149       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 150       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 151       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 152             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 153       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 154       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 155             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 156 
 157       filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 158       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 159 
 160       xf0 = fptr[0];
 161       xf1 = fptr[1];
 162       xf2 = fptr[2];
 163       xf3 = fptr[3];
 164 
 165       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 166 
 167       filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 168       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 169 
 170       yf0 = fptr[0];
 171       yf1 = fptr[1];
 172       yf2 = fptr[2];
 173       yf3 = fptr[3];
 174 
 175       S32_TO_S16_SAT(dstPixelPtr[0]);
 176 
 177       xSrc = (X >> MLIB_SHIFT) - 1;
 178       ySrc = (Y >> MLIB_SHIFT) - 1;
 179 
 180       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 181       s0 = srcPixelPtr[0];
 182       s1 = srcPixelPtr[1];
 183       s2 = srcPixelPtr[2];
 184       s3 = srcPixelPtr[3];
 185 
 186       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 187       s4 = srcPixelPtr[0];
 188       s5 = srcPixelPtr[1];
 189       s6 = srcPixelPtr[2];
 190       s7 = srcPixelPtr[3];
 191     }
 192 
 193     c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 194     c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 195     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 196     c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 197           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 198     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 199     c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
 200           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
 201 
 202     val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 203     S32_TO_S16_SAT(dstPixelPtr[0]);
 204   }
 205 
 206   return MLIB_SUCCESS;
 207 }
 208 
 209 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 210 {
 211   DECLAREVAR_BC();
 212   DTYPE *dstLineEnd;
 213   const mlib_s16 *mlib_filters_table;
 214 
 215   if (filter == MLIB_BICUBIC) {
 216     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 217   }
 218   else {
 219     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 220   }
 221 
 222   for (j = yStart; j <= yFinish; j++) {
 223     mlib_s32 xf0, xf1, xf2, xf3;
 224     mlib_s32 yf0, yf1, yf2, yf3;
 225     mlib_s32 c0, c1, c2, c3, val0;
 226     mlib_s32 filterpos, k;
 227     mlib_s16 *fptr;
 228     mlib_s32 s0, s1, s2, s3;
 229     mlib_s32 s4, s5, s6, s7;
 230 
 231     CLIP(2);
 232     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 233 
 234     for (k = 0; k < 2; k++) {
 235       mlib_s32 X1 = X;
 236       mlib_s32 Y1 = Y;
 237       DTYPE *dPtr = dstPixelPtr + k;
 238 
 239       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 240       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 241 
 242       xf0 = fptr[0];
 243       xf1 = fptr[1];
 244       xf2 = fptr[2];
 245       xf3 = fptr[3];
 246 
 247       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 248       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 249 
 250       yf0 = fptr[0];
 251       yf1 = fptr[1];
 252       yf2 = fptr[2];
 253       yf3 = fptr[3];
 254 
 255       xSrc = (X1 >> MLIB_SHIFT) - 1;
 256       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 257 
 258       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 259       s0 = srcPixelPtr[0];
 260       s1 = srcPixelPtr[2];
 261       s2 = srcPixelPtr[4];
 262       s3 = srcPixelPtr[6];
 263 
 264       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 265       s4 = srcPixelPtr[0];
 266       s5 = srcPixelPtr[2];
 267       s6 = srcPixelPtr[4];
 268       s7 = srcPixelPtr[6];
 269 
 270       for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
 271 
 272         X1 += dX;
 273         Y1 += dY;
 274 
 275         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 276         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 277         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 278         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 279               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 280         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 281         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 282               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 283 
 284         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 285         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 286 
 287         xf0 = fptr[0];
 288         xf1 = fptr[1];
 289         xf2 = fptr[2];
 290         xf3 = fptr[3];
 291 
 292         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 293 
 294         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 295         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 296 
 297         yf0 = fptr[0];
 298         yf1 = fptr[1];
 299         yf2 = fptr[2];
 300         yf3 = fptr[3];
 301 
 302         S32_TO_S16_SAT(dPtr[0]);
 303 
 304         xSrc = (X1 >> MLIB_SHIFT) - 1;
 305         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 306 
 307         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 308         s0 = srcPixelPtr[0];
 309         s1 = srcPixelPtr[2];
 310         s2 = srcPixelPtr[4];
 311         s3 = srcPixelPtr[6];
 312 
 313         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 314         s4 = srcPixelPtr[0];
 315         s5 = srcPixelPtr[2];
 316         s6 = srcPixelPtr[4];
 317         s7 = srcPixelPtr[6];
 318       }
 319 
 320       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 321       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 322       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 323       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 324             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 325       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 326       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
 327             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
 328 
 329       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 330       S32_TO_S16_SAT(dPtr[0]);
 331     }
 332   }
 333 
 334   return MLIB_SUCCESS;
 335 }
 336 
 337 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 338 {
 339   DECLAREVAR_BC();
 340   DTYPE *dstLineEnd;
 341   const mlib_s16 *mlib_filters_table;
 342 
 343   if (filter == MLIB_BICUBIC) {
 344     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 345   }
 346   else {
 347     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 348   }
 349 
 350   for (j = yStart; j <= yFinish; j++) {
 351     mlib_s32 xf0, xf1, xf2, xf3;
 352     mlib_s32 yf0, yf1, yf2, yf3;
 353     mlib_s32 c0, c1, c2, c3, val0;
 354     mlib_s32 filterpos, k;
 355     mlib_s16 *fptr;
 356     mlib_s32 s0, s1, s2, s3;
 357     mlib_s32 s4, s5, s6, s7;
 358 
 359     CLIP(3);
 360     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 361 
 362     for (k = 0; k < 3; k++) {
 363       mlib_s32 X1 = X;
 364       mlib_s32 Y1 = Y;
 365       DTYPE *dPtr = dstPixelPtr + k;
 366 
 367       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 368       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 369 
 370       xf0 = fptr[0];
 371       xf1 = fptr[1];
 372       xf2 = fptr[2];
 373       xf3 = fptr[3];
 374 
 375       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 376       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 377 
 378       yf0 = fptr[0];
 379       yf1 = fptr[1];
 380       yf2 = fptr[2];
 381       yf3 = fptr[3];
 382 
 383       xSrc = (X1 >> MLIB_SHIFT) - 1;
 384       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 385 
 386       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 387       s0 = srcPixelPtr[0];
 388       s1 = srcPixelPtr[3];
 389       s2 = srcPixelPtr[6];
 390       s3 = srcPixelPtr[9];
 391 
 392       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 393       s4 = srcPixelPtr[0];
 394       s5 = srcPixelPtr[3];
 395       s6 = srcPixelPtr[6];
 396       s7 = srcPixelPtr[9];
 397 
 398       for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
 399 
 400         X1 += dX;
 401         Y1 += dY;
 402 
 403         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 404         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 405         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 406         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 407               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 408         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 409         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 410               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 411 
 412         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 413         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 414 
 415         xf0 = fptr[0];
 416         xf1 = fptr[1];
 417         xf2 = fptr[2];
 418         xf3 = fptr[3];
 419 
 420         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 421 
 422         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 423         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 424 
 425         yf0 = fptr[0];
 426         yf1 = fptr[1];
 427         yf2 = fptr[2];
 428         yf3 = fptr[3];
 429 
 430         S32_TO_S16_SAT(dPtr[0]);
 431 
 432         xSrc = (X1 >> MLIB_SHIFT) - 1;
 433         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 434 
 435         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 436         s0 = srcPixelPtr[0];
 437         s1 = srcPixelPtr[3];
 438         s2 = srcPixelPtr[6];
 439         s3 = srcPixelPtr[9];
 440 
 441         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 442         s4 = srcPixelPtr[0];
 443         s5 = srcPixelPtr[3];
 444         s6 = srcPixelPtr[6];
 445         s7 = srcPixelPtr[9];
 446       }
 447 
 448       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 449       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 450       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 451       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 452             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 453       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 454       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
 455             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
 456 
 457       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 458       S32_TO_S16_SAT(dPtr[0]);
 459     }
 460   }
 461 
 462   return MLIB_SUCCESS;
 463 }
 464 
 465 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 466 {
 467   DECLAREVAR_BC();
 468   DTYPE *dstLineEnd;
 469   const mlib_s16 *mlib_filters_table;
 470 
 471   if (filter == MLIB_BICUBIC) {
 472     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
 473   }
 474   else {
 475     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
 476   }
 477 
 478   for (j = yStart; j <= yFinish; j++) {
 479     mlib_s32 xf0, xf1, xf2, xf3;
 480     mlib_s32 yf0, yf1, yf2, yf3;
 481     mlib_s32 c0, c1, c2, c3, val0;
 482     mlib_s32 filterpos, k;
 483     mlib_s16 *fptr;
 484     mlib_s32 s0, s1, s2, s3;
 485     mlib_s32 s4, s5, s6, s7;
 486 
 487     CLIP(4);
 488     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
 489 
 490     for (k = 0; k < 4; k++) {
 491       mlib_s32 X1 = X;
 492       mlib_s32 Y1 = Y;
 493       DTYPE *dPtr = dstPixelPtr + k;
 494 
 495       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 496       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 497 
 498       xf0 = fptr[0];
 499       xf1 = fptr[1];
 500       xf2 = fptr[2];
 501       xf3 = fptr[3];
 502 
 503       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 504       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 505 
 506       yf0 = fptr[0];
 507       yf1 = fptr[1];
 508       yf2 = fptr[2];
 509       yf3 = fptr[3];
 510 
 511       xSrc = (X1 >> MLIB_SHIFT) - 1;
 512       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 513 
 514       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 515       s0 = srcPixelPtr[0];
 516       s1 = srcPixelPtr[4];
 517       s2 = srcPixelPtr[8];
 518       s3 = srcPixelPtr[12];
 519 
 520       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 521       s4 = srcPixelPtr[0];
 522       s5 = srcPixelPtr[4];
 523       s6 = srcPixelPtr[8];
 524       s7 = srcPixelPtr[12];
 525 
 526       for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
 527 
 528         X1 += dX;
 529         Y1 += dY;
 530 
 531         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 532         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 533         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 534         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 535               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 536         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 537         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 538               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 539 
 540         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 541         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 542 
 543         xf0 = fptr[0];
 544         xf1 = fptr[1];
 545         xf2 = fptr[2];
 546         xf3 = fptr[3];
 547 
 548         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 549 
 550         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 551         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 552 
 553         yf0 = fptr[0];
 554         yf1 = fptr[1];
 555         yf2 = fptr[2];
 556         yf3 = fptr[3];
 557 
 558         S32_TO_S16_SAT(dPtr[0]);
 559 
 560         xSrc = (X1 >> MLIB_SHIFT) - 1;
 561         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 562 
 563         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 564         s0 = srcPixelPtr[0];
 565         s1 = srcPixelPtr[4];
 566         s2 = srcPixelPtr[8];
 567         s3 = srcPixelPtr[12];
 568 
 569         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 570         s4 = srcPixelPtr[0];
 571         s5 = srcPixelPtr[4];
 572         s6 = srcPixelPtr[8];
 573         s7 = srcPixelPtr[12];
 574       }
 575 
 576       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 577       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
 578       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 579       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 580             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 581       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 582       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 583             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 584 
 585       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 586       S32_TO_S16_SAT(dPtr[0]);
 587     }
 588   }
 589 
 590   return MLIB_SUCCESS;
 591 }
 592 
 593 /***************************************************************/