1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 
  28 /*
  29  *      The functions step along the lines from xLeft to xRight and apply
  30  *      the bilinear filtering.
  31  *
  32  */
  33 
  34 #include "vis_proto.h"
  35 #include "mlib_image.h"
  36 #include "mlib_ImageCopy.h"
  37 #include "mlib_ImageAffine.h"
  38 #include "mlib_v_ImageFilters.h"
  39 #include "mlib_v_ImageChannelExtract.h"
  40 #include "mlib_v_ImageAffine_BL_S16.h"
  41 
  42 /*#define MLIB_VIS2*/
  43 
  44 /***************************************************************/
/*
 * Element type name for this file. It is not referenced directly in the
 * code below; presumably it is consumed by macros from the included
 * headers (e.g. NEW_LINE) -- TODO confirm.
 */
#define DTYPE mlib_s16

/*
 * Builds the name of a function defined in this file by token pasting,
 * e.g. FUN_NAME(1ch) -> mlib_ImageAffine_u16_1ch_bl.
 */
#define FUN_NAME(CHAN) mlib_ImageAffine_u16_##CHAN##_bl

/***************************************************************/
/*
 * Forward declarations of the non-aligned 2- and 4-channel variants.
 * The aligned 2ch / 4ch functions call them as a fallback before their
 * definitions appear further down in the file.
 */
mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param);
mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param);
  52 
  53 /***************************************************************/
/*
 * Flips the top bit of every 16-bit lane of x in place, by XOR-ing it
 * with mask_8000 (0x8000 replicated, declared by DECLAREVAR).
 * BL_SUM applies it to the samples before the arithmetic and to the
 * result afterwards.
 * Expands to an assignment: x must be a plain lvalue and mask_8000 must
 * be in scope.
 */
#define XOR_8000(x) x = vis_fxor(x, mask_8000)
  55 
  56 /***************************************************************/
/*
 * Writes the byte-shuffle mask when building for VIS2; expands to nothing
 * otherwise, so a call site "MLIB_WRITE_BMASK(x);" becomes an empty
 * statement. MLIB_VIS2 is commented out near the top of this file.
 */
#ifdef MLIB_VIS2
#define MLIB_WRITE_BMASK(bmask) vis_write_bmask(bmask, 0)
#else
#define MLIB_WRITE_BMASK(bmask)
#endif /* MLIB_VIS2 */
  62 
  63 /***************************************************************/
/*
 * Declares the locals shared by every function in this file.
 *
 * Expands DECLAREVAR0() (defined in an included header) and then adds:
 *   warp_tbl, srcYStride   - read from `param`
 *   dl, i, size            - destination line pointer and loop state
 *   mask_8000, mask_7fff   - 0x8000 / 0x7FFF replicated in every 16-bit lane
 *   dx64, dy64, deltax, deltay, delta1_x, delta1_y
 *                          - fraction vectors used by BL_SUM / BL_SUM_3CH
 *   s0..s3, d0..d3, dd     - sample, intermediate and result registers
 *
 * Requires a variable named `param` in scope. The expansion deliberately
 * has no trailing semicolon, so call sites write `DECLAREVAR();`.
 * The other macros in this file refer to these names directly.
 */
#undef  DECLAREVAR
#define DECLAREVAR()                                            \
  DECLAREVAR0();                                                \
  mlib_s32  *warp_tbl   = param -> warp_tbl;                    \
  mlib_s32  srcYStride = param -> srcYStride;                   \
  mlib_u8   *dl;                                                \
  mlib_s32  i, size;                                            \
  mlib_d64  mask_8000 = vis_to_double_dup(0x80008000);          \
  mlib_d64  mask_7fff = vis_to_double_dup(0x7FFF7FFF);          \
  mlib_d64  dx64, dy64, deltax, deltay, delta1_x, delta1_y;     \
  mlib_d64  s0, s1, s2, s3;                                     \
  mlib_d64  d0, d1, d2, d3, dd
  76 
  77 /***************************************************************/
  78 
/*
 * Lane-wise 16x16 multiply built from the two VIS partial-product
 * instructions: the results of vis_fmul8sux16 and vis_fmul8ulx16 are
 * added with vis_fpadd16.
 * NOTE(review): presumably yields the upper 16 bits of each 16x16
 * product -- confirm against the VIS manual.
 *
 * arguments (x, y) are swapped to prevent overflow: the intrinsics are
 * called as (y, x), so in this file the weight (second macro argument)
 * is passed as their first operand and the sample as their second.
 * Both macro arguments are evaluated twice.
 */
#define FMUL_16x16(x, y)                        \
  vis_fpadd16(vis_fmul8sux16(y, x),             \
              vis_fmul8ulx16(y, x))
  83 
  84 /***************************************************************/
/*
 * Number of mlib_d64 elements in the on-stack line buffer used by the
 * 2ch_na, 3ch and 4ch_na functions. Lines needing more elements than
 * this are processed in a buffer obtained from mlib_malloc instead.
 */
#define BUF_SIZE  512
  86 
  87 /***************************************************************/
  88 #define DOUBLE_4U16(x0, x1, x2, x3)                                 \
  89   vis_to_double(((((x0) & 0xFFFE) << 15) | (((x1) & 0xFFFE) >> 1)), \
  90                 ((((x2) & 0xFFFE) << 15) | (((x3) & 0xFFFE) >> 1)))
  91 
  92 /***************************************************************/
/*
 * Bilinear blend of the four 16-bit lanes held in s0..s3; the result is
 * left in dd. All operands are the locals declared by DECLAREVAR().
 *
 *   s0, s1  samples at the current position and its right neighbour
 *   s2, s3  the same two positions one source row further
 *   deltax, deltay  per-lane horizontal / vertical fractions
 *
 * Steps, in order:
 *   1. XOR_8000 on s0..s3 (flip the top bit of every lane).
 *   2. delta1_x / delta1_y = mask_7fff - deltax / deltay, i.e. the
 *      complementary weights.
 *   3. d0 = 2*(s0*delta1_x + s1*deltax) * delta1_y
 *      d2 = 2*(s2*delta1_x + s3*deltax) * deltay
 *      using FMUL_16x16 for the products.
 *   4. dd = 2*(d0 + d2), followed by XOR_8000 to undo step 1.
 *      NOTE(review): the doublings presumably compensate for the 15-bit
 *      weight scale -- confirm.
 *   5. deltax / deltay are advanced by dx64 / dy64 and masked with
 *      mask_7fff, ready for the next call.
 *
 * Expands to several statements without a trailing semicolon; use it
 * only as a complete statement inside a braced block.
 */
#define BL_SUM()                                                \
  XOR_8000(s0);                                                 \
  XOR_8000(s1);                                                 \
  XOR_8000(s2);                                                 \
  XOR_8000(s3);                                                 \
                                                                \
  delta1_x = vis_fpsub16(mask_7fff, deltax);                    \
  delta1_y = vis_fpsub16(mask_7fff, deltay);                    \
                                                                \
  d0 = FMUL_16x16(s0, delta1_x);                                \
  d1 = FMUL_16x16(s1, deltax);                                  \
  d0 = vis_fpadd16(d0, d1);                                     \
  d0 = vis_fpadd16(d0, d0);                                     \
  d0 = FMUL_16x16(d0, delta1_y);                                \
                                                                \
  d2 = FMUL_16x16(s2, delta1_x);                                \
  d3 = FMUL_16x16(s3, deltax);                                  \
  d2 = vis_fpadd16(d2, d3);                                     \
  d2 = vis_fpadd16(d2, d2);                                     \
  d2 = FMUL_16x16(d2, deltay);                                  \
                                                                \
  dd = vis_fpadd16(d0, d2);                                     \
  dd = vis_fpadd16(dd, dd);                                     \
  XOR_8000(dd);                                                 \
                                                                \
  deltax = vis_fpadd16(deltax, dx64);                           \
  deltay = vis_fpadd16(deltay, dy64);                           \
  deltax = vis_fand(deltax, mask_7fff);                         \
  deltay = vis_fand(deltay, mask_7fff)
 122 
 123 /***************************************************************/
/*
 * Bilinear blend used by the 3-channel function; the result is left in dd.
 * Differs from BL_SUM in the order of interpolation and in a lane
 * realignment step:
 *
 *   1. XOR_8000 on s0..s3; delta1_x / delta1_y = mask_7fff - delta.
 *   2. Vertical blend first:
 *        d0 = 2*(s0*delta1_y + s2*deltay) * delta1_x
 *        d1 = 2*(s1*delta1_y + s3*deltay) * deltax
 *   3. vis_alignaddr((void*)0, 2) followed by vis_faligndata(d0, d0)
 *      realigns d0 before it is added to d1.
 *      NOTE(review): per VIS semantics this should rotate d0 by one
 *      16-bit lane, matching the 2-byte back-offset the 3ch caller
 *      applies to its source pointer -- confirm.
 *   4. dd = 2*(d0 + d1), followed by XOR_8000.
 *   5. deltax / deltay advance by dx64 / dy64 and are masked with
 *      mask_7fff.
 *
 * Side effect: calls vis_alignaddr, so the caller must re-establish any
 * alignment it needs before its next vis_faligndata (the 3ch loop does
 * this at the top of every iteration).
 *
 * Expands to several statements without a trailing semicolon; use it
 * only as a complete statement inside a braced block.
 */
#define BL_SUM_3CH()                                            \
  XOR_8000(s0);                                                 \
  XOR_8000(s1);                                                 \
  XOR_8000(s2);                                                 \
  XOR_8000(s3);                                                 \
                                                                \
  delta1_x = vis_fpsub16(mask_7fff, deltax);                    \
  delta1_y = vis_fpsub16(mask_7fff, deltay);                    \
                                                                \
  d0 = FMUL_16x16(s0, delta1_y);                                \
  d2 = FMUL_16x16(s2, deltay);                                  \
  d0 = vis_fpadd16(d0, d2);                                     \
  d0 = vis_fpadd16(d0, d0);                                     \
  d0 = FMUL_16x16(d0, delta1_x);                                \
                                                                \
  d1 = FMUL_16x16(s1, delta1_y);                                \
  d3 = FMUL_16x16(s3, deltay);                                  \
  d1 = vis_fpadd16(d1, d3);                                     \
  d1 = vis_fpadd16(d1, d1);                                     \
  d1 = FMUL_16x16(d1, deltax);                                  \
                                                                \
  vis_alignaddr((void*)0, 2);                                   \
  d0 = vis_faligndata(d0, d0);                                  \
  dd = vis_fpadd16(d0, d1);                                     \
  dd = vis_fpadd16(dd, dd);                                     \
  XOR_8000(dd);                                                 \
                                                                \
  deltax = vis_fpadd16(deltax, dx64);                           \
  deltay = vis_fpadd16(deltay, dy64);                           \
  deltax = vis_fand(deltax, mask_7fff);                         \
  deltay = vis_fand(deltay, mask_7fff)
 155 
 156 /***************************************************************/
 157 #define LD_U16(sp, ind) vis_ld_u16(sp + ind)
 158 
 159 /***************************************************************/
/*
 * LOAD_1CH: gathers the 2x2 neighbourhoods of four single-channel pixels.
 * sp0..sp3 are byte pointers to the four pixels; for each of them the
 * macro loads
 *   s0 <- sample at sp            s1 <- sample at sp + 2 bytes
 *   s2 <- sample at sp + srcYStride
 *   s3 <- sample at sp + srcYStride + 2
 *
 * Non-VIS2 build: every vis_faligndata(LD_U16(...), sN) merges one newly
 * loaded sample into sN. The initial mask_7fff operand is only filler.
 * Samples are merged in the order sp3, sp2, sp1, sp0. This relies on the
 * alignment offset set by the caller with vis_alignaddr((void*)0, 6).
 * NOTE(review): the final lane order is presumably sp0, sp1, sp2, sp3
 * from high to low -- confirm against the VIS manual.
 *
 * VIS2 build: the same gather done with vis_bshuffle; needs temporaries
 * t0..t3 declared by the caller and the byte mask written beforehand.
 *
 * Expands to several statements without a trailing semicolon.
 */
#ifndef MLIB_VIS2

#define LOAD_1CH()                                              \
  s0 = vis_faligndata(LD_U16(sp3, 0), mask_7fff);               \
  s1 = vis_faligndata(LD_U16(sp3, 2), mask_7fff);               \
  s2 = vis_faligndata(LD_U16(sp3, srcYStride), mask_7fff);      \
  s3 = vis_faligndata(LD_U16(sp3, srcYStride + 2), mask_7fff);  \
                                                                \
  s0 = vis_faligndata(LD_U16(sp2, 0), s0);                      \
  s1 = vis_faligndata(LD_U16(sp2, 2), s1);                      \
  s2 = vis_faligndata(LD_U16(sp2, srcYStride), s2);             \
  s3 = vis_faligndata(LD_U16(sp2, srcYStride + 2), s3);         \
                                                                \
  s0 = vis_faligndata(LD_U16(sp1, 0), s0);                      \
  s1 = vis_faligndata(LD_U16(sp1, 2), s1);                      \
  s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2);             \
  s3 = vis_faligndata(LD_U16(sp1, srcYStride + 2), s3);         \
                                                                \
  s0 = vis_faligndata(LD_U16(sp0, 0), s0);                      \
  s1 = vis_faligndata(LD_U16(sp0, 2), s1);                      \
  s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2);             \
  s3 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s3)

#else

#define LOAD_1CH()                                                             \
  s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp2, 0));                           \
  s1 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp2, 2));                           \
  s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp2, srcYStride));         \
  s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp2, srcYStride + 2)); \
                                                                               \
  t0 = vis_bshuffle(LD_U16(sp1, 0), LD_U16(sp3, 0));                           \
  t1 = vis_bshuffle(LD_U16(sp1, 2), LD_U16(sp3, 2));                           \
  t2 = vis_bshuffle(LD_U16(sp1, srcYStride), LD_U16(sp3, srcYStride));         \
  t3 = vis_bshuffle(LD_U16(sp1, srcYStride + 2), LD_U16(sp3, srcYStride + 2)); \
                                                                               \
  s0 = vis_bshuffle(s0, t0);                                                   \
  s1 = vis_bshuffle(s1, t1);                                                   \
  s2 = vis_bshuffle(s2, t2);                                                   \
  s3 = vis_bshuffle(s3, t3)

#endif /* MLIB_VIS2 */
 202 
 203 /***************************************************************/
 204 #define GET_POINTER(sp)                                                       \
 205   sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 2*(X >> MLIB_SHIFT); \
 206   X += dX;                                                                    \
 207   Y += dY
 208 
 209 /***************************************************************/
/*
 * Per-row step setup for the 1-channel function.
 * Not expanded by name in this file; presumably invoked from NEW_LINE in
 * an included header -- TODO confirm.
 *
 * When a warp table is present, reloads dX / dY for row j from
 * warp_tbl[2*j] / warp_tbl[2*j + 1] and rebuilds dx64 / dy64 with the
 * same expression the 1ch function uses for the non-warp case: the low
 * 16 bits of (step << 1), placed in both halves of a 32-bit word.
 *
 * The expansion is a bare if-block with no trailing semicolon.
 */
#undef  PREPARE_DELTAS
#define PREPARE_DELTAS                                                             \
  if (warp_tbl != NULL) {                                                          \
    dX = warp_tbl[2*j    ];                                                        \
    dY = warp_tbl[2*j + 1];                                                        \
    dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF)); \
    dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF)); \
  }
 218 
 219 /***************************************************************/
 220 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
 221 {
 222   DECLAREVAR();
 223   mlib_s32 off;
 224   mlib_s32 x0, x1, x2, x3, y0, y1, y2, y3;
 225 #ifdef MLIB_VIS2
 226   mlib_d64 t0, t1, t2, t3;
 227   vis_write_bmask(0x45CD67EF, 0);
 228 #else
 229   vis_alignaddr((void*)0, 6);
 230 #endif /* MLIB_VIS2 */
 231 
 232   dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF));
 233   dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF));
 234 
 235   for (j = yStart; j <= yFinish; j++) {
 236     mlib_u8  *sp0, *sp1, *sp2, *sp3;
 237     mlib_d64 *dp, dmask;
 238 
 239     NEW_LINE(1);
 240 
 241     off = (mlib_s32)dl & 7;
 242     dp = (mlib_d64*)(dl - off);
 243     off >>= 1;
 244 
 245     x0 = X - off*dX; y0 = Y - off*dY;
 246     x1 = x0 + dX;    y1 = y0 + dY;
 247     x2 = x1 + dX;    y2 = y1 + dY;
 248     x3 = x2 + dX;    y3 = y2 + dY;
 249 
 250     deltax = DOUBLE_4U16(x0, x1, x2, x3);
 251     deltay = DOUBLE_4U16(y0, y1, y2, y3);
 252 
 253     if (off) {
 254       mlib_s32 emask = vis_edge16((void*)(2*off), (void*)(2*(off + size - 1)));
 255 
 256       off = 4 - off;
 257       GET_POINTER(sp3);
 258       sp0 = sp1 = sp2 = sp3;
 259 
 260       if (off > 1 && size > 1) {
 261         GET_POINTER(sp3);
 262       }
 263 
 264       if (off > 2) {
 265         sp2 = sp3;
 266 
 267         if (size > 2) {
 268           GET_POINTER(sp3);
 269         }
 270       }
 271 
 272       LOAD_1CH();
 273       BL_SUM();
 274 
 275       dmask = ((mlib_d64*)mlib_dmask_arr)[emask];
 276       *dp++ = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[0]));
 277 
 278       size -= off;
 279 
 280       if (size < 0) size = 0;
 281     }
 282 
 283 #pragma pipeloop(0)
 284     for (i = 0; i < size/4; i++) {
 285       GET_POINTER(sp0);
 286       GET_POINTER(sp1);
 287       GET_POINTER(sp2);
 288       GET_POINTER(sp3);
 289 
 290       LOAD_1CH();
 291       BL_SUM();
 292 
 293       dp[i] = dd;
 294     }
 295 
 296     off = size & 3;
 297 
 298     if (off) {
 299       GET_POINTER(sp0);
 300       sp1 = sp2 = sp3 = sp0;
 301 
 302       if (off > 1) {
 303         GET_POINTER(sp1);
 304       }
 305 
 306       if (off > 2) {
 307         GET_POINTER(sp2);
 308       }
 309 
 310       LOAD_1CH();
 311       BL_SUM();
 312 
 313       dmask = ((mlib_d64*)mlib_dmask_arr)[(0xF0 >> off) & 0x0F];
 314       dp[i] = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[i]));
 315     }
 316   }
 317 
 318   return MLIB_SUCCESS;
 319 }
 320 
 321 /***************************************************************/
 322 #undef  GET_POINTER
 323 #define GET_POINTER(sp)                                                      \
 324   sp = *(mlib_f32**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); \
 325   X += dX;                                                                   \
 326   Y += dY
 327 
 328 /***************************************************************/
/*
 * LOAD_2CH: gathers the 2x2 neighbourhoods of two 2-channel pixels for
 * the aligned path. sp0 and sp1 are mlib_f32 pointers, each element
 * being one whole pixel (two 16-bit channels); vis_freg_pair combines
 * the sp0 element and the sp1 element into one 64-bit register:
 *   s0 <- pixel at sp[0]            s1 <- pixel at sp[1]
 *   s2 <- pixel at sp[srcYStride]   s3 <- pixel at sp[srcYStride + 1]
 * srcYStride must already be expressed in mlib_f32 units (the 2ch
 * function shifts it right by 2 before use).
 *
 * Expands to several statements without a trailing semicolon.
 */
#define LOAD_2CH()                                              \
  s0 = vis_freg_pair(sp0[0], sp1[0]);                           \
  s1 = vis_freg_pair(sp0[1], sp1[1]);                           \
  s2 = vis_freg_pair(sp0[srcYStride], sp1[srcYStride]);         \
  s3 = vis_freg_pair(sp0[srcYStride + 1], sp1[srcYStride + 1])
 334 
 335 /***************************************************************/
/*
 * Per-row step setup for the 2-channel functions (2ch and 2ch_na).
 * Not expanded by name in this file; presumably invoked from NEW_LINE in
 * an included header -- TODO confirm.
 *
 * When a warp table is present, reloads dX / dY for row j from
 * warp_tbl[2*j] / warp_tbl[2*j + 1] and rebuilds dx64 / dy64 from the
 * low 16 bits of the unshifted step, placed in both halves of a 32-bit
 * word (the same expression the 2-channel functions use up front).
 *
 * The expansion is a bare if-block with no trailing semicolon.
 */
#undef  PREPARE_DELTAS
#define PREPARE_DELTAS                                               \
  if (warp_tbl != NULL) {                                            \
    dX = warp_tbl[2*j    ];                                          \
    dY = warp_tbl[2*j + 1];                                          \
    dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); \
    dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); \
  }
 344 
 345 /***************************************************************/
/*
 * Bilinear affine transform, 2-channel 16-bit image, aligned fast path
 * (mlib_ImageAffine_u16_2ch_bl).
 *
 * Requires the first source line address, the destination base and both
 * strides to be multiples of 4; otherwise the work is delegated to the
 * 2ch_na variant. Each 64-bit vector holds two pixels, so a row is
 * written as an optional single leading pixel, then pairs, then an
 * optional single trailing pixel.
 *
 * param   affine parameter block, unpacked by DECLAREVAR().
 * return  MLIB_SUCCESS, or whatever the 2ch_na variant returns.
 */
mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
{
  DECLAREVAR();
  mlib_s32 off;
  mlib_s32 x0, x1, y0, y1;

  /* any low-order bit set in an address or stride -> unaligned variant */
  if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 3) {
    return FUN_NAME(2ch_na)(param);
  }

  /* LOAD_2CH indexes mlib_f32 elements, so convert the stride from bytes */
  srcYStride >>= 2;

  dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF));
  dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF));

  for (j = yStart; j <= yFinish; j++) {
    mlib_f32 *sp0, *sp1;
    mlib_d64 *dp;

    /* NEW_LINE is defined elsewhere; dl, size, X and Y are used below
       without any other visible assignment, so it is expected to set
       them for row j -- TODO confirm */
    NEW_LINE(2);

    /* off is non-zero when the row starts in the low half of a
       64-bit word */
    off = (mlib_s32)dl & 7;
    dp = (mlib_d64*)(dl - off);

    if (off) {
      /* first real pixel goes in the low half; the high half is a
         dummy one step back */
      x0 = X - dX; y0 = Y - dY;
      x1 = X;      y1 = Y;
    } else {
      x0 = X;      y0 = Y;
      x1 = X + dX; y1 = Y + dY;
    }

    deltax = DOUBLE_4U16(x0, x0, x1, x1);
    deltay = DOUBLE_4U16(y0, y0, y1, y1);

    if (off) {
      /* leading single pixel: compute it in both halves, store only
         the low half */
      GET_POINTER(sp1);
      sp0 = sp1;
      LOAD_2CH();

      BL_SUM();

      ((mlib_f32*)dp)[1] = vis_read_lo(dd);
      dp++;
      size--;
    }

    /* two pixels per iteration */
#pragma pipeloop(0)
    for (i = 0; i < size/2; i++) {
      GET_POINTER(sp0);
      GET_POINTER(sp1);
      LOAD_2CH();

      BL_SUM();

      *dp++ = dd;
    }

    if (size & 1) {
      /* trailing single pixel: store only the high half */
      GET_POINTER(sp0);
      sp1 = sp0;
      LOAD_2CH();

      BL_SUM();

      ((mlib_f32*)dp)[0] = vis_read_hi(dd);
    }
  }

  return MLIB_SUCCESS;
}
 417 
 418 /***************************************************************/
 419 #undef  GET_POINTER
 420 #define GET_POINTER(sp)                                                       \
 421   sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 4*(X >> MLIB_SHIFT); \
 422   X += dX;                                                                    \
 423   Y += dY
 424 
 425 /***************************************************************/
/*
 * LOAD_2CH_NA: gathers the 2x2 neighbourhoods of two 2-channel pixels
 * without assuming any source alignment. sp0 and sp1 are byte pointers
 * to the two pixels. Each channel is fetched separately with LD_U16 at
 * these byte offsets:
 *   s0: +0, +2                           (pixel itself)
 *   s1: +4, +6                           (next pixel in the row)
 *   s2: +srcYStride, +srcYStride + 2     (pixel one row further)
 *   s3: +srcYStride + 4, +srcYStride + 6 (its right neighbour)
 *
 * Non-VIS2 build: samples are merged one at a time with vis_faligndata,
 * in the order sp1 channel 1, sp1 channel 0, sp0 channel 1,
 * sp0 channel 0. The initial mask_7fff operand is only filler. This
 * relies on the alignment offset set by the caller with
 * vis_alignaddr((void*)0, 6).
 * NOTE(review): the final lane order is presumably sp0.c0, sp0.c1,
 * sp1.c0, sp1.c1 from high to low -- confirm against the VIS manual.
 *
 * VIS2 build: the same gather done with vis_bshuffle; needs temporaries
 * t0..t3 declared by the caller and the byte mask written beforehand.
 *
 * Expands to several statements without a trailing semicolon.
 */
#ifndef MLIB_VIS2

#define LOAD_2CH_NA()                                           \
  s0 = vis_faligndata(LD_U16(sp1, 2), mask_7fff);               \
  s1 = vis_faligndata(LD_U16(sp1, 6), mask_7fff);               \
  s2 = vis_faligndata(LD_U16(sp1, srcYStride + 2), mask_7fff);  \
  s3 = vis_faligndata(LD_U16(sp1, srcYStride + 6), mask_7fff);  \
                                                                \
  s0 = vis_faligndata(LD_U16(sp1, 0), s0);                      \
  s1 = vis_faligndata(LD_U16(sp1, 4), s1);                      \
  s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2);             \
  s3 = vis_faligndata(LD_U16(sp1, srcYStride + 4), s3);         \
                                                                \
  s0 = vis_faligndata(LD_U16(sp0, 2), s0);                      \
  s1 = vis_faligndata(LD_U16(sp0, 6), s1);                      \
  s2 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s2);         \
  s3 = vis_faligndata(LD_U16(sp0, srcYStride + 6), s3);         \
                                                                \
  s0 = vis_faligndata(LD_U16(sp0, 0), s0);                      \
  s1 = vis_faligndata(LD_U16(sp0, 4), s1);                      \
  s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2);             \
  s3 = vis_faligndata(LD_U16(sp0, srcYStride + 4), s3)

#else

#define LOAD_2CH_NA()                                                          \
  s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp1, 0));                           \
  s1 = vis_bshuffle(LD_U16(sp0, 4), LD_U16(sp1, 4));                           \
  s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp1, srcYStride));         \
  s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 4), LD_U16(sp1, srcYStride + 4)); \
                                                                               \
  t0 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp1, 2));                           \
  t1 = vis_bshuffle(LD_U16(sp0, 6), LD_U16(sp1, 6));                           \
  t2 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp1, srcYStride + 2)); \
  t3 = vis_bshuffle(LD_U16(sp0, srcYStride + 6), LD_U16(sp1, srcYStride + 6)); \
                                                                               \
  s0 = vis_bshuffle(s0, t0);                                                   \
  s1 = vis_bshuffle(s1, t1);                                                   \
  s2 = vis_bshuffle(s2, t2);                                                   \
  s3 = vis_bshuffle(s3, t3)

#endif /* MLIB_VIS2 */
 468 
 469 /***************************************************************/
 470 mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param)
 471 {
 472   DECLAREVAR();
 473   mlib_s32 max_xsize = param -> max_xsize, bsize;
 474   mlib_s32 x0, x1, y0, y1;
 475   mlib_d64 buff[BUF_SIZE], *pbuff = buff;
 476 #ifdef MLIB_VIS2
 477   mlib_d64 t0, t1, t2, t3;
 478 #endif /* MLIB_VIS2 */
 479 
 480   bsize = (max_xsize + 1)/2;
 481 
 482   if (bsize > BUF_SIZE) {
 483     pbuff = mlib_malloc(bsize*sizeof(mlib_d64));
 484 
 485     if (pbuff == NULL) return MLIB_FAILURE;
 486   }
 487 
 488   MLIB_WRITE_BMASK(0x45CD67EF);
 489 
 490   dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF));
 491   dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF));
 492 
 493   for (j = yStart; j <= yFinish; j++) {
 494     mlib_u8 *sp0, *sp1;
 495 
 496 #ifndef MLIB_VIS2
 497     vis_alignaddr((void*)0, 6);
 498 #endif /* MLIB_VIS2 */
 499 
 500     NEW_LINE(2);
 501 
 502     x0 = X;      y0 = Y;
 503     x1 = X + dX; y1 = Y + dY;
 504 
 505     deltax = DOUBLE_4U16(x0, x0, x1, x1);
 506     deltay = DOUBLE_4U16(y0, y0, y1, y1);
 507 
 508 #pragma pipeloop(0)
 509     for (i = 0; i < size/2; i++) {
 510       GET_POINTER(sp0);
 511       GET_POINTER(sp1);
 512       LOAD_2CH_NA();
 513 
 514       BL_SUM();
 515 
 516       pbuff[i] = dd;
 517     }
 518 
 519     if (size & 1) {
 520       GET_POINTER(sp0);
 521       sp1 = sp0;
 522       LOAD_2CH_NA();
 523 
 524       BL_SUM();
 525 
 526       pbuff[i] = dd;
 527     }
 528 
 529     mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 4*size);
 530   }
 531 
 532   if (pbuff != buff) {
 533     mlib_free(pbuff);
 534   }
 535 
 536   return MLIB_SUCCESS;
 537 }
 538 
 539 /***************************************************************/
/*
 * Per-row step setup for the 3- and 4-channel functions.
 * Not expanded by name in this file; presumably invoked from NEW_LINE in
 * an included header -- TODO confirm.
 *
 * When a warp table is present, reloads dX / dY for row j from
 * warp_tbl[2*j] / warp_tbl[2*j + 1], forces them to even values rounding
 * towards zero (adds 1 to negative values, then clears bit 0), and
 * rebuilds dx64 / dy64 from the low 16 bits of (step >> 1), placed in
 * both halves of a 32-bit word.
 *
 * The expansion is a bare if-block with no trailing semicolon.
 */
#undef  PREPARE_DELTAS
#define PREPARE_DELTAS                                                             \
  if (warp_tbl != NULL) {                                                          \
    dX = warp_tbl[2*j    ];                                                        \
    dY = warp_tbl[2*j + 1];                                                        \
    dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */                       \
    dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */                       \
    dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); \
    dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); \
  }
 550 
 551 /***************************************************************/
 552 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 553 {
 554   DECLAREVAR();
 555   mlib_s32 max_xsize = param -> max_xsize;
 556   mlib_d64 buff[BUF_SIZE], *pbuff = buff;
 557 
 558   if (max_xsize > BUF_SIZE) {
 559     pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64));
 560 
 561     if (pbuff == NULL) return MLIB_FAILURE;
 562   }
 563 
 564   dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
 565   dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
 566   dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
 567   dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
 568 
 569   for (j = yStart; j <= yFinish; j++) {
 570     mlib_u8  *sp;
 571     mlib_d64 *sp0, *sp1;
 572 
 573     NEW_LINE(3);
 574 
 575     deltax = DOUBLE_4U16(X, X, X, X);
 576     deltay = DOUBLE_4U16(Y, Y, Y, Y);
 577 
 578 #pragma pipeloop(0)
 579     for (i = 0; i < size; i++) {
 580       sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 6*(X >> MLIB_SHIFT) - 2;
 581 
 582       vis_alignaddr(sp, 0);
 583       sp0 = AL_ADDR(sp, 0);
 584       s0 = vis_faligndata(sp0[0], sp0[1]);
 585       s1 = vis_faligndata(sp0[1], sp0[2]);
 586 
 587       vis_alignaddr(sp, srcYStride);
 588       sp1 = AL_ADDR(sp, srcYStride);
 589       s2 = vis_faligndata(sp1[0], sp1[1]);
 590       s3 = vis_faligndata(sp1[1], sp1[2]);
 591 
 592       BL_SUM_3CH();
 593 
 594       pbuff[i] = dd;
 595       X += dX;
 596       Y += dY;
 597     }
 598 
 599     mlib_v_ImageChannelExtract_S16_43L_D1((void *)pbuff, (void *)dl, size);
 600   }
 601 
 602   if (pbuff != buff) {
 603     mlib_free(pbuff);
 604   }
 605 
 606   return MLIB_SUCCESS;
 607 }
 608 
 609 /***************************************************************/
 610 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 611 {
 612   DECLAREVAR();
 613 
 614   if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 7) {
 615     return FUN_NAME(4ch_na)(param);
 616   }
 617 
 618   srcYStride >>= 3;
 619 
 620   dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
 621   dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
 622   dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
 623   dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
 624 
 625   for (j = yStart; j <= yFinish; j++) {
 626     mlib_d64 *sp;
 627 
 628     NEW_LINE(4);
 629 
 630     deltax = DOUBLE_4U16(X, X, X, X);
 631     deltay = DOUBLE_4U16(Y, Y, Y, Y);
 632 
 633 #pragma pipeloop(0)
 634     for (i = 0; i < size; i++) {
 635       sp = *(mlib_d64**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT);
 636       s0 = sp[0];
 637       s1 = sp[1];
 638       s2 = sp[srcYStride];
 639       s3 = sp[srcYStride + 1];
 640 
 641       BL_SUM();
 642 
 643       ((mlib_d64*)dl)[i] = dd;
 644       X += dX;
 645       Y += dY;
 646     }
 647   }
 648 
 649   return MLIB_SUCCESS;
 650 }
 651 
 652 /***************************************************************/
 653 mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param)
 654 {
 655   DECLAREVAR();
 656   mlib_s32 max_xsize = param -> max_xsize;
 657   mlib_d64 buff[BUF_SIZE], *pbuff = buff;
 658 
 659   if (max_xsize > BUF_SIZE) {
 660     pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64));
 661 
 662     if (pbuff == NULL) return MLIB_FAILURE;
 663   }
 664 
 665   dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
 666   dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
 667   dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
 668   dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
 669 
 670   for (j = yStart; j <= yFinish; j++) {
 671     mlib_u8  *sp;
 672     mlib_d64 *sp0, *sp1;
 673 
 674     NEW_LINE(4);
 675 
 676     deltax = DOUBLE_4U16(X, X, X, X);
 677     deltay = DOUBLE_4U16(Y, Y, Y, Y);
 678 
 679 #pragma pipeloop(0)
 680     for (i = 0; i < size; i++) {
 681       sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 8*(X >> MLIB_SHIFT);
 682 
 683       vis_alignaddr(sp, 0);
 684       sp0 = AL_ADDR(sp, 0);
 685       s0 = vis_faligndata(sp0[0], sp0[1]);
 686       s1 = vis_faligndata(sp0[1], sp0[2]);
 687 
 688       vis_alignaddr(sp, srcYStride);
 689       sp1 = AL_ADDR(sp, srcYStride);
 690       s2 = vis_faligndata(sp1[0], sp1[1]);
 691       s3 = vis_faligndata(sp1[1], sp1[2]);
 692 
 693       BL_SUM();
 694 
 695       pbuff[i] = dd;
 696       X += dX;
 697       Y += dY;
 698     }
 699 
 700     mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 8*size);
 701   }
 702 
 703   if (pbuff != buff) {
 704     mlib_free(pbuff);
 705   }
 706 
 707   return MLIB_SUCCESS;
 708 }
 709 
 710 /***************************************************************/