1 /*
   2  * Copyright (c) 1997, 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *      mlib_ImageAffine_u8_1ch_bl
  30  *      mlib_ImageAffine_u8_2ch_bl
  31  *      mlib_ImageAffine_u8_3ch_bl
  32  *      mlib_ImageAffine_u8_4ch_bl
  33  *        - image affine transformation with Bilinear filtering
 * SYNOPSIS
 *      mlib_status mlib_ImageAffine_u8_?ch_bl(mlib_affine_param *param)
 *
 *      Every function defined in this file takes a single
 *      mlib_affine_param pointer.  The values listed under ARGUMENTS are
 *      not separate parameters; presumably they are carried in *param and
 *      unpacked by DECLAREVAR_BL() (see mlib_ImageAffine.h).
 *
  46  * ARGUMENTS
  47  *      leftEdges  array[dstHeight] of xLeft coordinates
 *      rightEdges array[dstHeight] of xRight coordinates
  49  *      xStarts    array[dstHeight] of xStart * 65536 coordinates
  50  *      yStarts    array[dstHeight] of yStart * 65536 coordinates
  51  *      sides      output array[4]. sides[0] is yStart, sides[1] is yFinish,
  52  *                 sides[2] is dx * 65536, sides[3] is dy * 65536
  53  *      dstData    pointer to the first pixel on (yStart - 1) line
  54  *      lineAddr   array[srcHeight] of pointers to the first pixel on
  55  *                 the corresponding lines
  56  *      dstYStride stride of destination image
  57  *      is_affine  indicator (Affine - GridWarp)
  58  *      srcYStride stride of source image
  59  *
  60  * DESCRIPTION
  61  *      The functions step along the lines from xLeft to xRight and apply
  62  *      the bilinear filtering.
  63  *
  64  */
  65 
  66 #include "mlib_ImageAffine.h"
  67 
  68 /***************************************************************/
  69 #define DTYPE  mlib_u8
  70 #define FTYPE  mlib_f32
  71 
  72 /***************************************************************/
  73 #define TTYPE    mlib_f32
  74 #define I2F(x)   mlib_U82F32[x]
  75 #define ROUND(x) ((x) + 0.5f)
  76 
  77 #define FUN_NAME(CHAN) mlib_ImageAffine_u8_##CHAN##_bl
  78 
  79 /***************************************************************/
  80 #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
  81 
  82 /***************************************************************/
  83 #define GET_POINTERS(ind)                                       \
  84   fdx = (FTYPE)(X & MLIB_MASK) * scale;                         \
  85   fdy = (FTYPE)(Y & MLIB_MASK) * scale;                         \
  86   ySrc = MLIB_POINTER_SHIFT(Y);  Y += dY;                       \
  87   xSrc = X >> MLIB_SHIFT;  X += dX;                             \
  88   srcPixelPtr = MLIB_POINTER_GET(lineAddr, ySrc) + ind * xSrc;  \
  89   srcPixelPtr2 = (DTYPE *)((mlib_u8 *)srcPixelPtr + srcYStride)
  90 
  91 /***************************************************************/
  92 #define COUNT(ind)                                              \
  93   pix0_##ind = a00_##ind + fdy * (a10_##ind - a00_##ind);       \
  94   pix1_##ind = a01_##ind + fdy * (a11_##ind - a01_##ind);       \
  95   res##ind = ROUND(pix0_##ind + fdx * (pix1_##ind - pix0_##ind))
  96 
  97 /***************************************************************/
  98 #define LOAD(ind, ind1, ind2)                                   \
  99   a00_##ind = I2F(srcPixelPtr[ind1]);                           \
 100   a01_##ind = I2F(srcPixelPtr[ind2]);                           \
 101   a10_##ind = I2F(srcPixelPtr2[ind1]);                          \
 102   a11_##ind = I2F(srcPixelPtr2[ind2])
 103 
 104 /***************************************************************/
 105 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
 106 {
 107   DECLAREVAR_BL();
 108   DTYPE *dstLineEnd;
 109   DTYPE *srcPixelPtr2;
 110   FTYPE scale = (FTYPE) 1.0 / MLIB_PREC;
 111 
 112   for (j = yStart; j <= yFinish; j++) {
 113     FTYPE fdx, fdy;
 114     TTYPE a00_0, a01_0, a10_0, a11_0;
 115     FTYPE pix0_0, pix1_0, res0;
 116 
 117     CLIP(1);
 118     dstLineEnd = (DTYPE *) dstData + xRight;
 119 
 120     GET_POINTERS(1);
 121     LOAD(0, 0, 1);
 122 
 123 #ifdef __SUNPRO_C
 124 #pragma pipeloop(0)
 125 #endif /* __SUNPRO_C */
 126     for (; dstPixelPtr < dstLineEnd; dstPixelPtr++) {
 127       COUNT(0);
 128       GET_POINTERS(1);
 129       LOAD(0, 0, 1);
 130       dstPixelPtr[0] = (DTYPE) res0;
 131     }
 132 
 133     COUNT(0);
 134     dstPixelPtr[0] = (DTYPE) res0;
 135   }
 136 
 137   return MLIB_SUCCESS;
 138 }
 139 
 140 /***************************************************************/
 141 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 142 {
 143   DECLAREVAR_BL();
 144   DTYPE *dstLineEnd;
 145   DTYPE *srcPixelPtr2;
 146   FTYPE scale = (FTYPE) 1.0 / MLIB_PREC;
 147 
 148   for (j = yStart; j <= yFinish; j++) {
 149     FTYPE fdx, fdy;
 150     TTYPE a00_0, a01_0, a10_0, a11_0;
 151     TTYPE a00_1, a01_1, a10_1, a11_1;
 152     FTYPE pix0_0, pix1_0, res0;
 153     FTYPE pix0_1, pix1_1, res1;
 154 
 155     CLIP(2);
 156     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 157 
 158     GET_POINTERS(2);
 159     LOAD(0, 0, 2);
 160     LOAD(1, 1, 3);
 161 
 162 #ifdef __SUNPRO_C
 163 #pragma pipeloop(0)
 164 #endif /* __SUNPRO_C */
 165     for (; dstPixelPtr < dstLineEnd; dstPixelPtr += 2) {
 166       COUNT(0);
 167       COUNT(1);
 168       GET_POINTERS(2);
 169       LOAD(0, 0, 2);
 170       LOAD(1, 1, 3);
 171       dstPixelPtr[0] = (DTYPE) res0;
 172       dstPixelPtr[1] = (DTYPE) res1;
 173     }
 174 
 175     COUNT(0);
 176     COUNT(1);
 177     dstPixelPtr[0] = (DTYPE) res0;
 178     dstPixelPtr[1] = (DTYPE) res1;
 179   }
 180 
 181   return MLIB_SUCCESS;
 182 }
 183 
 184 /***************************************************************/
 185 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 186 {
 187   DECLAREVAR_BL();
 188   DTYPE *dstLineEnd;
 189   DTYPE *srcPixelPtr2;
 190   FTYPE scale = (FTYPE) 1.0 / MLIB_PREC;
 191 
 192   for (j = yStart; j <= yFinish; j++) {
 193     FTYPE fdx, fdy;
 194     FTYPE a00_0, a01_0, a10_0, a11_0;
 195     FTYPE a00_1, a01_1, a10_1, a11_1;
 196     FTYPE a00_2, a01_2, a10_2, a11_2;
 197     FTYPE pix0_0, pix1_0, res0;
 198     FTYPE pix0_1, pix1_1, res1;
 199     FTYPE pix0_2, pix1_2, res2;
 200 
 201     CLIP(3);
 202     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 203 
 204     GET_POINTERS(3);
 205     LOAD(0, 0, 3);
 206     LOAD(1, 1, 4);
 207     LOAD(2, 2, 5);
 208 
 209 #ifdef __SUNPRO_C
 210 #pragma pipeloop(0)
 211 #endif /* __SUNPRO_C */
 212     for (; dstPixelPtr < dstLineEnd; dstPixelPtr += 3) {
 213       COUNT(0);
 214       COUNT(1);
 215       COUNT(2);
 216       GET_POINTERS(3);
 217       LOAD(0, 0, 3);
 218       LOAD(1, 1, 4);
 219       LOAD(2, 2, 5);
 220       dstPixelPtr[0] = (DTYPE) res0;
 221       dstPixelPtr[1] = (DTYPE) res1;
 222       dstPixelPtr[2] = (DTYPE) res2;
 223     }
 224 
 225     COUNT(0);
 226     COUNT(1);
 227     COUNT(2);
 228     dstPixelPtr[0] = (DTYPE) res0;
 229     dstPixelPtr[1] = (DTYPE) res1;
 230     dstPixelPtr[2] = (DTYPE) res2;
 231   }
 232 
 233   return MLIB_SUCCESS;
 234 }
 235 
 236 /***************************************************************/
 237 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 238 {
 239   DECLAREVAR_BL();
 240   DTYPE *dstLineEnd;
 241   DTYPE *srcPixelPtr2;
 242   FTYPE scale = (FTYPE) 1.0 / MLIB_PREC;
 243 
 244   for (j = yStart; j <= yFinish; j++) {
 245     FTYPE fdx, fdy;
 246     TTYPE a00_0, a01_0, a10_0, a11_0;
 247     TTYPE a00_1, a01_1, a10_1, a11_1;
 248     TTYPE a00_2, a01_2, a10_2, a11_2;
 249     TTYPE a00_3, a01_3, a10_3, a11_3;
 250     FTYPE pix0_0, pix1_0, res0;
 251     FTYPE pix0_1, pix1_1, res1;
 252     FTYPE pix0_2, pix1_2, res2;
 253     FTYPE pix0_3, pix1_3, res3;
 254 
 255     CLIP(4);
 256     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
 257 
 258     GET_POINTERS(4);
 259     LOAD(0, 0, 4);
 260     LOAD(1, 1, 5);
 261     LOAD(2, 2, 6);
 262     LOAD(3, 3, 7);
 263 
 264 #ifdef __SUNPRO_C
 265 #pragma pipeloop(0)
 266 #endif /* __SUNPRO_C */
 267     for (; dstPixelPtr < dstLineEnd; dstPixelPtr += 4) {
 268       COUNT(0);
 269       COUNT(1);
 270       COUNT(2);
 271       COUNT(3);
 272       GET_POINTERS(4);
 273       LOAD(0, 0, 4);
 274       LOAD(1, 1, 5);
 275       LOAD(2, 2, 6);
 276       LOAD(3, 3, 7);
 277       dstPixelPtr[0] = (DTYPE) res0;
 278       dstPixelPtr[1] = (DTYPE) res1;
 279       dstPixelPtr[2] = (DTYPE) res2;
 280       dstPixelPtr[3] = (DTYPE) res3;
 281     }
 282 
 283     COUNT(0);
 284     COUNT(1);
 285     COUNT(2);
 286     COUNT(3);
 287     dstPixelPtr[0] = (DTYPE) res0;
 288     dstPixelPtr[1] = (DTYPE) res1;
 289     dstPixelPtr[2] = (DTYPE) res2;
 290     dstPixelPtr[3] = (DTYPE) res3;
 291   }
 292 
 293   return MLIB_SUCCESS;
 294 }
 295 
 296 #else       /* for x86, using integer multiplies is faster */
 297 
 298 /* for SHORT/USHORT decrease MLIB_SHIFT due to
 299  * overflow in multiplies like fdy * (a10 - a00)
 300  */
 301 /*
 302 #undef  MLIB_SHIFT
 303 #define MLIB_SHIFT  15
 304 */
 305 
 306 #define MLIB_ROUND   (1 << (MLIB_SHIFT - 1))
 307 
 308 /***************************************************************/
 309 #define GET_POINTERS(ind)                                        \
 310   fdx = X & MLIB_MASK;                                           \
 311   fdy = Y & MLIB_MASK;                                           \
 312   ySrc = MLIB_POINTER_SHIFT(Y);                                  \
 313   xSrc = X >> MLIB_SHIFT;                                        \
 314   srcPixelPtr = MLIB_POINTER_GET(lineAddr, ySrc) + ind * xSrc;   \
 315   srcPixelPtr2 = (DTYPE *)((mlib_u8 *)srcPixelPtr + srcYStride); \
 316   X += dX;                                                       \
 317   Y += dY
 318 
 319 /***************************************************************/
 320 #define COUNT(ind)                                                                       \
 321   pix0_##ind = a00_##ind + ((fdy * (a10_##ind - a00_##ind) + MLIB_ROUND) >> MLIB_SHIFT); \
 322   pix1_##ind = a01_##ind + ((fdy * (a11_##ind - a01_##ind) + MLIB_ROUND) >> MLIB_SHIFT); \
 323   res##ind = pix0_##ind + ((fdx * (pix1_##ind - pix0_##ind) + MLIB_ROUND) >> MLIB_SHIFT)
 324 
 325 /***************************************************************/
 326 #define LOAD(ind, ind1, ind2)                                   \
 327   a00_##ind = srcPixelPtr[ind1];                                \
 328   a01_##ind = srcPixelPtr[ind2];                                \
 329   a10_##ind = srcPixelPtr2[ind1];                               \
 330   a11_##ind = srcPixelPtr2[ind2]
 331 
 332 /***************************************************************/
 333 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
 334 {
 335   DECLAREVAR_BL();
 336   DTYPE *dstLineEnd;
 337   DTYPE *srcPixelPtr2;
 338 
 339 #if MLIB_SHIFT == 15
 340   dX = (dX + 1) >> 1;
 341   dY = (dY + 1) >> 1;
 342 #endif /* MLIB_SHIFT == 15 */
 343 
 344   for (j = yStart; j <= yFinish; j++) {
 345     mlib_s32 fdx, fdy;
 346     mlib_s32 a00_0, a01_0, a10_0, a11_0;
 347     mlib_s32 pix0_0, pix1_0, res0;
 348 
 349     CLIP(1);
 350     dstLineEnd = (DTYPE *) dstData + xRight;
 351 #if MLIB_SHIFT == 15
 352     X = X >> 1;
 353     Y = Y >> 1;
 354 #endif /* MLIB_SHIFT == 15 */
 355 
 356     GET_POINTERS(1);
 357     LOAD(0, 0, 1);
 358 
 359 #ifdef __SUNPRO_C
 360 #pragma pipeloop(0)
 361 #endif /* __SUNPRO_C */
 362     for (; dstPixelPtr < dstLineEnd; dstPixelPtr++) {
 363       COUNT(0);
 364       GET_POINTERS(1);
 365       LOAD(0, 0, 1);
 366       dstPixelPtr[0] = (DTYPE) res0;
 367     }
 368 
 369     COUNT(0);
 370     dstPixelPtr[0] = (DTYPE) res0;
 371   }
 372 
 373   return MLIB_SUCCESS;
 374 }
 375 
 376 /***************************************************************/
 377 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 378 {
 379   DECLAREVAR_BL();
 380   DTYPE *dstLineEnd;
 381   DTYPE *srcPixelPtr2;
 382 
 383 #if MLIB_SHIFT == 15
 384   dX = (dX + 1) >> 1;
 385   dY = (dY + 1) >> 1;
 386 #endif /* MLIB_SHIFT == 15 */
 387 
 388   for (j = yStart; j <= yFinish; j++) {
 389     mlib_s32 fdx, fdy;
 390     mlib_s32 a00_0, a01_0, a10_0, a11_0;
 391     mlib_s32 a00_1, a01_1, a10_1, a11_1;
 392     mlib_s32 pix0_0, pix1_0, res0;
 393     mlib_s32 pix0_1, pix1_1, res1;
 394 
 395     CLIP(2);
 396     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 397 #if MLIB_SHIFT == 15
 398     X = X >> 1;
 399     Y = Y >> 1;
 400 #endif /* MLIB_SHIFT == 15 */
 401 
 402     GET_POINTERS(2);
 403     LOAD(0, 0, 2);
 404     LOAD(1, 1, 3);
 405 
 406 #ifdef __SUNPRO_C
 407 #pragma pipeloop(0)
 408 #endif /* __SUNPRO_C */
 409     for (; dstPixelPtr < dstLineEnd; dstPixelPtr += 2) {
 410       COUNT(0);
 411       COUNT(1);
 412       GET_POINTERS(2);
 413       LOAD(0, 0, 2);
 414       LOAD(1, 1, 3);
 415       dstPixelPtr[0] = (DTYPE) res0;
 416       dstPixelPtr[1] = (DTYPE) res1;
 417     }
 418 
 419     COUNT(0);
 420     COUNT(1);
 421     dstPixelPtr[0] = (DTYPE) res0;
 422     dstPixelPtr[1] = (DTYPE) res1;
 423   }
 424 
 425   return MLIB_SUCCESS;
 426 }
 427 
 428 /***************************************************************/
 429 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 430 {
 431   DECLAREVAR_BL();
 432   DTYPE *dstLineEnd;
 433   DTYPE *srcPixelPtr2;
 434 
 435 #if MLIB_SHIFT == 15
 436   dX = (dX + 1) >> 1;
 437   dY = (dY + 1) >> 1;
 438 #endif /* MLIB_SHIFT == 15 */
 439 
 440   for (j = yStart; j <= yFinish; j++) {
 441     mlib_s32 fdx, fdy;
 442     mlib_s32 a00_0, a01_0, a10_0, a11_0;
 443     mlib_s32 a00_1, a01_1, a10_1, a11_1;
 444     mlib_s32 a00_2, a01_2, a10_2, a11_2;
 445     mlib_s32 pix0_0, pix1_0, res0;
 446     mlib_s32 pix0_1, pix1_1, res1;
 447     mlib_s32 pix0_2, pix1_2, res2;
 448 
 449     CLIP(3);
 450     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 451 #if MLIB_SHIFT == 15
 452     X = X >> 1;
 453     Y = Y >> 1;
 454 #endif /* MLIB_SHIFT == 15 */
 455 
 456     GET_POINTERS(3);
 457     LOAD(0, 0, 3);
 458     LOAD(1, 1, 4);
 459     LOAD(2, 2, 5);
 460 
 461 #ifdef __SUNPRO_C
 462 #pragma pipeloop(0)
 463 #endif /* __SUNPRO_C */
 464     for (; dstPixelPtr < dstLineEnd; dstPixelPtr += 3) {
 465       COUNT(0);
 466       COUNT(1);
 467       COUNT(2);
 468       GET_POINTERS(3);
 469       LOAD(0, 0, 3);
 470       LOAD(1, 1, 4);
 471       LOAD(2, 2, 5);
 472       dstPixelPtr[0] = (DTYPE) res0;
 473       dstPixelPtr[1] = (DTYPE) res1;
 474       dstPixelPtr[2] = (DTYPE) res2;
 475     }
 476 
 477     COUNT(0);
 478     COUNT(1);
 479     COUNT(2);
 480     dstPixelPtr[0] = (DTYPE) res0;
 481     dstPixelPtr[1] = (DTYPE) res1;
 482     dstPixelPtr[2] = (DTYPE) res2;
 483   }
 484 
 485   return MLIB_SUCCESS;
 486 }
 487 
 488 /***************************************************************/
 489 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 490 {
 491   DECLAREVAR_BL();
 492   DTYPE *dstLineEnd;
 493   DTYPE *srcPixelPtr2;
 494 
 495 #if MLIB_SHIFT == 15
 496   dX = (dX + 1) >> 1;
 497   dY = (dY + 1) >> 1;
 498 #endif /* MLIB_SHIFT == 15 */
 499 
 500   for (j = yStart; j <= yFinish; j++) {
 501     mlib_s32 fdx, fdy;
 502     mlib_s32 a00_0, a01_0, a10_0, a11_0;
 503     mlib_s32 a00_1, a01_1, a10_1, a11_1;
 504     mlib_s32 a00_2, a01_2, a10_2, a11_2;
 505     mlib_s32 a00_3, a01_3, a10_3, a11_3;
 506     mlib_s32 pix0_0, pix1_0, res0;
 507     mlib_s32 pix0_1, pix1_1, res1;
 508     mlib_s32 pix0_2, pix1_2, res2;
 509     mlib_s32 pix0_3, pix1_3, res3;
 510 
 511     CLIP(4);
 512     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
 513 #if MLIB_SHIFT == 15
 514     X = X >> 1;
 515     Y = Y >> 1;
 516 #endif /* MLIB_SHIFT == 15 */
 517 
 518     GET_POINTERS(4);
 519     LOAD(0, 0, 4);
 520     LOAD(1, 1, 5);
 521     LOAD(2, 2, 6);
 522     LOAD(3, 3, 7);
 523 
 524 #ifdef __SUNPRO_C
 525 #pragma pipeloop(0)
 526 #endif /* __SUNPRO_C */
 527     for (; dstPixelPtr < dstLineEnd; dstPixelPtr += 4) {
 528       COUNT(0);
 529       COUNT(1);
 530       COUNT(2);
 531       COUNT(3);
 532       GET_POINTERS(4);
 533       LOAD(0, 0, 4);
 534       LOAD(1, 1, 5);
 535       LOAD(2, 2, 6);
 536       LOAD(3, 3, 7);
 537       dstPixelPtr[0] = (DTYPE) res0;
 538       dstPixelPtr[1] = (DTYPE) res1;
 539       dstPixelPtr[2] = (DTYPE) res2;
 540       dstPixelPtr[3] = (DTYPE) res3;
 541     }
 542 
 543     COUNT(0);
 544     COUNT(1);
 545     COUNT(2);
 546     COUNT(3);
 547     dstPixelPtr[0] = (DTYPE) res0;
 548     dstPixelPtr[1] = (DTYPE) res1;
 549     dstPixelPtr[2] = (DTYPE) res2;
 550     dstPixelPtr[3] = (DTYPE) res3;
 551   }
 552 
 553   return MLIB_SUCCESS;
 554 }
 555 
 556 #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
 557 
 558 /***************************************************************/