< prev index next >

src/java.desktop/share/native/libmlib_image/mlib_c_ImageAffine_BC.c

Print this page
rev 59106 : imported patch client


  54  *      dstYStride stride of destination image
  55  *      is_affine  indicator (Affine - GridWarp)
  56  *      srcYStride stride of source image
  57  *      filter     type of resampling filter
  58  *
  59  * DESCRIPTION
  60  *      The functions step along the lines from xLeft to xRight and apply
  61  *      the bicubic filtering.
  62  *
  63  */
  64 
  65 #include "mlib_ImageAffine.h"
  66 
  67 #define DTYPE  mlib_u8 /* sample type handled by every function in this file */
  68 
  69 #define FUN_NAME(CHAN) mlib_ImageAffine_u8_##CHAN##_bc /* e.g. FUN_NAME(1ch) -> mlib_ImageAffine_u8_1ch_bc */
  70 
  71 #define FILTER_BITS   8 /* NOTE(review): not referenced directly in this file; presumably consumed by macros from mlib_ImageAffine.h (FILTER_SHIFT / FILTER_MASK) - confirm */
  72 
  73 /***************************************************************/
  74 #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
  75 
  76 #undef  FILTER_ELEM_BITS
  77 #define FILTER_ELEM_BITS  4
  78 
     /*
      * SAT8(DST): narrow the accumulated result to an 8-bit sample and store
      * it in DST.  Both variants read the locals `val0` and `sat` that must be
      * in scope where the macro is expanded.  `sat` is subtracted from `val0`,
      * the value is converted to mlib_s32, the top byte is taken (>> 24) and
      * bit 7 is flipped (^ 0x80).
      *
      * With MLIB_USE_FTOI_CLAMPING defined no explicit range check is made;
      * NOTE(review): this presumably relies on the platform's float-to-int
      * conversion saturating out-of-range values - confirm.
      *
      * Otherwise the range is checked explicitly: values at or beyond the
      * MLIB_S32_MAX / MLIB_S32_MIN limits are stored as MLIB_U8_MAX /
      * MLIB_U8_MIN.  This variant modifies `val0` and expands to several
      * statements without a do { } while (0) wrapper, so it must only be used
      * as a complete statement inside a braced block.
      */
  79 #ifdef MLIB_USE_FTOI_CLAMPING
  80 
  81 #define SAT8(DST)                                               \
  82   DST = ((mlib_s32)(val0 - sat) >> 24) ^ 0x80
  83 
  84 #else
  85 
  86 #define SAT8(DST)                                               \
  87   val0 -= sat;                                                  \
  88   if (val0 >= MLIB_S32_MAX)                                     \
  89     DST = MLIB_U8_MAX;                                          \
  90   else if (val0 <= MLIB_S32_MIN)                                \
  91     DST = MLIB_U8_MIN;                                          \
  92   else                                                          \
  93     DST = ((mlib_s32)val0 >> 24) ^ 0x80
  94 
  95 #endif /* MLIB_USE_FTOI_CLAMPING */
  96 
  97 /***************************************************************/
  98 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
  99 {
       /*
        * Bicubic affine resampling of a 1-channel mlib_u8 image using
        * floating-point arithmetic.  For every destination row j in
        * [yStart, yFinish], each destination sample is a 4x4 weighted sum of
        * source samples: four horizontal sums c0..c3 (weights xf0..xf3), one
        * per source row, are combined with the vertical weights yf0..yf3 and
        * stored through SAT8.  Always returns MLIB_SUCCESS.
        *
        * NOTE(review): DECLAREVAR_BC() and CLIP() are defined outside this
        * file; they presumably declare and set up j, X, Y, dX, dY, xRight,
        * dstData, dstPixelPtr, srcPixelPtr, lineAddr, srcYStride, filter,
        * yStart and yFinish - confirm in mlib_ImageAffine.h.
        */
 100   DECLAREVAR_BC();
 101   DTYPE *dstLineEnd;
       /* bias that SAT8 subtracts from val0 before narrowing to 8 bits */
 102   mlib_d64 sat = (mlib_d64) 0x7F800000;
 103   const mlib_f32 *mlib_filters_table;
 104 
       /* MLIB_BICUBIC uses one coefficient table, any other filter value the second */
 105   if (filter == MLIB_BICUBIC) {
 106     mlib_filters_table = mlib_filters_u8f_bc;
 107   }
 108   else {
 109     mlib_filters_table = mlib_filters_u8f_bc2;
 110   }
 111 
 112   for (j = yStart; j <= yFinish; j++) {
 113     mlib_d64 xf0, xf1, xf2, xf3;
 114     mlib_d64 yf0, yf1, yf2, yf3;
 115     mlib_d64 c0, c1, c2, c3, val0;
 116     mlib_s32 filterpos;
 117     mlib_f32 *fptr;
 118     mlib_u8 s0, s1, s2, s3;
 119 
 120     CLIP(1);
         /* address of the last destination sample of this row */
 121     dstLineEnd = (DTYPE *) dstData + xRight;
 122 
         /*
          * Prologue: load the coefficients and the first source row for the
          * first destination sample.  filterpos is applied as a BYTE offset
          * into the table (note the mlib_u8 * cast) and four consecutive
          * mlib_f32 coefficients are read from there.
          */
 123     filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 124     fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 125 
 126     xf0 = fptr[0];
 127     xf1 = fptr[1];
 128     xf2 = fptr[2];
 129     xf3 = fptr[3];
 130 
 131     filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 132     fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 133 
 134     yf0 = fptr[0];
 135     yf1 = fptr[1];
 136     yf2 = fptr[2];
 137     yf3 = fptr[3];
 138 
         /* the 4-sample window starts one position before (X >> MLIB_SHIFT, Y >> MLIB_SHIFT) */
 139     xSrc = (X >> MLIB_SHIFT) - 1;
 140     ySrc = (Y >> MLIB_SHIFT) - 1;
 141 
 142     srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 143     s0 = srcPixelPtr[0];
 144     s1 = srcPixelPtr[1];
 145     s2 = srcPixelPtr[2];
 146     s3 = srcPixelPtr[3];
 147 
         /*
          * Main loop, written in prologue/body/epilogue form: each iteration
          * finishes the sample whose coefficients and first-row values were
          * loaded earlier and then loads those for the next sample.  It stops
          * one sample short of dstLineEnd; the last sample is produced after
          * the loop.
          */
 148 #ifdef __SUNPRO_C
 149 #pragma pipeloop(0)
 150 #endif /* __SUNPRO_C */
 151     for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
           /* advance to the next sample's coordinates before the reloads below */
 152       X += dX;
 153       Y += dY;
 154 
           /* horizontal sums for the four source rows; rows are srcYStride bytes apart */
 155       c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 156             mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 157       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 158       c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 159             mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 160       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 161       c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 162             mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 163       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 164       c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 165             mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 166 
           /* xf* are no longer needed for the current sample: reload for the next one */
 167       filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
 168       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 169 
 170       xf0 = fptr[0];
 171       xf1 = fptr[1];
 172       xf2 = fptr[2];
 173       xf3 = fptr[3];
 174 
           /* vertical combination must use the current yf* before they are reloaded */
 175       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 176 
 177       filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
 178       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 179 
 180       yf0 = fptr[0];
 181       yf1 = fptr[1];
 182       yf2 = fptr[2];
 183       yf3 = fptr[3];
 184 
 185       SAT8(dstPixelPtr[0]);
 186 
           /* first source row of the next sample */
 187       xSrc = (X >> MLIB_SHIFT) - 1;
 188       ySrc = (Y >> MLIB_SHIFT) - 1;
 189 
 190       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
 191       s0 = srcPixelPtr[0];
 192       s1 = srcPixelPtr[1];
 193       s2 = srcPixelPtr[2];
 194       s3 = srcPixelPtr[3];
 195     }
 196 
         /* Epilogue: last sample of the row, from the values loaded most recently */
 197     c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 198           mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 199     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 200     c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 201           mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 202     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 203     c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 204           mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 205     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 206     c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
 207           mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
 208 
 209     val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 210 
 211     SAT8(dstPixelPtr[0]);
 212   }
 213 
 214   return MLIB_SUCCESS;
 215 }
 216 
 217 /***************************************************************/
 218 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
 219 {
       /*
        * Bicubic affine resampling of a 2-channel interleaved mlib_u8 image
        * using floating-point arithmetic.  For every destination row j in
        * [yStart, yFinish] the row is walked once per channel k; each
        * destination sample is a 4x4 weighted sum of same-channel source
        * samples (horizontal weights xf0..xf3, vertical weights yf0..yf3)
        * stored through SAT8.  Always returns MLIB_SUCCESS.
        *
        * NOTE(review): DECLAREVAR_BC() and CLIP() are defined outside this
        * file; they presumably declare and set up j, X, Y, dX, dY, xRight,
        * dstData, dstPixelPtr, srcPixelPtr, lineAddr, srcYStride, filter,
        * yStart and yFinish - confirm in mlib_ImageAffine.h.
        */
 220   DECLAREVAR_BC();
 221   DTYPE *dstLineEnd;
       /* bias that SAT8 subtracts from val0 before narrowing to 8 bits */
 222   mlib_d64 sat = (mlib_d64) 0x7F800000;
 223   const mlib_f32 *mlib_filters_table;
 224 
       /* MLIB_BICUBIC uses one coefficient table, any other filter value the second */
 225   if (filter == MLIB_BICUBIC) {
 226     mlib_filters_table = mlib_filters_u8f_bc;
 227   }
 228   else {
 229     mlib_filters_table = mlib_filters_u8f_bc2;
 230   }
 231 
 232   for (j = yStart; j <= yFinish; j++) {
 233     mlib_d64 xf0, xf1, xf2, xf3;
 234     mlib_d64 yf0, yf1, yf2, yf3;
 235     mlib_d64 c0, c1, c2, c3, val0;
 236     mlib_s32 filterpos, k;
 237     mlib_f32 *fptr;
 238     mlib_u8 s0, s1, s2, s3;
 239 
 240     CLIP(2);
         /* address of channel 0 of the last destination pixel of this row */
 241     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
 242 
         /* one pass over the row per channel; X and Y themselves are left untouched */
 243     for (k = 0; k < 2; k++) {
 244       mlib_s32 X1 = X;
 245       mlib_s32 Y1 = Y;
 246       DTYPE *dPtr = dstPixelPtr + k;
 247 
           /*
            * Prologue: coefficients and first source row for the first sample.
            * filterpos is applied as a BYTE offset into the table (note the
            * mlib_u8 * cast).
            */
 248       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 249       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 250 
 251       xf0 = fptr[0];
 252       xf1 = fptr[1];
 253       xf2 = fptr[2];
 254       xf3 = fptr[3];
 255 
 256       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 257       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 258 
 259       yf0 = fptr[0];
 260       yf1 = fptr[1];
 261       yf2 = fptr[2];
 262       yf3 = fptr[3];
 263 
           /* the 4-sample window starts one pixel before (X1 >> MLIB_SHIFT, Y1 >> MLIB_SHIFT) */
 264       xSrc = (X1 >> MLIB_SHIFT) - 1;
 265       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 266 
           /* neighbouring samples of channel k are 2 elements apart */
 267       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 268       s0 = srcPixelPtr[0];
 269       s1 = srcPixelPtr[2];
 270       s2 = srcPixelPtr[4];
 271       s3 = srcPixelPtr[6];
 272 
           /*
            * Main loop in prologue/body/epilogue form: each iteration finishes
            * the sample prepared earlier and loads the data for the next one.
            * It stops before the last pixel, which is produced after the loop.
            */
 273 #ifdef __SUNPRO_C
 274 #pragma pipeloop(0)
 275 #endif /* __SUNPRO_C */
 276       for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
 277         X1 += dX;
 278         Y1 += dY;
 279 
             /* horizontal sums for the four source rows; rows are srcYStride bytes apart */
 280         c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 281               mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 282         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 283         c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 284               mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 285         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 286         c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 287               mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 288         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 289         c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 290               mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 291 
             /* xf* are no longer needed for the current sample: reload for the next one */
 292         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 293         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 294 
 295         xf0 = fptr[0];
 296         xf1 = fptr[1];
 297         xf2 = fptr[2];
 298         xf3 = fptr[3];
 299 
             /* vertical combination must use the current yf* before they are reloaded */
 300         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 301 
 302         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 303         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 304 
 305         yf0 = fptr[0];
 306         yf1 = fptr[1];
 307         yf2 = fptr[2];
 308         yf3 = fptr[3];
 309 
 310         SAT8(dPtr[0]);
 311 
             /* first source row of the next sample */
 312         xSrc = (X1 >> MLIB_SHIFT) - 1;
 313         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 314 
 315         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
 316         s0 = srcPixelPtr[0];
 317         s1 = srcPixelPtr[2];
 318         s2 = srcPixelPtr[4];
 319         s3 = srcPixelPtr[6];
 320       }
 321 
           /* Epilogue: last sample of this channel, from the values loaded most recently */
 322       c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 323             mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 324       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 325       c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 326             mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 327       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 328       c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 329             mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 330       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 331       c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
 332             mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
 333 
 334       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 335 
 336       SAT8(dPtr[0]);
 337     }
 338   }
 339 
 340   return MLIB_SUCCESS;
 341 }
 342 
 343 /***************************************************************/
 344 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
 345 {
       /*
        * Bicubic affine resampling of a 3-channel interleaved mlib_u8 image
        * using floating-point arithmetic.  For every destination row j in
        * [yStart, yFinish] the row is walked once per channel k; each
        * destination sample is a 4x4 weighted sum of same-channel source
        * samples (horizontal weights xf0..xf3, vertical weights yf0..yf3)
        * stored through SAT8.  Always returns MLIB_SUCCESS.
        *
        * NOTE(review): DECLAREVAR_BC() and CLIP() are defined outside this
        * file; they presumably declare and set up j, X, Y, dX, dY, xRight,
        * dstData, dstPixelPtr, srcPixelPtr, lineAddr, srcYStride, filter,
        * yStart and yFinish - confirm in mlib_ImageAffine.h.
        */
 346   DECLAREVAR_BC();
 347   DTYPE *dstLineEnd;
       /* bias that SAT8 subtracts from val0 before narrowing to 8 bits */
 348   mlib_d64 sat = (mlib_d64) 0x7F800000;
 349   const mlib_f32 *mlib_filters_table;
 350 
       /* MLIB_BICUBIC uses one coefficient table, any other filter value the second */
 351   if (filter == MLIB_BICUBIC) {
 352     mlib_filters_table = mlib_filters_u8f_bc;
 353   }
 354   else {
 355     mlib_filters_table = mlib_filters_u8f_bc2;
 356   }
 357 
 358   for (j = yStart; j <= yFinish; j++) {
 359     mlib_d64 xf0, xf1, xf2, xf3;
 360     mlib_d64 yf0, yf1, yf2, yf3;
 361     mlib_d64 c0, c1, c2, c3, val0;
 362     mlib_s32 filterpos, k;
 363     mlib_f32 *fptr;
 364     mlib_u8 s0, s1, s2, s3;
 365 
 366     CLIP(3);
         /* address of channel 0 of the last destination pixel of this row */
 367     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
 368 
         /* one pass over the row per channel; X and Y themselves are left untouched */
 369     for (k = 0; k < 3; k++) {
 370       mlib_s32 X1 = X;
 371       mlib_s32 Y1 = Y;
 372       DTYPE *dPtr = dstPixelPtr + k;
 373 
           /*
            * Prologue: coefficients and first source row for the first sample.
            * filterpos is applied as a BYTE offset into the table (note the
            * mlib_u8 * cast).
            */
 374       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 375       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 376 
 377       xf0 = fptr[0];
 378       xf1 = fptr[1];
 379       xf2 = fptr[2];
 380       xf3 = fptr[3];
 381 
 382       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 383       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 384 
 385       yf0 = fptr[0];
 386       yf1 = fptr[1];
 387       yf2 = fptr[2];
 388       yf3 = fptr[3];
 389 
           /* the 4-sample window starts one pixel before (X1 >> MLIB_SHIFT, Y1 >> MLIB_SHIFT) */
 390       xSrc = (X1 >> MLIB_SHIFT) - 1;
 391       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 392 
           /* neighbouring samples of channel k are 3 elements apart */
 393       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 394       s0 = srcPixelPtr[0];
 395       s1 = srcPixelPtr[3];
 396       s2 = srcPixelPtr[6];
 397       s3 = srcPixelPtr[9];
 398 
           /*
            * Main loop in prologue/body/epilogue form: each iteration finishes
            * the sample prepared earlier and loads the data for the next one.
            * It stops before the last pixel, which is produced after the loop.
            */
 399 #ifdef __SUNPRO_C
 400 #pragma pipeloop(0)
 401 #endif /* __SUNPRO_C */
 402       for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
 403         X1 += dX;
 404         Y1 += dY;
 405 
             /* horizontal sums for the four source rows; rows are srcYStride bytes apart */
 406         c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 407               mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 408         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 409         c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 410               mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 411         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 412         c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 413               mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 414         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 415         c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 416               mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 417 
             /* xf* are no longer needed for the current sample: reload for the next one */
 418         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 419         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 420 
 421         xf0 = fptr[0];
 422         xf1 = fptr[1];
 423         xf2 = fptr[2];
 424         xf3 = fptr[3];
 425 
             /* vertical combination must use the current yf* before they are reloaded */
 426         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 427 
 428         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 429         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 430 
 431         yf0 = fptr[0];
 432         yf1 = fptr[1];
 433         yf2 = fptr[2];
 434         yf3 = fptr[3];
 435 
 436         SAT8(dPtr[0]);
 437 
             /* first source row of the next sample */
 438         xSrc = (X1 >> MLIB_SHIFT) - 1;
 439         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 440 
 441         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
 442         s0 = srcPixelPtr[0];
 443         s1 = srcPixelPtr[3];
 444         s2 = srcPixelPtr[6];
 445         s3 = srcPixelPtr[9];
 446       }
 447 
           /* Epilogue: last sample of this channel, from the values loaded most recently */
 448       c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 449             mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 450       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 451       c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 452             mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 453       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 454       c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 455             mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 456       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 457       c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
 458             mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
 459 
 460       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 461 
 462       SAT8(dPtr[0]);
 463     }
 464   }
 465 
 466   return MLIB_SUCCESS;
 467 }
 468 
 469 /***************************************************************/
 470 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
 471 {
       /*
        * Bicubic affine resampling of a 4-channel interleaved mlib_u8 image
        * using floating-point arithmetic.  For every destination row j in
        * [yStart, yFinish] the row is walked once per channel k; each
        * destination sample is a 4x4 weighted sum of same-channel source
        * samples (horizontal weights xf0..xf3, vertical weights yf0..yf3)
        * stored through SAT8.  Always returns MLIB_SUCCESS.
        *
        * NOTE(review): DECLAREVAR_BC() and CLIP() are defined outside this
        * file; they presumably declare and set up j, X, Y, dX, dY, xRight,
        * dstData, dstPixelPtr, srcPixelPtr, lineAddr, srcYStride, filter,
        * yStart and yFinish - confirm in mlib_ImageAffine.h.
        */
 472   DECLAREVAR_BC();
 473   DTYPE *dstLineEnd;
       /* bias that SAT8 subtracts from val0 before narrowing to 8 bits */
 474   mlib_d64 sat = (mlib_d64) 0x7F800000;
 475   const mlib_f32 *mlib_filters_table;
 476 
       /* MLIB_BICUBIC uses one coefficient table, any other filter value the second */
 477   if (filter == MLIB_BICUBIC) {
 478     mlib_filters_table = mlib_filters_u8f_bc;
 479   }
 480   else {
 481     mlib_filters_table = mlib_filters_u8f_bc2;
 482   }
 483 
 484   for (j = yStart; j <= yFinish; j++) {
 485     mlib_d64 xf0, xf1, xf2, xf3;
 486     mlib_d64 yf0, yf1, yf2, yf3;
 487     mlib_d64 c0, c1, c2, c3, val0;
 488     mlib_s32 filterpos, k;
 489     mlib_f32 *fptr;
 490     mlib_u8 s0, s1, s2, s3;
 491 
 492     CLIP(4);
         /* address of channel 0 of the last destination pixel of this row */
 493     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
 494 
         /* one pass over the row per channel; X and Y themselves are left untouched */
 495     for (k = 0; k < 4; k++) {
 496       mlib_s32 X1 = X;
 497       mlib_s32 Y1 = Y;
 498       DTYPE *dPtr = dstPixelPtr + k;
 499 
           /*
            * Prologue: coefficients and first source row for the first sample.
            * filterpos is applied as a BYTE offset into the table (note the
            * mlib_u8 * cast).
            */
 500       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 501       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 502 
 503       xf0 = fptr[0];
 504       xf1 = fptr[1];
 505       xf2 = fptr[2];
 506       xf3 = fptr[3];
 507 
 508       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 509       fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 510 
 511       yf0 = fptr[0];
 512       yf1 = fptr[1];
 513       yf2 = fptr[2];
 514       yf3 = fptr[3];
 515 
           /* the 4-sample window starts one pixel before (X1 >> MLIB_SHIFT, Y1 >> MLIB_SHIFT) */
 516       xSrc = (X1 >> MLIB_SHIFT) - 1;
 517       ySrc = (Y1 >> MLIB_SHIFT) - 1;
 518 
           /* neighbouring samples of channel k are 4 elements apart */
 519       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 520       s0 = srcPixelPtr[0];
 521       s1 = srcPixelPtr[4];
 522       s2 = srcPixelPtr[8];
 523       s3 = srcPixelPtr[12];
 524 
           /*
            * Main loop in prologue/body/epilogue form: each iteration finishes
            * the sample prepared earlier and loads the data for the next one.
            * It stops before the last pixel, which is produced after the loop.
            */
 525 #ifdef __SUNPRO_C
 526 #pragma pipeloop(0)
 527 #endif /* __SUNPRO_C */
 528       for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
 529         X1 += dX;
 530         Y1 += dY;
 531 
             /* horizontal sums for the four source rows; rows are srcYStride bytes apart */
 532         c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 533               mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 534         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 535         c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 536               mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 537         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 538         c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 539               mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 540         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 541         c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 542               mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 543 
             /* xf* are no longer needed for the current sample: reload for the next one */
 544         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
 545         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 546 
 547         xf0 = fptr[0];
 548         xf1 = fptr[1];
 549         xf2 = fptr[2];
 550         xf3 = fptr[3];
 551 
             /* vertical combination must use the current yf* before they are reloaded */
 552         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 553 
 554         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
 555         fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
 556 
 557         yf0 = fptr[0];
 558         yf1 = fptr[1];
 559         yf2 = fptr[2];
 560         yf3 = fptr[3];
 561 
 562         SAT8(dPtr[0]);
 563 
             /* first source row of the next sample */
 564         xSrc = (X1 >> MLIB_SHIFT) - 1;
 565         ySrc = (Y1 >> MLIB_SHIFT) - 1;
 566 
 567         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
 568         s0 = srcPixelPtr[0];
 569         s1 = srcPixelPtr[4];
 570         s2 = srcPixelPtr[8];
 571         s3 = srcPixelPtr[12];
 572       }
 573 
           /* Epilogue: last sample of this channel, from the values loaded most recently */
 574       c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
 575             mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
 576       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 577       c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 578             mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 579       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 580       c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 581             mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 582       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 583       c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
 584             mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
 585 
 586       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
 587 
 588       SAT8(dPtr[0]);
 589     }
 590   }
 591 
 592   return MLIB_SUCCESS;
 593 }
 594 
 595 #else       /* for x86, using integer multiplies is faster */
 596 
 597 #define SHIFT_X  12 /* right shift applied to each horizontal sum (c0..c3) */
     /* ROUND_X is 0, so the horizontal shift truncates; the rounding term is left commented out */
 598 #define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */
 599 
     /*
      * Right shift applied to the vertical combination (val0).  ROUND_Y adds
      * half of the divisor before that shift, i.e. the final result is rounded.
      * NOTE(review): "14 + 14" presumably reflects the fixed-point precision of
      * the horizontal and vertical filter coefficients - confirm against the
      * integer filter tables.
      */
 600 #define SHIFT_Y  (14 + 14 - SHIFT_X)
 601 #define ROUND_Y  (1 << (SHIFT_Y - 1))
 602 
 603 /***************************************************************/
 604 /* Test for the presence of any "1" bit in bits
 605    8 to 31 of val. If present, then val is either
 606    negative or >255. If over/underflows of 8 bits
 607    are uncommon, then this technique can be a win,
 608    since only a single test, rather than two, is
 609    necessary to determine if clamping is needed.
 610    On the other hand, if over/underflows are common,
 611    it adds an extra test.
 612 */
 613 #define S32_TO_U8_SAT(DST)                                      \
 614   if (val0 & 0xffffff00) {                                      \
 615     if (val0 < MLIB_U8_MIN)                                     \


1088 
1089       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1090       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1091       c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1092             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1093       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1094       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1095             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1096       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1097       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1098             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1099 
1100       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1101 
1102       S32_TO_U8_SAT(dPtr[0]);
1103     }
1104   }
1105 
1106   return MLIB_SUCCESS;
1107 }
1108 
1109 #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
1110 
1111 /***************************************************************/


  54  *      dstYStride stride of destination image
  55  *      is_affine  indicator (Affine - GridWarp)
  56  *      srcYStride stride of source image
  57  *      filter     type of resampling filter
  58  *
  59  * DESCRIPTION
  60  *      The functions step along the lines from xLeft to xRight and apply
  61  *      the bicubic filtering.
  62  *
  63  */
  64 
  65 #include "mlib_ImageAffine.h"
  66 
  67 #define DTYPE  mlib_u8 /* sample type handled by every function in this file */
  68 
  69 #define FUN_NAME(CHAN) mlib_ImageAffine_u8_##CHAN##_bc /* e.g. FUN_NAME(1ch) -> mlib_ImageAffine_u8_1ch_bc */
  70 
  71 #define FILTER_BITS   8 /* NOTE(review): not referenced directly in this file; presumably consumed by macros from mlib_ImageAffine.h (FILTER_SHIFT / FILTER_MASK) - confirm */
  72 
  73 /***************************************************************/
  74 /* for x86, using integer multiplies is faster */









































































































































































































































































































































































































































































































































  75 
  76 #define SHIFT_X  12 /* right shift applied to each horizontal sum (c0..c3) */
     /* ROUND_X is 0, so the horizontal shift truncates; the rounding term is left commented out */
  77 #define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */
  78 
     /*
      * Right shift applied to the vertical combination (val0).  ROUND_Y adds
      * half of the divisor before that shift, i.e. the final result is rounded.
      * NOTE(review): "14 + 14" presumably reflects the fixed-point precision of
      * the horizontal and vertical filter coefficients - confirm against the
      * integer filter tables.
      */
  79 #define SHIFT_Y  (14 + 14 - SHIFT_X)
  80 #define ROUND_Y  (1 << (SHIFT_Y - 1))
  81 
  82 /***************************************************************/
  83 /* Test for the presence of any "1" bit in bits
  84    8 to 31 of val. If present, then val is either
  85    negative or >255. If over/underflows of 8 bits
  86    are uncommon, then this technique can be a win,
  87    since only a single test, rather than two, is
  88    necessary to determine if clamping is needed.
  89    On the other hand, if over/underflows are common,
  90    it adds an extra test.
  91 */
  92 #define S32_TO_U8_SAT(DST)                                      \
  93   if (val0 & 0xffffff00) {                                      \
  94     if (val0 < MLIB_U8_MIN)                                     \


 567 
 568       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
 569       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 570       c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 571             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 572       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 573       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 574             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 575       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
 576       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
 577             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
 578 
 579       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
 580 
 581       S32_TO_U8_SAT(dPtr[0]);
 582     }
 583   }
 584 
 585   return MLIB_SUCCESS;
 586 }


 587 
 588 /***************************************************************/
< prev index next >