1 /*
   2  * Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <stdlib.h>
  27 #include "jni_util.h"
  28 #include "math.h"
  29 
  30 #include "GraphicsPrimitiveMgr.h"
  31 #include "Region.h"
  32 
  33 #include "sun_java2d_loops_TransformHelper.h"
  34 #include "java_awt_image_AffineTransformOp.h"
  35 
  36 /*
  37  * The stub functions replace the bilinear and bicubic interpolation
  38  * functions with NOP versions so that the performance of the helper
  39  * functions that fetch the data can be more directly tested.  They
  40  * are not compiled or enabled by default.  Change the following
  41  * #undef to a #define to build the stub functions.
  42  *
  43  * When compiled, they are enabled by the environment variable TXSTUB.
  44  * When compiled, there is also code to disable the VIS versions and
  45  * use the C versions in this file in their place by defining the TXNOVIS
  46  * environment variable.
  47  */
#undef MAKE_STUBS

/*
 * The number of IntArgbPre samples to store in the temporary buffer.
 * The transform loop divides this by the number of samples fetched per
 * destination pixel (1 for nearest neighbor, 4 for bilinear, 16 for
 * bicubic) to get the maximum number of pixels processed per batch.
 */
#define LINE_SIZE       2048

/*
 * The number of scanlines whose edge pairs fit in the stack allocated
 * edge buffer (the buffer itself holds 2 + MAXEDGES * 2 jints).  When no
 * Java edge array is supplied and more scanlines than this are needed,
 * the edge list is allocated from the heap instead.
 */
#define MAXEDGES 1024

/* Declare the software interpolation functions. */
static TransformInterpFunc BilinearInterp;
static TransformInterpFunc BicubicInterp;

#ifdef MAKE_STUBS
/* Optionally Declare the stub interpolation functions. */
static TransformInterpFunc BilinearInterpStub;
static TransformInterpFunc BicubicInterpStub;
#endif /* MAKE_STUBS */

/*
 * Initially choose the software interpolation functions.
 * These choices can be overridden by platform code that runs during the
 * primitive registration phase of initialization by storing pointers to
 * better functions in these pointers.
 * Compiling the stubs also turns on code below that can re-install the
 * software functions or stub functions on the first call to this primitive.
 *
 * Note that these two pointers have external linkage (they are not
 * declared static) so that other translation units can assign to them.
 */
TransformInterpFunc *pBilinearFunc = BilinearInterp;
TransformInterpFunc *pBicubicFunc = BicubicInterp;
  76 
  77 /*
  78  * The dxydxy parameters of the inverse transform determine how
  79  * quickly we step through the source image.  For tiny scale
  80  * factors (on the order of 1E-16 or so) the stepping distances
  81  * are huge.  The image has been scaled so small that stepping
  82  * a single pixel in device space moves the sampling point by
  83  * billions (or more) pixels in the source image space.  These
  84  * huge stepping values can overflow the whole part of the longs
  85  * we use for the fixed point stepping equations and so we need
  86  * a more robust solution.  We could simply iterate over every
  87  * device pixel, use the inverse transform to transform it back
  88  * into the source image coordinate system and then test it for
  89  * being in range and sample pixel-by-pixel, but that is quite
  90  * a bit more expensive.  Fortunately, if the scale factors are
  91  * so tiny that we overflow our long values then the number of
  92  * pixels we are planning to visit should be very tiny.  The only
  93  * exception to that rule is if the scale factor along one
  94  * dimension is tiny (creating the huge stepping values), and
  95  * the scale factor along the other dimension is fairly regular
  96  * or an up-scale.  In that case we have a lot of pixels along
  97  * the direction of the larger axis to sample, but few along the
  98  * smaller axis.  Though, pessimally, with an added shear factor
  99  * such a linearly tiny image could have bounds that cover a large
 100  * number of pixels.  Such odd transformations should be very
 101  * rare and the absolute limit on calculations would involve a
 102  * single reverse transform of every pixel in the output image
 103  * which is not fast, but it should not cause an undue stall
 104  * of the rendering software.
 105  *
 106  * The specific test we will use is to calculate the inverse
 107  * transformed values of every corner of the destination bounds
 108  * (in order to be user-clip independent) and if we can
 109  * perform a fixed-point-long inverse transform of all of
 110  * those points without overflowing we will use the fast
 111  * fixed point algorithm.  Otherwise we will use the safe
 112  * per-pixel transform algorithm.
 113  * The 4 corners are 0,0, 0,dsth, dstw,0, dstw,dsth
 114  * Transformed they are:
 115  *     tx,               ty
 116  *     tx       +dxdy*H, ty       +dydy*H
 117  *     tx+dxdx*W,        ty+dydx*W
 118  *     tx+dxdx*W+dxdy*H, ty+dydx*W+dydy*H
 119  */
 120 /* We reject coordinates not less than 1<<30 so that the distance between */
 121 /* any 2 of them is less than 1<<31 which would overflow into the sign */
 122 /* bit of a signed long value used to represent fixed point coordinates. */
 123 #define TX_FIXED_UNSAFE(v)  (fabs(v) >= (1<<30))
 124 static jboolean
 125 checkOverflow(jint dxoff, jint dyoff,
 126               SurfaceDataBounds *pBounds,
 127               TransformInfo *pItxInfo,
 128               jdouble *retx, jdouble *rety)
 129 {
 130     jdouble x, y;
 131 
 132     x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */
 133     y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */
 134     Transform_transform(pItxInfo, &x, &y);
 135     *retx = x;
 136     *rety = y;
 137     if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
 138         return JNI_TRUE;
 139     }
 140 
 141     x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */
 142     y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */
 143     Transform_transform(pItxInfo, &x, &y);
 144     if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
 145         return JNI_TRUE;
 146     }
 147 
 148     x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */
 149     y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */
 150     Transform_transform(pItxInfo, &x, &y);
 151     if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
 152         return JNI_TRUE;
 153     }
 154 
 155     x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */
 156     y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */
 157     Transform_transform(pItxInfo, &x, &y);
 158     if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
 159         return JNI_TRUE;
 160     }
 161 
 162     return JNI_FALSE;
 163 }
 164 
 165 /*
 166  * Fill the edge buffer with pairs of coordinates representing the maximum
 167  * left and right pixels of the destination surface that should be processed
 168  * on each scanline, clipped to the bounds parameter.
 169  * The number of scanlines to calculate is implied by the bounds parameter.
 170  * Only pixels that map back through the specified (inverse) transform to a
 171  * source coordinate that falls within the (0, 0, sw, sh) bounds of the
 172  * source image should be processed.
 173  * pEdges points to an array of jints that holds 2 + numedges*2 values where
 174  * numedges should match (pBounds->y2 - pBounds->y1).
 175  * The first two jints in pEdges should be set to y1 and y2 and every pair
 176  * of jints after that represent the xmin,xmax of all pixels in range of
 177  * the transformed blit for the corresponding scanline.
 178  */
 179 static void
 180 calculateEdges(jint *pEdges,
 181                SurfaceDataBounds *pBounds,
 182                TransformInfo *pItxInfo,
 183                jlong xbase, jlong ybase,
 184                juint sw, juint sh)
 185 {
 186     jlong dxdxlong, dydxlong;
 187     jlong dxdylong, dydylong;
 188     jlong drowxlong, drowylong;
 189     jint dx1, dy1, dx2, dy2;
 190 
 191     dxdxlong = DblToLong(pItxInfo->dxdx);
 192     dydxlong = DblToLong(pItxInfo->dydx);
 193     dxdylong = DblToLong(pItxInfo->dxdy);
 194     dydylong = DblToLong(pItxInfo->dydy);
 195 
 196     dx1 = pBounds->x1;
 197     dy1 = pBounds->y1;
 198     dx2 = pBounds->x2;
 199     dy2 = pBounds->y2;
 200     *pEdges++ = dy1;
 201     *pEdges++ = dy2;
 202 
 203     drowxlong = (dx2-dx1-1) * dxdxlong;
 204     drowylong = (dx2-dx1-1) * dydxlong;
 205 
 206     while (dy1 < dy2) {
 207         jlong xlong, ylong;
 208 
 209         dx1 = pBounds->x1;
 210         dx2 = pBounds->x2;
 211 
 212         xlong = xbase;
 213         ylong = ybase;
 214         while (dx1 < dx2 &&
 215                (((juint) WholeOfLong(ylong)) >= sh ||
 216                 ((juint) WholeOfLong(xlong)) >= sw))
 217         {
 218             dx1++;
 219             xlong += dxdxlong;
 220             ylong += dydxlong;
 221         }
 222 
 223         xlong = xbase + drowxlong;
 224         ylong = ybase + drowylong;
 225         while (dx2 > dx1 &&
 226                (((juint) WholeOfLong(ylong)) >= sh ||
 227                 ((juint) WholeOfLong(xlong)) >= sw))
 228         {
 229             dx2--;
 230             xlong -= dxdxlong;
 231             ylong -= dydxlong;
 232         }
 233 
 234         *pEdges++ = dx1;
 235         *pEdges++ = dx2;
 236 
 237         /* Increment to next scanline */
 238         xbase += dxdylong;
 239         ybase += dydylong;
 240         dy1++;
 241     }
 242 }
 243 
/*
 * Forward declaration of the per-pixel fallback loop.  It is defined
 * after the JNI entry point below, which calls it when checkOverflow()
 * reports that the fixed point stepping equations are unsafe.
 */
static void
Transform_SafeHelper(JNIEnv *env,
                     SurfaceDataOps *srcOps,
                     SurfaceDataOps *dstOps,
                     SurfaceDataRasInfo *pSrcInfo,
                     SurfaceDataRasInfo *pDstInfo,
                     NativePrimitive *pMaskBlitPrim,
                     CompositeInfo *pCompInfo,
                     TransformHelperFunc *pHelperFunc,
                     TransformInterpFunc *pInterpFunc,
                     RegionData *pClipInfo, TransformInfo *pItxInfo,
                     jint *pData, jint *pEdges,
                     jint dxoff, jint dyoff, jint sw, jint sh);
 257 
 258 /*
 259  * Class:     sun_java2d_loops_TransformHelper
 260  * Method:    Transform
 * Signature: (Lsun/java2d/loops/MaskBlit;Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;Ljava/awt/geom/AffineTransform;IIIIIIIII[III)V
 262  */
/*
 * Render a transformed source surface into a destination surface.
 *
 * For every destination pixel inside the clipped destination bounds whose
 * center maps (through itxform) to a point inside the source rectangle,
 * source samples are fetched as IntArgbPre data by the helper function of
 * this primitive, optionally interpolated, and then composited into the
 * destination with the mask blit function of the maskblit primitive.
 *
 * self       the TransformHelper primitive; supplies the sample helpers
 * maskblit   primitive that supplies the compositing loop and dst flags
 * srcData    source surface
 * dstData    destination surface
 * comp       composite handed to the mask blit's getCompInfo, if any
 * clip       clip region, intersected with the destination bounds
 * itxform    transform applied to destination pixel centers to obtain
 *            source coordinates
 * txtype     one of the AffineTransformOp TYPE_* interpolation constants;
 *            any other value makes the function return without drawing
 * sx1..sy2   source bounds; (sx2-sx1, sy2-sy1) is the size tested against
 * dx1..dy2   destination bounds (the parameters are reused as loop
 *            variables further down)
 * edgeArray  optional int array that receives the edge list: y1, y2 and
 *            then one xmin,xmax pair per scanline.  If it is too small
 *            for the scanlines to be stored an InternalError is thrown.
 * dxoff,     offsets added to destination pixel coordinates before they
 * dyoff      are transformed
 */
JNIEXPORT void JNICALL
Java_sun_java2d_loops_TransformHelper_Transform
    (JNIEnv *env, jobject self,
     jobject maskblit,
     jobject srcData, jobject dstData,
     jobject comp, jobject clip,
     jobject itxform, jint txtype,
     jint sx1, jint sy1, jint sx2, jint sy2,
     jint dx1, jint dy1, jint dx2, jint dy2,
     jintArray edgeArray, jint dxoff, jint dyoff)
{
    SurfaceDataOps *srcOps;
    SurfaceDataOps *dstOps;
    SurfaceDataRasInfo srcInfo;
    SurfaceDataRasInfo dstInfo;
    NativePrimitive *pHelperPrim;
    NativePrimitive *pMaskBlitPrim;
    CompositeInfo compInfo;
    RegionData clipInfo;
    TransformInfo itxInfo;
    jint maxlinepix;
    TransformHelperFunc *pHelperFunc;
    TransformInterpFunc *pInterpFunc;
    jdouble xorig, yorig;
    jlong numedges;
    jint *pEdges;
    /* Stack storage for the edge list when it is small enough */
    jint edgebuf[2 + MAXEDGES * 2];
    /*
     * Intermediate sample buffer.  The jlong member is never accessed;
     * being part of the union it gives the jint data at least the
     * alignment of a jlong.
     */
    union {
        jlong align;
        jint data[LINE_SIZE];
    } rgb;

#ifdef MAKE_STUBS
    static int th_initialized;

    /* For debugging only - used to swap in alternate funcs for perf testing */
    if (!th_initialized) {
        if (getenv("TXSTUB") != 0) {
            pBilinearFunc = BilinearInterpStub;
            pBicubicFunc = BicubicInterpStub;
        } else if (getenv("TXNOVIS") != 0) {
            pBilinearFunc = BilinearInterp;
            pBicubicFunc = BicubicInterp;
        }
        th_initialized = 1;
    }
#endif /* MAKE_STUBS */

    /*
     * Resolve everything that can fail before any surface is locked so
     * that these early returns need no cleanup.
     */
    pHelperPrim = GetNativePrim(env, self);
    if (pHelperPrim == NULL) {
        /* Should never happen... */
        return;
    }
    pMaskBlitPrim = GetNativePrim(env, maskblit);
    if (pMaskBlitPrim == NULL) {
        /* Exception was thrown by GetNativePrim */
        return;
    }
    /* NOTE(review): compInfo stays uninitialized when getCompInfo is NULL */
    if (pMaskBlitPrim->pCompType->getCompInfo != NULL) {
        (*pMaskBlitPrim->pCompType->getCompInfo)(env, &compInfo, comp);
    }
    if (Region_GetInfo(env, clip, &clipInfo)) {
        return;
    }

    srcOps = SurfaceData_GetOps(env, srcData);
    if (srcOps == 0) {
        return;
    }
    dstOps = SurfaceData_GetOps(env, dstData);
    if (dstOps == 0) {
        return;
    }

    /*
     * Grab the appropriate pointer to the helper and interpolation
     * routines and calculate the maximum number of destination pixels
     * that can be processed in one intermediate buffer based on the
     * size of the buffer and the number of samples needed per pixel.
     */
    switch (txtype) {
    case java_awt_image_AffineTransformOp_TYPE_NEAREST_NEIGHBOR:
        pHelperFunc = pHelperPrim->funcs.transformhelpers->nnHelper;
        pInterpFunc = NULL;
        maxlinepix = LINE_SIZE;
        break;
    case java_awt_image_AffineTransformOp_TYPE_BILINEAR:
        pHelperFunc = pHelperPrim->funcs.transformhelpers->blHelper;
        pInterpFunc = pBilinearFunc;
        maxlinepix = LINE_SIZE / 4;
        break;
    case java_awt_image_AffineTransformOp_TYPE_BICUBIC:
        pHelperFunc = pHelperPrim->funcs.transformhelpers->bcHelper;
        pInterpFunc = pBicubicFunc;
        maxlinepix = LINE_SIZE / 16;
        break;
    default:
        // Should not happen, but just in case.
        return;
    }

    /* Lock the source first, then the destination clipped to the clip */
    srcInfo.bounds.x1 = sx1;
    srcInfo.bounds.y1 = sy1;
    srcInfo.bounds.x2 = sx2;
    srcInfo.bounds.y2 = sy2;
    dstInfo.bounds.x1 = dx1;
    dstInfo.bounds.y1 = dy1;
    dstInfo.bounds.x2 = dx2;
    dstInfo.bounds.y2 = dy2;
    SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
    if (srcOps->Lock(env, srcOps, &srcInfo, pHelperPrim->srcflags)
        != SD_SUCCESS)
    {
        /* edgeArray should already contain zeros for min/maxy */
        return;
    }
    if (dstOps->Lock(env, dstOps, &dstInfo, pMaskBlitPrim->dstflags)
        != SD_SUCCESS)
    {
        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
        /* edgeArray should already contain zeros for min/maxy */
        return;
    }
    Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
    Transform_GetInfo(env, itxform, &itxInfo);


    /*
     * Choose the storage for the edge list: the caller's Java array if
     * one was supplied, otherwise the stack buffer, otherwise the heap.
     * The subtraction is done in jlong so that it cannot overflow.
     */
    numedges = (((jlong) dstInfo.bounds.y2) - ((jlong) dstInfo.bounds.y1));
    if (numedges <= 0) {
        pEdges = NULL;
    } else if (!JNU_IsNull(env, edgeArray)) {
        /*
         * Ideally Java should allocate an array large enough, but if
         * we ever have a miscommunication about the number of edge
         * lines, or if the Java array calculation should overflow to
         * a positive number and succeed in allocating an array that
         * is too small, we need to verify that it can still hold the
         * number of integers that we plan to store to be safe.
         */
        jsize edgesize = (*env)->GetArrayLength(env, edgeArray);
        /* (edgesize/2 - 1) should avoid any overflow or underflow. */
        pEdges = (((edgesize / 2) - 1) >= numedges)
            ? (*env)->GetPrimitiveArrayCritical(env, edgeArray, NULL)
            : NULL;
    } else if (numedges > MAXEDGES) {
        /* numedges variable (jlong) can be at most ((1<<32)-1) */
        /* memsize can overflow a jint, but not a jlong */
        jlong memsize = ((numedges * 2) + 2) * sizeof(*pEdges);
        /* Refuse sizes that do not survive the conversion to size_t */
        pEdges = (memsize == ((size_t) memsize))
            ? malloc((size_t) memsize)
            : NULL;
    } else {
        pEdges = edgebuf;
    }

    if (pEdges == NULL) {
        /*
         * Nothing to do (numedges <= 0) or no usable edge storage.
         * An error is raised only in the latter case and only if no
         * exception is already pending.
         */
        if (!(*env)->ExceptionCheck(env) && numedges > 0) {
            JNU_ThrowInternalError(env, "Unable to allocate edge list");
        }
        SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
        /* edgeArray should already contain zeros for min/maxy */
        return;
    }


    if (!Region_IsEmpty(&clipInfo)) {
        srcOps->GetRasInfo(env, srcOps, &srcInfo);
        dstOps->GetRasInfo(env, dstOps, &dstInfo);
        if (srcInfo.rasBase == NULL || dstInfo.rasBase == NULL) {
            /* No raster to read or write: report an empty edge list */
            pEdges[0] = pEdges[1] = 0;
        } else if (checkOverflow(dxoff, dyoff, &dstInfo.bounds,
                                 &itxInfo, &xorig, &yorig))
        {
            /* Fixed point stepping is unsafe: transform pixel by pixel */
            Transform_SafeHelper(env, srcOps, dstOps,
                                 &srcInfo, &dstInfo,
                                 pMaskBlitPrim, &compInfo,
                                 pHelperFunc, pInterpFunc,
                                 &clipInfo, &itxInfo, rgb.data, pEdges,
                                 dxoff, dyoff, sx2-sx1, sy2-sy1);
        } else {
            SurfaceDataBounds span;
            jlong dxdxlong, dydxlong;
            jlong dxdylong, dydylong;
            jlong xbase, ybase;

            /*
             * Fast path: convert the transform deltas and the source
             * position of the first pixel center (as returned by
             * checkOverflow) to fixed point once, then step through the
             * source by addition.
             */
            dxdxlong = DblToLong(itxInfo.dxdx);
            dydxlong = DblToLong(itxInfo.dydx);
            dxdylong = DblToLong(itxInfo.dxdy);
            dydylong = DblToLong(itxInfo.dydy);
            xbase = DblToLong(xorig);
            ybase = DblToLong(yorig);

            calculateEdges(pEdges, &dstInfo.bounds, &itxInfo,
                           xbase, ybase, sx2-sx1, sy2-sy1);

            Region_StartIteration(env, &clipInfo);
            while (Region_NextIteration(&clipInfo, &span)) {
                jlong rowxlong, rowylong;
                void *pDst;

                /* Source position of column bounds.x1 on the first row */
                dy1 = span.y1;
                dy2 = span.y2;
                rowxlong = xbase + (dy1 - dstInfo.bounds.y1) * dxdylong;
                rowylong = ybase + (dy1 - dstInfo.bounds.y1) * dydylong;

                while (dy1 < dy2) {
                    jlong xlong, ylong;

                    /* Note - process at most one scanline at a time. */

                    /* Clip the precomputed edges of this row to the span */
                    dx1 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 2];
                    dx2 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 3];
                    if (dx1 < span.x1) dx1 = span.x1;
                    if (dx2 > span.x2) dx2 = span.x2;

                    /* All pixels from dx1 to dx2 have centers in bounds */
                    while (dx1 < dx2) {
                        /* Can process at most one buffer full at a time */
                        jint numpix = dx2 - dx1;
                        if (numpix > maxlinepix) {
                            numpix = maxlinepix;
                        }

                        /* Recomputed from dx1 rather than accumulated */
                        xlong =
                            rowxlong + ((dx1 - dstInfo.bounds.x1) * dxdxlong);
                        ylong =
                            rowylong + ((dx1 - dstInfo.bounds.x1) * dydxlong);

                        /* Get IntArgbPre pixel data from source */
                        (*pHelperFunc)(&srcInfo,
                                       rgb.data, numpix,
                                       xlong, dxdxlong,
                                       ylong, dydxlong);

                        /* Interpolate result pixels if needed */
                        if (pInterpFunc) {
                            (*pInterpFunc)(rgb.data, numpix,
                                           FractOfLong(xlong-LongOneHalf),
                                           FractOfLong(dxdxlong),
                                           FractOfLong(ylong-LongOneHalf),
                                           FractOfLong(dydxlong));
                        }

                        /* Store/Composite interpolated pixels into dest */
                        pDst = PtrCoord(dstInfo.rasBase,
                                        dx1, dstInfo.pixelStride,
                                        dy1, dstInfo.scanStride);
                        (*pMaskBlitPrim->funcs.maskblit)(pDst, rgb.data,
                                                         0, 0, 0,
                                                         numpix, 1,
                                                         &dstInfo, &srcInfo,
                                                         pMaskBlitPrim,
                                                         &compInfo);

                        /* Increment to next buffer worth of input pixels */
                        dx1 += maxlinepix;
                    }

                    /* Increment to next scanline */
                    rowxlong += dxdylong;
                    rowylong += dydylong;
                    dy1++;
                }
            }
            Region_EndIteration(env, &clipInfo);
        }
        SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
        SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
    } else {
        /* Everything was clipped away: report an empty edge list */
        pEdges[0] = pEdges[1] = 0;
    }

    /*
     * Give back the edge storage in the way that matches how it was
     * obtained above.  Mode 0 hands any changes back to the Java array.
     */
    if (!JNU_IsNull(env, edgeArray)) {
        (*env)->ReleasePrimitiveArrayCritical(env, edgeArray, pEdges, 0);
    } else if (pEdges != edgebuf) {
        free(pEdges);
    }
    SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
    SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
}
 544 
 545 static void
 546 Transform_SafeHelper(JNIEnv *env,
 547                      SurfaceDataOps *srcOps,
 548                      SurfaceDataOps *dstOps,
 549                      SurfaceDataRasInfo *pSrcInfo,
 550                      SurfaceDataRasInfo *pDstInfo,
 551                      NativePrimitive *pMaskBlitPrim,
 552                      CompositeInfo *pCompInfo,
 553                      TransformHelperFunc *pHelperFunc,
 554                      TransformInterpFunc *pInterpFunc,
 555                      RegionData *pClipInfo, TransformInfo *pItxInfo,
 556                      jint *pData, jint *pEdges,
 557                      jint dxoff, jint dyoff, jint sw, jint sh)
 558 {
 559     SurfaceDataBounds span;
 560     jint dx1, dx2;
 561     jint dy1, dy2;
 562     jint i, iy;
 563 
 564     dy1 = pDstInfo->bounds.y1;
 565     dy2 = pDstInfo->bounds.y2;
 566     dx1 = pDstInfo->bounds.x1;
 567     dx2 = pDstInfo->bounds.x2;
 568     pEdges[0] = dy1;
 569     pEdges[1] = dy2;
 570     for (iy = dy1; iy < dy2; iy++) {
 571         jint i = (iy - dy1) * 2;
 572         /* row spans are set to max,min until we find a pixel in range below */
 573         pEdges[i + 2] = dx2;
 574         pEdges[i + 3] = dx1;
 575     }
 576 
 577     Region_StartIteration(env, pClipInfo);
 578     while (Region_NextIteration(pClipInfo, &span)) {
 579         dy1 = span.y1;
 580         dy2 = span.y2;
 581         while (dy1 < dy2) {
 582             dx1 = span.x1;
 583             dx2 = span.x2;
 584             i = (dy1 - pDstInfo->bounds.y1) * 2;
 585             while (dx1 < dx2) {
 586                 jdouble x, y;
 587                 jlong xlong, ylong;
 588 
 589                 x = dxoff + dx1 + 0.5;
 590                 y = dyoff + dy1 + 0.5;
 591                 Transform_transform(pItxInfo, &x, &y);
 592                 xlong = DblToLong(x);
 593                 ylong = DblToLong(y);
 594 
 595                 /* Process only pixels with centers in bounds
 596                  * Test double values to avoid overflow in conversion
 597                  * to long values and then also test the long values
 598                  * in case they rounded up and out of bounds during
 599                  * the conversion.
 600                  */
 601                 if (x >= 0 && y >= 0 && x < sw && y < sh &&
 602                     WholeOfLong(xlong) < sw &&
 603                     WholeOfLong(ylong) < sh)
 604                 {
 605                     void *pDst;
 606 
 607                     if (pEdges[i + 2] > dx1) {
 608                         pEdges[i + 2] = dx1;
 609                     }
 610                     if (pEdges[i + 3] <= dx1) {
 611                         pEdges[i + 3] = dx1 + 1;
 612                     }
 613 
 614                     /* Get IntArgbPre pixel data from source */
 615                     (*pHelperFunc)(pSrcInfo,
 616                                    pData, 1,
 617                                    xlong, 0,
 618                                    ylong, 0);
 619 
 620                     /* Interpolate result pixels if needed */
 621                     if (pInterpFunc) {
 622                         (*pInterpFunc)(pData, 1,
 623                                        FractOfLong(xlong-LongOneHalf), 0,
 624                                        FractOfLong(ylong-LongOneHalf), 0);
 625                     }
 626 
 627                     /* Store/Composite interpolated pixels into dest */
 628                     pDst = PtrCoord(pDstInfo->rasBase,
 629                                     dx1, pDstInfo->pixelStride,
 630                                     dy1, pDstInfo->scanStride);
 631                     (*pMaskBlitPrim->funcs.maskblit)(pDst, pData,
 632                                                      0, 0, 0,
 633                                                      1, 1,
 634                                                      pDstInfo, pSrcInfo,
 635                                                      pMaskBlitPrim,
 636                                                      pCompInfo);
 637                 }
 638 
 639                 /* Increment to next input pixel */
 640                 dx1++;
 641             }
 642 
 643             /* Increment to next scanline */
 644             dy1++;
 645         }
 646     }
 647     Region_EndIteration(env, pClipInfo);
 648 }
 649 
 650 #define BL_INTERP_V1_to_V2_by_F(v1, v2, f) \
 651     (((v1)<<8) + ((v2)-(v1))*(f))
 652 
 653 #define BL_ACCUM(comp) \
 654     do { \
 655         jint c1 = ((jubyte *) pRGB)[comp]; \
 656         jint c2 = ((jubyte *) pRGB)[comp+4]; \
 657         jint cR = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \
 658         c1 = ((jubyte *) pRGB)[comp+8]; \
 659         c2 = ((jubyte *) pRGB)[comp+12]; \
 660         c2 = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \
 661         cR = BL_INTERP_V1_to_V2_by_F(cR, c2, yfactor); \
 662         ((jubyte *)pRes)[comp] = (jubyte) ((cR + (1<<15)) >> 16); \
 663     } while (0)
 664 
 665 static void
 666 BilinearInterp(jint *pRGB, jint numpix,
 667                jint xfract, jint dxfract,
 668                jint yfract, jint dyfract)
 669 {
 670     jint j;
 671     jint *pRes = pRGB;
 672 
 673     for (j = 0; j < numpix; j++) {
 674         jint xfactor;
 675         jint yfactor;
 676         xfactor = URShift(xfract, 32-8);
 677         yfactor = URShift(yfract, 32-8);
 678         BL_ACCUM(0);
 679         BL_ACCUM(1);
 680         BL_ACCUM(2);
 681         BL_ACCUM(3);
 682         pRes++;
 683         pRGB += 4;
 684         xfract += dxfract;
 685         yfract += dyfract;
 686     }
 687 }
 688 
/*
 * Clamp the integer variable 'val' in place to the range [0, max]
 * without branching.  'val' must be a modifiable lvalue.  The trick
 * depends on (val >> 31) producing all one bits for a negative value,
 * i.e. on a 32-bit value and an arithmetic right shift.
 */
#define SAT(val, max) \
    do { \
        val &= ~(val >> 31);  /* negatives become 0 */ \
        val -= max;           /* only overflows are now positive */ \
        val &= (val >> 31);   /* positives become 0 */ \
        val += max;           /* range is now [0 -> max] */ \
    } while (0)

/*
 * Select the arithmetic used by the bicubic code.  Exactly one of the
 * BICUBIC_USE_* sections below then supplies this set of definitions:
 *   BC_TYPE            type of the coefficients and accumulators
 *   BC_DblToCoeff(v)   convert a double coefficient to BC_TYPE
 *   BC_COEFF_ONE       the coefficient value that represents 1.0
 *   BC_V_HALF          the value one half at the scale of the accumulators
 *                      (presumably a rounding bias; its use is not in
 *                      this part of the file)
 *   BC_CompToV(v)      convert an 8-bit component to BC_TYPE
 *   BC_STORE_COMPS(p)  clamp the accumulators and store the pixel at *p
 */
#ifdef __sparc
/* For sparc, floating point multiplies are faster than integer */
#define BICUBIC_USE_DBL_LUT
#else
/* For x86, integer multiplies are faster than floating point */
/* Note that on x86 Linux the choice of best algorithm varies
 * depending on the compiler optimization and the processor type.
 * Currently, the sun/awt x86 Linux builds are not optimized so
 * all the variations produce mediocre performance.
 * For now we will use the choice that works best for the Windows
 * build until the (lack of) optimization issues on Linux are resolved.
 */
#define BICUBIC_USE_INT_MATH
#endif

/*
 * Variant: double math, components converted with a cast.
 * Nothing in the selection above defines BICUBIC_USE_DBL_CAST.
 */
#ifdef BICUBIC_USE_DBL_CAST

#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ((jdouble) (v))
/*
 * Truncate the accumulators to integers, clamp alpha to [0, 255] and
 * each color component to [0, alpha], then pack them as A,R,G,B.
 */
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *pRes = ((a << 24) | (r << 16) | (g <<  8) | (b)); \
    } while (0)

#endif /* BICUBIC_USE_DBL_CAST */

/*
 * Variant: double math, components converted with a lookup table
 * instead of a cast.
 */
#ifdef BICUBIC_USE_DBL_LUT

/* Build the 256 entry table of the values 0.0 through 255.0 */
#define ItoD1(v)    ((jdouble) (v))
#define ItoD4(v)    ItoD1(v),  ItoD1(v+1),   ItoD1(v+2),   ItoD1(v+3)
#define ItoD16(v)   ItoD4(v),  ItoD4(v+4),   ItoD4(v+8),   ItoD4(v+12)
#define ItoD64(v)   ItoD16(v), ItoD16(v+16), ItoD16(v+32), ItoD16(v+48)

static jdouble ItoD_table[] = {
    ItoD64(0), ItoD64(64), ItoD64(128), ItoD64(192)
};

#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ItoD_table[v]
/* Same as the BICUBIC_USE_DBL_CAST version of this macro */
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *pRes = ((a << 24) | (r << 16) | (g <<  8) | (b)); \
    } while (0)

#endif /* BICUBIC_USE_DBL_LUT */

/*
 * Variant: integer math.  Coefficients are scaled by 256, so the product
 * of an x and a y coefficient is scaled by 1 << 16.
 */
#ifdef BICUBIC_USE_INT_MATH

#define BC_DblToCoeff(v)        ((jint) ((v) * 256))
#define BC_COEFF_ONE            256
#define BC_TYPE                 jint
#define BC_V_HALF               (1 << 15)
#define BC_CompToV(v)           ((jint) v)
/*
 * Shift the 16 bits of scaling out of the accumulators (this modifies
 * them in place), clamp alpha to [0, 255] and each color component to
 * [0, alpha], then pack them as A,R,G,B.
 */
#define BC_STORE_COMPS(pRes) \
    do { \
        accumA >>= 16; \
        accumR >>= 16; \
        accumG >>= 16; \
        accumB >>= 16; \
        SAT(accumA, 255); \
        SAT(accumR, accumA); \
        SAT(accumG, accumA); \
        SAT(accumB, accumA); \
        *pRes = ((accumA << 24) | (accumR << 16) | (accumG << 8) | (accumB)); \
    } while (0)

#endif /* BICUBIC_USE_INT_MATH */

/*
 * Add the sample pRGB[index] to the four accumulators, weighted by the
 * product of the coefficients at bicubic_coeff[xcindex] and
 * bicubic_coeff[ycindex].  Expects pRGB, accumA, accumR, accumG and
 * accumB to be in scope.
 */
#define BC_ACCUM(index, ycindex, xcindex) \
    do { \
        BC_TYPE factor = bicubic_coeff[xcindex] * bicubic_coeff[ycindex]; \
        int rgb; \
        rgb = pRGB[index]; \
        accumB += BC_CompToV((rgb >>  0) & 0xff) * factor; \
        accumG += BC_CompToV((rgb >>  8) & 0xff) * factor; \
        accumR += BC_CompToV((rgb >> 16) & 0xff) * factor; \
        accumA += BC_CompToV((rgb >> 24) & 0xff) * factor; \
    } while (0)

/* Table of bicubic coefficients, filled in by init_bicubic_table() */
static BC_TYPE bicubic_coeff[513];
/* Presumably set once the table has been filled in - TODO confirm */
static jboolean bicubictableinited;
 800 
 801 static void
 802 init_bicubic_table(jdouble A)
 803 {
 804     /*
 805      * The following formulas are designed to give smooth
 806      * results when 'A' is -0.5 or -1.0.
 807      */
 808     int i;
 809     for (i = 0; i < 256; i++) {
 810         /* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */
 811         jdouble x = i / 256.0;
 812         x = ((A+2)*x - (A+3))*x*x + 1;
 813         bicubic_coeff[i] = BC_DblToCoeff(x);
 814     }
 815 
 816     for (; i < 384; i++) {
 817         /* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */
 818         jdouble x = i / 256.0;
 819         x = ((A*x - 5*A)*x + 8*A)*x - 4*A;
 820         bicubic_coeff[i] = BC_DblToCoeff(x);
 821     }
 822 
 823     bicubic_coeff[384] = (BC_COEFF_ONE - bicubic_coeff[128]*2) / 2;
 824 
 825     for (i++; i <= 512; i++) {
 826         bicubic_coeff[i] = BC_COEFF_ONE - (bicubic_coeff[512-i] +
 827                                            bicubic_coeff[i-256] +
 828                                            bicubic_coeff[768-i]);
 829     }
 830 
 831     bicubictableinited = JNI_TRUE;
 832 }
 833 
 834 static void
 835 BicubicInterp(jint *pRGB, jint numpix,
 836               jint xfract, jint dxfract,
 837               jint yfract, jint dyfract)
 838 {
 839     jint i;
 840     jint *pRes = pRGB;
 841 
 842     if (!bicubictableinited) {
 843         init_bicubic_table(-0.5);
 844     }
 845 
 846     for (i = 0; i < numpix; i++) {
 847         BC_TYPE accumA, accumR, accumG, accumB;
 848         jint xfactor, yfactor;
 849 
 850         xfactor = URShift(xfract, 32-8);
 851         yfactor = URShift(yfract, 32-8);
 852         accumA = accumR = accumG = accumB = BC_V_HALF;
 853         BC_ACCUM(0, yfactor+256, xfactor+256);
 854         BC_ACCUM(1, yfactor+256, xfactor+  0);
 855         BC_ACCUM(2, yfactor+256, 256-xfactor);
 856         BC_ACCUM(3, yfactor+256, 512-xfactor);
 857         BC_ACCUM(4, yfactor+  0, xfactor+256);
 858         BC_ACCUM(5, yfactor+  0, xfactor+  0);
 859         BC_ACCUM(6, yfactor+  0, 256-xfactor);
 860         BC_ACCUM(7, yfactor+  0, 512-xfactor);
 861         BC_ACCUM(8, 256-yfactor, xfactor+256);
 862         BC_ACCUM(9, 256-yfactor, xfactor+  0);
 863         BC_ACCUM(10, 256-yfactor, 256-xfactor);
 864         BC_ACCUM(11, 256-yfactor, 512-xfactor);
 865         BC_ACCUM(12, 512-yfactor, xfactor+256);
 866         BC_ACCUM(13, 512-yfactor, xfactor+  0);
 867         BC_ACCUM(14, 512-yfactor, 256-xfactor);
 868         BC_ACCUM(15, 512-yfactor, 512-xfactor);
 869         BC_STORE_COMPS(pRes);
 870         pRes++;
 871         pRGB += 16;
 872         xfract += dxfract;
 873         yfract += dyfract;
 874     }
 875 }
 876 
 877 #ifdef MAKE_STUBS
 878 
 879 static void
 880 BilinearInterpStub(jint *pRGBbase, jint numpix,
 881                    jint xfract, jint dxfract,
 882                    jint yfract, jint dyfract)
 883 {
 884     jint *pRGB = pRGBbase;
 885     while (--numpix >= 0) {
 886         *pRGBbase = *pRGB;
 887         pRGBbase += 1;
 888         pRGB += 4;
 889     }
 890 }
 891 
 892 static void
 893 BicubicInterpStub(jint *pRGBbase, jint numpix,
 894                   jint xfract, jint dxfract,
 895                   jint yfract, jint dyfract)
 896 {
 897     jint *pRGB = pRGBbase+5;
 898     while (--numpix >= 0) {
 899         *pRGBbase = *pRGB;
 900         pRGBbase += 1;
 901         pRGB += 16;
 902     }
 903 }
 904 
 905 #endif /* MAKE_STUBS */