1 /*
   2  * Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <stdlib.h>
  27 #include "jni_util.h"
  28 #include "math.h"
  29 
  30 #include "GraphicsPrimitiveMgr.h"
  31 #include "Region.h"
  32 
  33 #include "sun_java2d_loops_TransformHelper.h"
  34 #include "java_awt_image_AffineTransformOp.h"
  35 
  36 /*
  37  * The stub functions replace the bilinear and bicubic interpolation
  38  * functions with NOP versions so that the performance of the helper
  39  * functions that fetch the data can be more directly tested.  They
  40  * are not compiled or enabled by default.  Change the following
  41  * #undef to a #define to build the stub functions.
  42  *
  43  * When compiled, they are enabled by the environment variable TXSTUB.
  44  * When compiled, there is also code to disable the VIS versions and
  45  * use the C versions in this file in their place by defining the TXNOVIS
  46  * environment variable.
  47  */
  48 #undef MAKE_STUBS
  49 
  50 /* The number of IntArgbPre samples to store in the temporary buffer. */
     /*
      * This sizes the rgb.data scratch array of the Transform entry point.
      * That function processes at most LINE_SIZE destination pixels per
      * pass for nearest neighbor, LINE_SIZE / 4 for bilinear and
      * LINE_SIZE / 16 for bicubic sampling.
      */
  51 #define LINE_SIZE       2048
  52 
  53 /* The size of a stack allocated buffer to hold edge coordinates (see below). */
     /*
      * The stack buffer holds 2 + MAXEDGES * 2 jints.  It is used when no
      * Java edge array is supplied and the destination covers at most
      * MAXEDGES scanlines; taller destinations get a malloc'ed buffer.
      */
  54 #define MAXEDGES 1024
  55 
  56 /* Declare the software interpolation functions. */
  57 static TransformInterpFunc BilinearInterp;
  58 static TransformInterpFunc BicubicInterp;
  59 
  60 #ifdef MAKE_STUBS
  61 /* Optionally declare the stub interpolation functions. */
  62 static TransformInterpFunc BilinearInterpStub;
  63 static TransformInterpFunc BicubicInterpStub;
  64 #endif /* MAKE_STUBS */
  65 
  66 /*
  67  * Initially choose the software interpolation functions.
  68  * These choices can be overridden by platform code that runs during the
  69  * primitive registration phase of initialization by storing pointers to
  70  * better functions in these pointers.
  71  * Compiling the stubs also turns on code below that can re-install the
  72  * software functions or stub functions on the first call to this primitive.
      * Unlike the functions they point to, the two pointers are not declared
      * static, which is what lets code outside this file assign to them.
  73  */
  74 TransformInterpFunc *pBilinearFunc = BilinearInterp;
  75 TransformInterpFunc *pBicubicFunc = BicubicInterp;
  76 
  77 /*
  78  * The dxydxy parameters of the inverse transform determine how
  79  * quickly we step through the source image.  For tiny scale
  80  * factors (on the order of 1E-16 or so) the stepping distances
  81  * are huge.  The image has been scaled so small that stepping
  82  * a single pixel in device space moves the sampling point by
  83  * billions (or more) of pixels in the source image space.  These
  84  * huge stepping values can overflow the whole part of the longs
  85  * we use for the fixed point stepping equations and so we need
  86  * a more robust solution.  We could simply iterate over every
  87  * device pixel, use the inverse transform to transform it back
  88  * into the source image coordinate system and then test it for
  89  * being in range and sample pixel-by-pixel, but that is quite
  90  * a bit more expensive.  Fortunately, if the scale factors are
  91  * so tiny that we overflow our long values then the number of
  92  * pixels we are planning to visit should be very tiny.  The only
  93  * exception to that rule is if the scale factor along one
  94  * dimension is tiny (creating the huge stepping values), and
  95  * the scale factor along the other dimension is fairly regular
  96  * or an up-scale.  In that case we have a lot of pixels along
  97  * the direction of the larger axis to sample, but few along the
  98  * smaller axis.  Though, pessimally, with an added shear factor
  99  * such a linearly tiny image could have bounds that cover a large
 100  * number of pixels.  Such odd transformations should be very
 101  * rare and the absolute limit on calculations would involve a
 102  * single reverse transform of every pixel in the output image
 103  * which is not fast, but it should not cause an undue stall
 104  * of the rendering software.
 105  *
 106  * The specific test we will use is to calculate the inverse
 107  * transformed values of every corner of the destination bounds
 108  * (in order to be user-clip independent) and if we can
 109  * perform a fixed-point-long inverse transform of all of
 110  * those points without overflowing we will use the fast
 111  * fixed point algorithm.  Otherwise we will use the safe
 112  * per-pixel transform algorithm.
 113  * The 4 corners are 0,0, 0,dsth, dstw,0, dstw,dsth
 114  * Transformed they are:
 115  *     tx,               ty
 116  *     tx       +dxdy*H, ty       +dydy*H
 117  *     tx+dxdx*W,        ty+dydx*W
 118  *     tx+dxdx*W+dxdy*H, ty+dydx*W+dydy*H
 119  */
 120 /* We reject coordinates not less than 1<<30 so that the distance between */
 121 /* any 2 of them is less than 1<<31 which would overflow into the sign */
 122 /* bit of a signed long value used to represent fixed point coordinates. */
 123 #define TX_FIXED_UNSAFE(v)  (fabs(v) >= (1<<30))
 124 static jboolean
 125 checkOverflow(jint dxoff, jint dyoff,
 126               SurfaceDataBounds *pBounds,
 127               TransformInfo *pItxInfo,
 128               jdouble *retx, jdouble *rety)
 129 {
 130     jdouble x, y;
 131 
 132     x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */
 133     y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */
 134     Transform_transform(pItxInfo, &x, &y);
 135     *retx = x;
 136     *rety = y;
 137     if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
 138         return JNI_TRUE;
 139     }
 140 
 141     x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */
 142     y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */
 143     Transform_transform(pItxInfo, &x, &y);
 144     if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
 145         return JNI_TRUE;
 146     }
 147 
 148     x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */
 149     y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */
 150     Transform_transform(pItxInfo, &x, &y);
 151     if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
 152         return JNI_TRUE;
 153     }
 154 
 155     x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */
 156     y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */
 157     Transform_transform(pItxInfo, &x, &y);
 158     if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
 159         return JNI_TRUE;
 160     }
 161 
 162     return JNI_FALSE;
 163 }
 164 
 165 /*
 166  * Fill the edge buffer with pairs of coordinates representing the maximum
 167  * left and right pixels of the destination surface that should be processed
 168  * on each scanline, clipped to the bounds parameter.
 169  * The number of scanlines to calculate is implied by the bounds parameter.
 170  * Only pixels that map back through the specified (inverse) transform to a
 171  * source coordinate that falls within the (0, 0, sw, sh) bounds of the
 172  * source image should be processed.
 173  * pEdges points to an array of jints that holds 2 + numedges*2 values where
 174  * numedges should match (pBounds->y2 - pBounds->y1).
 175  * The first two jints in pEdges should be set to y1 and y2 and every pair
 176  * of jints after that represent the xmin,xmax of all pixels in range of
 177  * the transformed blit for the corresponding scanline.
 178  */
 179 static void
 180 calculateEdges(jint *pEdges,
 181                SurfaceDataBounds *pBounds,
 182                TransformInfo *pItxInfo,
 183                jlong xbase, jlong ybase,
 184                juint sw, juint sh)
 185 {
 186     jlong dxdxlong, dydxlong;
 187     jlong dxdylong, dydylong;
 188     jlong drowxlong, drowylong;
 189     jint dx1, dy1, dx2, dy2;
 190 
 191     dxdxlong = DblToLong(pItxInfo->dxdx);
 192     dydxlong = DblToLong(pItxInfo->dydx);
 193     dxdylong = DblToLong(pItxInfo->dxdy);
 194     dydylong = DblToLong(pItxInfo->dydy);
 195 
 196     dx1 = pBounds->x1;
 197     dy1 = pBounds->y1;
 198     dx2 = pBounds->x2;
 199     dy2 = pBounds->y2;
 200     *pEdges++ = dy1;
 201     *pEdges++ = dy2;
 202 
 203     drowxlong = (dx2-dx1-1) * dxdxlong;
 204     drowylong = (dx2-dx1-1) * dydxlong;
 205 
 206     while (dy1 < dy2) {
 207         jlong xlong, ylong;
 208 
 209         dx1 = pBounds->x1;
 210         dx2 = pBounds->x2;
 211 
 212         xlong = xbase;
 213         ylong = ybase;
 214         while (dx1 < dx2 &&
 215                (((juint) WholeOfLong(ylong)) >= sh ||
 216                 ((juint) WholeOfLong(xlong)) >= sw))
 217         {
 218             dx1++;
 219             xlong += dxdxlong;
 220             ylong += dydxlong;
 221         }
 222 
 223         xlong = xbase + drowxlong;
 224         ylong = ybase + drowylong;
 225         while (dx2 > dx1 &&
 226                (((juint) WholeOfLong(ylong)) >= sh ||
 227                 ((juint) WholeOfLong(xlong)) >= sw))
 228         {
 229             dx2--;
 230             xlong -= dxdxlong;
 231             ylong -= dydxlong;
 232         }
 233 
 234         *pEdges++ = dx1;
 235         *pEdges++ = dx2;
 236 
 237         /* Increment to next scanline */
 238         xbase += dxdylong;
 239         ybase += dydylong;
 240         dy1++;
 241     }
 242 }
 243 
     /*
      * Forward declaration of the per-pixel fallback defined further down in
      * this file.  The Transform entry point calls it instead of the
      * fixed-point loop when checkOverflow() reports that a corner of the
      * destination bounds is unsafe for fixed-point stepping.
      */
 244 static void
 245 Transform_SafeHelper(JNIEnv *env,
 246                      SurfaceDataOps *srcOps,
 247                      SurfaceDataOps *dstOps,
 248                      SurfaceDataRasInfo *pSrcInfo,
 249                      SurfaceDataRasInfo *pDstInfo,
 250                      NativePrimitive *pMaskBlitPrim,
 251                      CompositeInfo *pCompInfo,
 252                      TransformHelperFunc *pHelperFunc,
 253                      TransformInterpFunc *pInterpFunc,
 254                      RegionData *pClipInfo, TransformInfo *pItxInfo,
 255                      jint *pData, jint *pEdges,
 256                      jint dxoff, jint dyoff, jint sw, jint sh);
 257 
 258 /*
 259  * Class:     sun_java2d_loops_TransformHelper
 260  * Method:    Transform
 261  * Signature: (Lsun/java2d/loops/MaskBlit;Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;Ljava/awt/geom/AffineTransform;IIIIIIIII[III)V
 262  */
 263 JNIEXPORT void JNICALL
 264 Java_sun_java2d_loops_TransformHelper_Transform
 265     (JNIEnv *env, jobject self,
 266      jobject maskblit,
 267      jobject srcData, jobject dstData,
 268      jobject comp, jobject clip,
 269      jobject itxform, jint txtype,
 270      jint sx1, jint sy1, jint sx2, jint sy2,
 271      jint dx1, jint dy1, jint dx2, jint dy2,
 272      jintArray edgeArray, jint dxoff, jint dyoff)
 273 {
 274     SurfaceDataOps *srcOps;
 275     SurfaceDataOps *dstOps;
 276     SurfaceDataRasInfo srcInfo;
 277     SurfaceDataRasInfo dstInfo;
 278     NativePrimitive *pHelperPrim;
 279     NativePrimitive *pMaskBlitPrim;
 280     CompositeInfo compInfo;
 281     RegionData clipInfo;
 282     TransformInfo itxInfo;
 283     jint maxlinepix;
 284     TransformHelperFunc *pHelperFunc;
 285     TransformInterpFunc *pInterpFunc;
 286     jdouble xorig, yorig;
 287     jlong numedges;
 288     jint *pEdges;
 289     jint edgebuf[2 + MAXEDGES * 2];
 290     union {
 291         jlong align;
 292         jint data[LINE_SIZE];
 293     } rgb;
 294 
 295 #ifdef MAKE_STUBS
 296     static int th_initialized;
 297 
 298     /* For debugging only - used to swap in alternate funcs for perf testing */
 299     if (!th_initialized) {
 300         if (getenv("TXSTUB") != 0) {
 301             pBilinearFunc = BilinearInterpStub;
 302             pBicubicFunc = BicubicInterpStub;
 303         } else if (getenv("TXNOVIS") != 0) {
 304             pBilinearFunc = BilinearInterp;
 305             pBicubicFunc = BicubicInterp;
 306         }
 307         th_initialized = 1;
 308     }
 309 #endif /* MAKE_STUBS */
 310 
 311     pHelperPrim = GetNativePrim(env, self);
 312     if (pHelperPrim == NULL) {
 313         /* Should never happen... */
 314         return;
 315     }
 316     pMaskBlitPrim = GetNativePrim(env, maskblit);
 317     if (pMaskBlitPrim == NULL) {
 318         /* Exception was thrown by GetNativePrim */
 319         return;
 320     }
 321     if (pMaskBlitPrim->pCompType->getCompInfo != NULL) {
 322         (*pMaskBlitPrim->pCompType->getCompInfo)(env, &compInfo, comp);
 323     }
 324     if (Region_GetInfo(env, clip, &clipInfo)) {
 325         return;
 326     }
 327 
 328     srcOps = SurfaceData_GetOps(env, srcData);
 329     if (srcOps == 0) {
 330         return;
 331     }
 332     dstOps = SurfaceData_GetOps(env, dstData);
 333     if (dstOps == 0) {
 334         return;
 335     }
 336 
 337     /*
 338      * Grab the appropriate pointer to the helper and interpolation
 339      * routines and calculate the maximum number of destination pixels
 340      * that can be processed in one intermediate buffer based on the
 341      * size of the buffer and the number of samples needed per pixel.
 342      */
 343     switch (txtype) {
 344     case java_awt_image_AffineTransformOp_TYPE_NEAREST_NEIGHBOR:
 345         pHelperFunc = pHelperPrim->funcs.transformhelpers->nnHelper;
 346         pInterpFunc = NULL;
 347         maxlinepix = LINE_SIZE;
 348         break;
 349     case java_awt_image_AffineTransformOp_TYPE_BILINEAR:
 350         pHelperFunc = pHelperPrim->funcs.transformhelpers->blHelper;
 351         pInterpFunc = pBilinearFunc;
 352         maxlinepix = LINE_SIZE / 4;
 353         break;
 354     case java_awt_image_AffineTransformOp_TYPE_BICUBIC:
 355         pHelperFunc = pHelperPrim->funcs.transformhelpers->bcHelper;
 356         pInterpFunc = pBicubicFunc;
 357         maxlinepix = LINE_SIZE / 16;
 358         break;
 359     default:
 360         // Should not happen, but just in case.
 361         return;
 362     }
 363 
 364     srcInfo.bounds.x1 = sx1;
 365     srcInfo.bounds.y1 = sy1;
 366     srcInfo.bounds.x2 = sx2;
 367     srcInfo.bounds.y2 = sy2;
 368     dstInfo.bounds.x1 = dx1;
 369     dstInfo.bounds.y1 = dy1;
 370     dstInfo.bounds.x2 = dx2;
 371     dstInfo.bounds.y2 = dy2;
 372     SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
 373     if (srcOps->Lock(env, srcOps, &srcInfo, pHelperPrim->srcflags)
 374         != SD_SUCCESS)
 375     {
 376         /* edgeArray should already contain zeros for min/maxy */
 377         return;
 378     }
 379     if (dstOps->Lock(env, dstOps, &dstInfo, pMaskBlitPrim->dstflags)
 380         != SD_SUCCESS)
 381     {
 382         SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
 383         /* edgeArray should already contain zeros for min/maxy */
 384         return;
 385     }
 386     Region_IntersectBounds(&clipInfo, &dstInfo.bounds);


 387 
 388     numedges = (((jlong) dstInfo.bounds.y2) - ((jlong) dstInfo.bounds.y1));
 389     if (numedges <= 0) {
 390         pEdges = NULL;
 391     } else if (!JNU_IsNull(env, edgeArray)) {
 392         /*
 393          * Ideally Java should allocate an array large enough, but if
 394          * we ever have a miscommunication about the number of edge
 395          * lines, or if the Java array calculation should overflow to
 396          * a positive number and succeed in allocating an array that
 397          * is too small, we need to verify that it can still hold the
 398          * number of integers that we plan to store to be safe.
 399          */
 400         jsize edgesize = (*env)->GetArrayLength(env, edgeArray);
 401         /* (edgesize/2 - 1) should avoid any overflow or underflow. */
 402         pEdges = (((edgesize / 2) - 1) >= numedges)
 403             ? (*env)->GetPrimitiveArrayCritical(env, edgeArray, NULL)
 404             : NULL;
 405     } else if (numedges > MAXEDGES) {
 406         /* numedges variable (jlong) can be at most ((1<<32)-1) */
 407         /* memsize can overflow a jint, but not a jlong */
 408         jlong memsize = ((numedges * 2) + 2) * sizeof(*pEdges);
 409         pEdges = (memsize == ((size_t) memsize))
 410             ? malloc((size_t) memsize)
 411             : NULL;
 412     } else {
 413         pEdges = edgebuf;
 414     }
 415 
 416     if (pEdges == NULL) {
 417         if (!(*env)->ExceptionCheck(env) && numedges > 0) {
 418             JNU_ThrowInternalError(env, "Unable to allocate edge list");
 419         }
 420         SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
 421         SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
 422         /* edgeArray should already contain zeros for min/maxy */
 423         return;
 424     }
 425 
 426     Transform_GetInfo(env, itxform, &itxInfo);
 427 
 428     if (!Region_IsEmpty(&clipInfo)) {
 429         srcOps->GetRasInfo(env, srcOps, &srcInfo);
 430         dstOps->GetRasInfo(env, dstOps, &dstInfo);
 431         if (srcInfo.rasBase == NULL || dstInfo.rasBase == NULL) {
 432             pEdges[0] = pEdges[1] = 0;
 433         } else if (checkOverflow(dxoff, dyoff, &dstInfo.bounds,
 434                                  &itxInfo, &xorig, &yorig))
 435         {
 436             Transform_SafeHelper(env, srcOps, dstOps,
 437                                  &srcInfo, &dstInfo,
 438                                  pMaskBlitPrim, &compInfo,
 439                                  pHelperFunc, pInterpFunc,
 440                                  &clipInfo, &itxInfo, rgb.data, pEdges,
 441                                  dxoff, dyoff, sx2-sx1, sy2-sy1);
 442         } else {
 443             SurfaceDataBounds span;
 444             jlong dxdxlong, dydxlong;
 445             jlong dxdylong, dydylong;
 446             jlong xbase, ybase;
 447 
 448             dxdxlong = DblToLong(itxInfo.dxdx);
 449             dydxlong = DblToLong(itxInfo.dydx);
 450             dxdylong = DblToLong(itxInfo.dxdy);
 451             dydylong = DblToLong(itxInfo.dydy);
 452             xbase = DblToLong(xorig);
 453             ybase = DblToLong(yorig);
 454 
 455             calculateEdges(pEdges, &dstInfo.bounds, &itxInfo,
 456                            xbase, ybase, sx2-sx1, sy2-sy1);
 457 
 458             Region_StartIteration(env, &clipInfo);
 459             while (Region_NextIteration(&clipInfo, &span)) {
 460                 jlong rowxlong, rowylong;
 461                 void *pDst;
 462 
 463                 dy1 = span.y1;
 464                 dy2 = span.y2;
 465                 rowxlong = xbase + (dy1 - dstInfo.bounds.y1) * dxdylong;
 466                 rowylong = ybase + (dy1 - dstInfo.bounds.y1) * dydylong;
 467 
 468                 while (dy1 < dy2) {
 469                     jlong xlong, ylong;
 470 
 471                     /* Note - process at most one scanline at a time. */
 472 
 473                     dx1 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 2];
 474                     dx2 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 3];
 475                     if (dx1 < span.x1) dx1 = span.x1;
 476                     if (dx2 > span.x2) dx2 = span.x2;
 477 
 478                     /* All pixels from dx1 to dx2 have centers in bounds */
 479                     while (dx1 < dx2) {
 480                         /* Can process at most one buffer full at a time */
 481                         jint numpix = dx2 - dx1;
 482                         if (numpix > maxlinepix) {
 483                             numpix = maxlinepix;
 484                         }
 485 
 486                         xlong =
 487                             rowxlong + ((dx1 - dstInfo.bounds.x1) * dxdxlong);
 488                         ylong =
 489                             rowylong + ((dx1 - dstInfo.bounds.x1) * dydxlong);
 490 
 491                         /* Get IntArgbPre pixel data from source */
 492                         (*pHelperFunc)(&srcInfo,
 493                                        rgb.data, numpix,
 494                                        xlong, dxdxlong,
 495                                        ylong, dydxlong);
 496 
 497                         /* Interpolate result pixels if needed */
 498                         if (pInterpFunc) {
 499                             (*pInterpFunc)(rgb.data, numpix,
 500                                            FractOfLong(xlong-LongOneHalf),
 501                                            FractOfLong(dxdxlong),
 502                                            FractOfLong(ylong-LongOneHalf),
 503                                            FractOfLong(dydxlong));
 504                         }
 505 
 506                         /* Store/Composite interpolated pixels into dest */
 507                         pDst = PtrCoord(dstInfo.rasBase,
 508                                         dx1, dstInfo.pixelStride,
 509                                         dy1, dstInfo.scanStride);
 510                         (*pMaskBlitPrim->funcs.maskblit)(pDst, rgb.data,
 511                                                          0, 0, 0,
 512                                                          numpix, 1,
 513                                                          &dstInfo, &srcInfo,
 514                                                          pMaskBlitPrim,
 515                                                          &compInfo);
 516 
 517                         /* Increment to next buffer worth of input pixels */
 518                         dx1 += maxlinepix;
 519                     }
 520 
 521                     /* Increment to next scanline */
 522                     rowxlong += dxdylong;
 523                     rowylong += dydylong;
 524                     dy1++;
 525                 }
 526             }
 527             Region_EndIteration(env, &clipInfo);
 528         }
 529         SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
 530         SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
 531     } else {
 532         pEdges[0] = pEdges[1] = 0;
 533     }
 534 
 535     if (!JNU_IsNull(env, edgeArray)) {
 536         (*env)->ReleasePrimitiveArrayCritical(env, edgeArray, pEdges, 0);
 537     } else if (pEdges != edgebuf) {
 538         free(pEdges);
 539     }
 540     SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
 541     SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
 542 }
 543 
 544 static void
 545 Transform_SafeHelper(JNIEnv *env,
 546                      SurfaceDataOps *srcOps,
 547                      SurfaceDataOps *dstOps,
 548                      SurfaceDataRasInfo *pSrcInfo,
 549                      SurfaceDataRasInfo *pDstInfo,
 550                      NativePrimitive *pMaskBlitPrim,
 551                      CompositeInfo *pCompInfo,
 552                      TransformHelperFunc *pHelperFunc,
 553                      TransformInterpFunc *pInterpFunc,
 554                      RegionData *pClipInfo, TransformInfo *pItxInfo,
 555                      jint *pData, jint *pEdges,
 556                      jint dxoff, jint dyoff, jint sw, jint sh)
 557 {
 558     SurfaceDataBounds span;
 559     jint dx1, dx2;
 560     jint dy1, dy2;
 561     jint i, iy;
 562 
 563     dy1 = pDstInfo->bounds.y1;
 564     dy2 = pDstInfo->bounds.y2;
 565     dx1 = pDstInfo->bounds.x1;
 566     dx2 = pDstInfo->bounds.x2;
 567     pEdges[0] = dy1;
 568     pEdges[1] = dy2;
 569     for (iy = dy1; iy < dy2; iy++) {
 570         jint i = (iy - dy1) * 2;
 571         /* row spans are set to max,min until we find a pixel in range below */
 572         pEdges[i + 2] = dx2;
 573         pEdges[i + 3] = dx1;
 574     }
 575 
 576     Region_StartIteration(env, pClipInfo);
 577     while (Region_NextIteration(pClipInfo, &span)) {
 578         dy1 = span.y1;
 579         dy2 = span.y2;
 580         while (dy1 < dy2) {
 581             dx1 = span.x1;
 582             dx2 = span.x2;
 583             i = (dy1 - pDstInfo->bounds.y1) * 2;
 584             while (dx1 < dx2) {
 585                 jdouble x, y;
 586                 jlong xlong, ylong;
 587 
 588                 x = dxoff + dx1 + 0.5;
 589                 y = dyoff + dy1 + 0.5;
 590                 Transform_transform(pItxInfo, &x, &y);
 591                 xlong = DblToLong(x);
 592                 ylong = DblToLong(y);
 593 
 594                 /* Process only pixels with centers in bounds
 595                  * Test double values to avoid overflow in conversion
 596                  * to long values and then also test the long values
 597                  * in case they rounded up and out of bounds during
 598                  * the conversion.
 599                  */
 600                 if (x >= 0 && y >= 0 && x < sw && y < sh &&
 601                     WholeOfLong(xlong) < sw &&
 602                     WholeOfLong(ylong) < sh)
 603                 {
 604                     void *pDst;
 605 
 606                     if (pEdges[i + 2] > dx1) {
 607                         pEdges[i + 2] = dx1;
 608                     }
 609                     if (pEdges[i + 3] <= dx1) {
 610                         pEdges[i + 3] = dx1 + 1;
 611                     }
 612 
 613                     /* Get IntArgbPre pixel data from source */
 614                     (*pHelperFunc)(pSrcInfo,
 615                                    pData, 1,
 616                                    xlong, 0,
 617                                    ylong, 0);
 618 
 619                     /* Interpolate result pixels if needed */
 620                     if (pInterpFunc) {
 621                         (*pInterpFunc)(pData, 1,
 622                                        FractOfLong(xlong-LongOneHalf), 0,
 623                                        FractOfLong(ylong-LongOneHalf), 0);
 624                     }
 625 
 626                     /* Store/Composite interpolated pixels into dest */
 627                     pDst = PtrCoord(pDstInfo->rasBase,
 628                                     dx1, pDstInfo->pixelStride,
 629                                     dy1, pDstInfo->scanStride);
 630                     (*pMaskBlitPrim->funcs.maskblit)(pDst, pData,
 631                                                      0, 0, 0,
 632                                                      1, 1,
 633                                                      pDstInfo, pSrcInfo,
 634                                                      pMaskBlitPrim,
 635                                                      pCompInfo);
 636                 }
 637 
 638                 /* Increment to next input pixel */
 639                 dx1++;
 640             }
 641 
 642             /* Increment to next scanline */
 643             dy1++;
 644         }
 645     }
 646     Region_EndIteration(env, pClipInfo);
 647 }
 648 
 649 #define BL_INTERP_V1_to_V2_by_F(v1, v2, f) \
 650     (((v1)<<8) + ((v2)-(v1))*(f))
 651 
 652 #define BL_ACCUM(comp) \
 653     do { \
 654         jint c1 = ((jubyte *) pRGB)[comp]; \
 655         jint c2 = ((jubyte *) pRGB)[comp+4]; \
 656         jint cR = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \
 657         c1 = ((jubyte *) pRGB)[comp+8]; \
 658         c2 = ((jubyte *) pRGB)[comp+12]; \
 659         c2 = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \
 660         cR = BL_INTERP_V1_to_V2_by_F(cR, c2, yfactor); \
 661         ((jubyte *)pRes)[comp] = (jubyte) ((cR + (1<<15)) >> 16); \
 662     } while (0)
 663 
 664 static void
 665 BilinearInterp(jint *pRGB, jint numpix,
 666                jint xfract, jint dxfract,
 667                jint yfract, jint dyfract)
 668 {
 669     jint j;
 670     jint *pRes = pRGB;
 671 
 672     for (j = 0; j < numpix; j++) {
 673         jint xfactor;
 674         jint yfactor;
 675         xfactor = URShift(xfract, 32-8);
 676         yfactor = URShift(yfract, 32-8);
 677         BL_ACCUM(0);
 678         BL_ACCUM(1);
 679         BL_ACCUM(2);
 680         BL_ACCUM(3);
 681         pRes++;
 682         pRGB += 4;
 683         xfract += dxfract;
 684         yfract += dyfract;
 685     }
 686 }
 687 
     /*
      * Branch-free clamp of the integer variable 'val' to the range
      * [0, max], performed in place.  The masks are built from (val >> 31),
      * so this relies on val being a 32-bit signed integer whose right
      * shift replicates the sign bit.  Both arguments are expanded several
      * times and without added parentheses, so only plain variables or
      * constants should be passed.
      */
 688 #define SAT(val, max) \
 689     do { \
 690         val &= ~(val >> 31);  /* negatives become 0 */ \
 691         val -= max;           /* only overflows are now positive */ \
 692         val &= (val >> 31);   /* positives become 0 */ \
 693         val += max;           /* range is now [0 -> max] */ \
 694     } while (0)
 695 
     /*
      * Select the arithmetic used by the BC_* bicubic macros that follow.
      * Exactly one of BICUBIC_USE_DBL_LUT (sparc) or BICUBIC_USE_INT_MATH
      * (every other target, not only x86) is defined here.
      */
 696 #ifdef __sparc
 697 /* For sparc, floating point multiplies are faster than integer */
 698 #define BICUBIC_USE_DBL_LUT
 699 #else
 700 /* For x86, integer multiplies are faster than floating point */
 701 /* Note that on x86 Linux the choice of best algorithm varies
 702  * depending on the compiler optimization and the processor type.
 703  * Currently, the sun/awt x86 Linux builds are not optimized so
 704  * all the variations produce mediocre performance.
 705  * For now we will use the choice that works best for the Windows
 706  * build until the (lack of) optimization issues on Linux are resolved.
 707  */
 708 #define BICUBIC_USE_INT_MATH
 709 #endif
 710 
     /*
      * Three interchangeable definitions of the bicubic arithmetic macros:
      *
      *   BC_TYPE            type of the coefficients and accumulators
      *   BC_DblToCoeff(v)   converts a double coefficient to BC_TYPE
      *   BC_COEFF_ONE       the coefficient value that stands for 1.0
      *   BC_V_HALF          one half in accumulator units (its use lies
      *                      outside this part of the file; presumably a
      *                      rounding bias - TODO confirm)
      *   BC_CompToV(v)      converts an 8-bit component to BC_TYPE
      *   BC_STORE_COMPS(p)  turns accumA/accumR/accumG/accumB from the
      *                      expanding scope into integers, clamps alpha to
      *                      [0, 255] and each color to [0, alpha], and
      *                      stores them at *p packed as A<<24|R<<16|G<<8|B
      *
      * NOTE(review): nothing in the visible part of this file defines
      * BICUBIC_USE_DBL_CAST, so the first variant is presumably enabled by
      * hand or by the build - confirm.
      */
 711 #ifdef BICUBIC_USE_DBL_CAST
 712 
     /* Floating point math; components are converted with a cast. */
 713 #define BC_DblToCoeff(v)        (v)
 714 #define BC_COEFF_ONE            1.0
 715 #define BC_TYPE                 jdouble
 716 #define BC_V_HALF               0.5
 717 #define BC_CompToV(v)           ((jdouble) (v))
 718 #define BC_STORE_COMPS(pRes) \
 719     do { \
 720         jint a = (jint) accumA; \
 721         jint r = (jint) accumR; \
 722         jint g = (jint) accumG; \
 723         jint b = (jint) accumB; \
 724         SAT(a, 255); \
 725         SAT(r, a); \
 726         SAT(g, a); \
 727         SAT(b, a); \
 728         *pRes = ((a << 24) | (r << 16) | (g <<  8) | (b)); \
 729     } while (0)
 730 
 731 #endif /* BICUBIC_USE_DBL_CAST */
 732 
 733 #ifdef BICUBIC_USE_DBL_LUT
 734 
     /*
      * Floating point math; components are converted through a lookup
      * table.  The ItoD* helpers expand to 256 initializers, so that
      * ItoD_table[i] == (jdouble) i for i in 0..255.
      */
 735 #define ItoD1(v)    ((jdouble) (v))
 736 #define ItoD4(v)    ItoD1(v),  ItoD1(v+1),   ItoD1(v+2),   ItoD1(v+3)
 737 #define ItoD16(v)   ItoD4(v),  ItoD4(v+4),   ItoD4(v+8),   ItoD4(v+12)
 738 #define ItoD64(v)   ItoD16(v), ItoD16(v+16), ItoD16(v+32), ItoD16(v+48)
 739 
 740 static jdouble ItoD_table[] = {
 741     ItoD64(0), ItoD64(64), ItoD64(128), ItoD64(192)
 742 };
 743 
 744 #define BC_DblToCoeff(v)        (v)
 745 #define BC_COEFF_ONE            1.0
 746 #define BC_TYPE                 jdouble
 747 #define BC_V_HALF               0.5
 748 #define BC_CompToV(v)           ItoD_table[v]
 749 #define BC_STORE_COMPS(pRes) \
 750     do { \
 751         jint a = (jint) accumA; \
 752         jint r = (jint) accumR; \
 753         jint g = (jint) accumG; \
 754         jint b = (jint) accumB; \
 755         SAT(a, 255); \
 756         SAT(r, a); \
 757         SAT(g, a); \
 758         SAT(b, a); \
 759         *pRes = ((a << 24) | (r << 16) | (g <<  8) | (b)); \
 760     } while (0)
 761 
 762 #endif /* BICUBIC_USE_DBL_LUT */
 763 
 764 #ifdef BICUBIC_USE_INT_MATH
 765 
     /*
      * Integer math.  Coefficients are scaled by 256, and BC_ACCUM weights
      * each component by the product of two coefficients, so the
      * accumulators carry 16 fractional bits; BC_STORE_COMPS shifts them
      * out before clamping.  Here the accumulators themselves are modified.
      */
 766 #define BC_DblToCoeff(v)        ((jint) ((v) * 256))
 767 #define BC_COEFF_ONE            256
 768 #define BC_TYPE                 jint
 769 #define BC_V_HALF               (1 << 15)
 770 #define BC_CompToV(v)           ((jint) v)
 771 #define BC_STORE_COMPS(pRes) \
 772     do { \
 773         accumA >>= 16; \
 774         accumR >>= 16; \
 775         accumG >>= 16; \
 776         accumB >>= 16; \
 777         SAT(accumA, 255); \
 778         SAT(accumR, accumA); \
 779         SAT(accumG, accumA); \
 780         SAT(accumB, accumA); \
 781         *pRes = ((accumA << 24) | (accumR << 16) | (accumG << 8) | (accumB)); \
 782     } while (0)
 783 
 784 #endif /* BICUBIC_USE_INT_MATH */
 785 
 786 #define BC_ACCUM(index, ycindex, xcindex) \
 787     do { \
 788         BC_TYPE factor = bicubic_coeff[xcindex] * bicubic_coeff[ycindex]; \
 789         int rgb; \
 790         rgb = pRGB[index]; \
 791         accumB += BC_CompToV((rgb >>  0) & 0xff) * factor; \
 792         accumG += BC_CompToV((rgb >>  8) & 0xff) * factor; \
 793         accumR += BC_CompToV((rgb >> 16) & 0xff) * factor; \
 794         accumA += BC_CompToV((rgb >> 24) & 0xff) * factor; \
 795     } while (0)
 796 
/*
 * Table of bicubic kernel weights in BC_TYPE units, filled in by
 * init_bicubic_table().  Entries 0..383 are evaluated from the kernel
 * formulas at x = index / 256.0; entries 384..512 are derived from the
 * earlier entries so that the four weights used for any one fraction
 * add up to BC_COEFF_ONE.
 */
static BC_TYPE bicubic_coeff[513];
/* Set to JNI_TRUE by init_bicubic_table() once the table has been filled. */
static jboolean bicubictableinited;
 799 
 800 static void
 801 init_bicubic_table(jdouble A)
 802 {
 803     /*
 804      * The following formulas are designed to give smooth
 805      * results when 'A' is -0.5 or -1.0.
 806      */
 807     int i;
 808     for (i = 0; i < 256; i++) {
 809         /* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */
 810         jdouble x = i / 256.0;
 811         x = ((A+2)*x - (A+3))*x*x + 1;
 812         bicubic_coeff[i] = BC_DblToCoeff(x);
 813     }
 814 
 815     for (; i < 384; i++) {
 816         /* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */
 817         jdouble x = i / 256.0;
 818         x = ((A*x - 5*A)*x + 8*A)*x - 4*A;
 819         bicubic_coeff[i] = BC_DblToCoeff(x);
 820     }
 821 
 822     bicubic_coeff[384] = (BC_COEFF_ONE - bicubic_coeff[128]*2) / 2;
 823 
 824     for (i++; i <= 512; i++) {
 825         bicubic_coeff[i] = BC_COEFF_ONE - (bicubic_coeff[512-i] +
 826                                            bicubic_coeff[i-256] +
 827                                            bicubic_coeff[768-i]);
 828     }
 829 
 830     bicubictableinited = JNI_TRUE;
 831 }
 832 
 833 static void
 834 BicubicInterp(jint *pRGB, jint numpix,
 835               jint xfract, jint dxfract,
 836               jint yfract, jint dyfract)
 837 {
 838     jint i;
 839     jint *pRes = pRGB;
 840 
 841     if (!bicubictableinited) {
 842         init_bicubic_table(-0.5);
 843     }
 844 
 845     for (i = 0; i < numpix; i++) {
 846         BC_TYPE accumA, accumR, accumG, accumB;
 847         jint xfactor, yfactor;
 848 
 849         xfactor = URShift(xfract, 32-8);
 850         yfactor = URShift(yfract, 32-8);
 851         accumA = accumR = accumG = accumB = BC_V_HALF;
 852         BC_ACCUM(0, yfactor+256, xfactor+256);
 853         BC_ACCUM(1, yfactor+256, xfactor+  0);
 854         BC_ACCUM(2, yfactor+256, 256-xfactor);
 855         BC_ACCUM(3, yfactor+256, 512-xfactor);
 856         BC_ACCUM(4, yfactor+  0, xfactor+256);
 857         BC_ACCUM(5, yfactor+  0, xfactor+  0);
 858         BC_ACCUM(6, yfactor+  0, 256-xfactor);
 859         BC_ACCUM(7, yfactor+  0, 512-xfactor);
 860         BC_ACCUM(8, 256-yfactor, xfactor+256);
 861         BC_ACCUM(9, 256-yfactor, xfactor+  0);
 862         BC_ACCUM(10, 256-yfactor, 256-xfactor);
 863         BC_ACCUM(11, 256-yfactor, 512-xfactor);
 864         BC_ACCUM(12, 512-yfactor, xfactor+256);
 865         BC_ACCUM(13, 512-yfactor, xfactor+  0);
 866         BC_ACCUM(14, 512-yfactor, 256-xfactor);
 867         BC_ACCUM(15, 512-yfactor, 512-xfactor);
 868         BC_STORE_COMPS(pRes);
 869         pRes++;
 870         pRGB += 16;
 871         xfract += dxfract;
 872         yfract += dyfract;
 873     }
 874 }
 875 
 876 #ifdef MAKE_STUBS
 877 
 878 static void
 879 BilinearInterpStub(jint *pRGBbase, jint numpix,
 880                    jint xfract, jint dxfract,
 881                    jint yfract, jint dyfract)
 882 {
 883     jint *pRGB = pRGBbase;
 884     while (--numpix >= 0) {
 885         *pRGBbase = *pRGB;
 886         pRGBbase += 1;
 887         pRGB += 4;
 888     }
 889 }
 890 
 891 static void
 892 BicubicInterpStub(jint *pRGBbase, jint numpix,
 893                   jint xfract, jint dxfract,
 894                   jint yfract, jint dyfract)
 895 {
 896     jint *pRGB = pRGBbase+5;
 897     while (--numpix >= 0) {
 898         *pRGBbase = *pRGB;
 899         pRGBbase += 1;
 900         pRGB += 16;
 901     }
 902 }
 903 
 904 #endif /* MAKE_STUBS */
--- EOF ---