1 /*
   2  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include "awt.h"
  27 #include <sun_java2d_windows_GDIBlitLoops.h>
  28 #include "gdefs.h"
  29 #include "Trace.h"
  30 #include "GDIWindowSurfaceData.h"
  31 
// Gray-ramp color table (256 RGBQUAD entries) used by nativeBlit for 8-bit
// sources that are blitted without a LUT.  It is allocated on first use and
// is never freed.
static RGBQUAD *byteGrayPalette = NULL;
  33 
  34 extern "C" {
  35 
  36 typedef struct tagBitmapheader  {
  37     BITMAPINFOHEADER bmiHeader;
  38     union {
  39         DWORD           dwMasks[3];
  40         RGBQUAD         palette[256];
  41     } colors;
  42 } BmiType;
  43 
  44 /*
  45  * Some GDI functions functions will fail if they operate on memory which spans
  46  * virtual allocations as used by modern garbage collectors (ie ZGC).
  47  * So if the call to SetDIBitsToDevice fails, we will re-try it on malloced
  48  * memory rather than the pinned Java heap memory.
  49  * Once Microsoft fix the GDI bug, the small performance penalty of this retry
  50  * will be gone.
  51  */
  52 static void retryingSetDIBitsToDevice(
  53     HDC              hdc,
  54     int              xDest,
  55     int              yDest,
  56     DWORD            w,
  57     DWORD            h,
  58     int              xSrc,
  59     int              ySrc,
  60     UINT             StartScan,
  61     UINT             cLines,
  62     const VOID       *lpvBits,
  63     BITMAPINFO       *lpbmi,
  64     UINT             ColorUse) {
  65 
  66 #ifdef DEBUG_PERF
  67     LARGE_INTEGER    ts1, ts2;
  68     QueryPerformanceCounter(&ts1);
  69 #endif
  70 
  71     int ret =
  72         SetDIBitsToDevice(hdc, xDest, yDest, w, h,
  73                           xSrc, ySrc, StartScan, cLines, lpvBits,
  74                           lpbmi, ColorUse);
  75 
  76     if (ret != 0 || h == 0) {
  77 #ifdef DEBUG_PERF
  78          QueryPerformanceCounter(&ts2);
  79          printf("success time: %zd\n", (ts2.QuadPart-ts1.QuadPart));
  80 #endif
  81         return;
  82     }
  83 
  84     size_t size = lpbmi->bmiHeader.biSizeImage;
  85     void* imageData = NULL;
  86     try {
  87         imageData = safe_Malloc(size);
  88     } catch (std::bad_alloc&) {
  89     }
  90     if (imageData == NULL) {
  91         return;
  92     }
  93     memcpy(imageData, lpvBits, size); // this is the most expensive part.
  94     SetDIBitsToDevice(hdc, xDest, yDest, w, h,
  95                       xSrc, ySrc, StartScan, cLines, imageData,
  96                       lpbmi, ColorUse);
  97     free(imageData);
  98 
  99 #ifdef DEBUG_PERF
 100     QueryPerformanceCounter(&ts2);
 101     printf("with retry time: %zd\n", (ts2.QuadPart-ts1.QuadPart));
 102 #endif
 103 
 104 };
 105 
 106 /*
 107  * Class:     sun_java2d_windows_GDIBlitLoops
 108  * Method:    nativeBlit
 109  * Signature: (Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;IIIIIIZ)V
 110  */
 111 JNIEXPORT void JNICALL
 112 Java_sun_java2d_windows_GDIBlitLoops_nativeBlit
 113     (JNIEnv *env, jobject joSelf,
 114      jobject srcData, jobject dstData,
 115      jobject clip,
 116      jint srcx, jint srcy,
 117      jint dstx, jint dsty,
 118      jint width, jint height,
 119      jint rmask, jint gmask, jint bmask,
 120      jboolean needLut)
 121 {
 122     J2dTraceLn(J2D_TRACE_INFO, "GDIBlitLoops_nativeBlit");
 123 
 124     SurfaceDataRasInfo srcInfo;
 125     SurfaceDataOps *srcOps = SurfaceData_GetOps(env, srcData);
 126     GDIWinSDOps *dstOps = GDIWindowSurfaceData_GetOps(env, dstData);
 127     jint lockFlags;
 128 
 129     srcInfo.bounds.x1 = srcx;
 130     srcInfo.bounds.y1 = srcy;
 131     srcInfo.bounds.x2 = srcx + width;
 132     srcInfo.bounds.y2 = srcy + height;
 133     if (needLut) {
 134         lockFlags = (SD_LOCK_READ | SD_LOCK_LUT);
 135     } else {
 136         lockFlags = SD_LOCK_READ;
 137     }
 138     // This method is used among other things for on-screen copyArea, in which
 139     // case the source and destination surfaces are the same. It is important
 140     // to first lock the source and then get the hDC for the destination
 141     // surface because the same per-thread hDC will be used for both
 142     // and we need to have the correct clip set to the hDC
 143     // used with the SetDIBitsToDevice call.
 144     if (srcOps->Lock(env, srcOps, &srcInfo, lockFlags) != SD_SUCCESS) {
 145         return;
 146     }
 147 
 148     SurfaceDataBounds dstBounds = {dstx, dsty, dstx + width, dsty + height};
 149     // Intersect the source and dest rects. Note that the source blit bounds
 150     // will be adjusted to the surfaces's bounds if needed.
 151     SurfaceData_IntersectBlitBounds(&(srcInfo.bounds), &dstBounds,
 152                                     dstx - srcx, dsty - srcy);
 153 
 154     srcx = srcInfo.bounds.x1;
 155     srcy = srcInfo.bounds.y1;
 156     dstx = dstBounds.x1;
 157     dsty = dstBounds.y1;
 158     width = srcInfo.bounds.x2 - srcInfo.bounds.x1;
 159     height = srcInfo.bounds.y2 - srcInfo.bounds.y1;
 160 
 161     if (width > 0 && height > 0)
 162     {
 163         BmiType bmi;
 164         // REMIND: A performance tweak here would be to make some of this
 165         // data static.  For example, we could have one structure that is
 166         // always used for ByteGray copies and we only change dynamic data
 167         // in the structure with every new copy.  Also, we could store
 168         // structures with Ops or with the Java objects so that surfaces
 169         // could retain their own DIB info and we would not need to
 170         // recreate it every time.
 171 
 172         // GetRasInfo implicitly calls GetPrimitiveArrayCritical
 173         // and since GetDC uses JNI it needs to be called first.
 174         HDC hDC = dstOps->GetDC(env, dstOps, 0, NULL, clip, NULL, 0);
 175         if (hDC == NULL) {
 176             SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
 177             return;
 178         }
 179         srcOps->GetRasInfo(env, srcOps, &srcInfo);
 180         if (srcInfo.rasBase == NULL) {
 181             dstOps->ReleaseDC(env, dstOps, hDC);
 182             SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
 183             return;
 184         }
 185         void *rasBase = ((char *)srcInfo.rasBase) + srcInfo.scanStride * srcy +
 186                         srcInfo.pixelStride * srcx;
 187 
 188         // If scanlines are DWORD-aligned (scanStride is a multiple of 4),
 189         // then we can do the work much faster.  This is due to a constraint
 190         // in the way DIBs are structured and parsed by GDI
 191         jboolean fastBlt = ((srcInfo.scanStride & 0x03) == 0);
 192         bmi.bmiHeader.biSize = sizeof(bmi.bmiHeader);
 193         bmi.bmiHeader.biWidth = srcInfo.scanStride/srcInfo.pixelStride;
 194         // fastBlt copies whole image in one call; else copy line-by-line
 195         LONG dwHeight = srcInfo.bounds.y2 - srcInfo.bounds.y1;
 196         bmi.bmiHeader.biHeight = (fastBlt) ? -dwHeight : -1;
 197         bmi.bmiHeader.biPlanes = 1;
 198         bmi.bmiHeader.biBitCount = (WORD)srcInfo.pixelStride * 8;
 199         // 1,3,4 byte use BI_RGB, 2 byte use BI_BITFIELD...
 200         // 4 byte _can_ use BI_BITFIELD, but this seems to cause a performance
 201         // penalty.  Since we only ever have one format (xrgb) for 32-bit
 202         // images that enter this function, just use BI_RGB.
 203         // Could do BI_RGB for 2-byte 555 format, but no perceived
 204         // performance benefit.
 205         bmi.bmiHeader.biCompression = (srcInfo.pixelStride != 2)
 206                 ? BI_RGB : BI_BITFIELDS;
 207         bmi.bmiHeader.biSizeImage = (bmi.bmiHeader.biWidth * dwHeight *
 208                                      srcInfo.pixelStride);
 209         bmi.bmiHeader.biXPelsPerMeter = 0;
 210         bmi.bmiHeader.biYPelsPerMeter = 0;
 211         bmi.bmiHeader.biClrUsed = 0;
 212         bmi.bmiHeader.biClrImportant = 0;
 213         if (srcInfo.pixelStride == 1) {
 214             // Copy palette info into bitmap for 8-bit image
 215             if (needLut) {
 216                 memcpy(bmi.colors.palette, srcInfo.lutBase, srcInfo.lutSize * sizeof(RGBQUAD));
 217                 if (srcInfo.lutSize != 256) {
 218                     bmi.bmiHeader.biClrUsed = srcInfo.lutSize;
 219                 }
 220             } else {
 221                 // If no LUT needed, must be ByteGray src.  If we have not
 222                 // yet created the byteGrayPalette, create it now and copy
 223                 // it into our temporary bmi structure.
 224                 // REMIND: byteGrayPalette is a leak since we do not have
 225                 // a mechanism to free it up.  This should be fine, since it
 226                 // is only 256 bytes for any process and only gets malloc'd
 227                 // when using ByteGray surfaces.  Eventually, we should use
 228                 // the new Disposer mechanism to delete this native memory.
 229                 if (byteGrayPalette == NULL) {
 230                     // assert (256 * sizeof(RGBQUAD)) <= SIZE_MAX
 231                     byteGrayPalette = (RGBQUAD *)safe_Malloc(256 * sizeof(RGBQUAD));
 232                     for (int i = 0; i < 256; ++i) {
 233                         byteGrayPalette[i].rgbRed = i;
 234                         byteGrayPalette[i].rgbGreen = i;
 235                         byteGrayPalette[i].rgbBlue = i;
 236                     }
 237                 }
 238                 memcpy(bmi.colors.palette, byteGrayPalette, 256 * sizeof(RGBQUAD));
 239             }
 240         } else if (srcInfo.pixelStride == 2) {
 241             // For 16-bit case, init the masks for the pixel depth
 242             bmi.colors.dwMasks[0] = rmask;
 243             bmi.colors.dwMasks[1] = gmask;
 244             bmi.colors.dwMasks[2] = bmask;
 245         }
 246 
 247         if (fastBlt) {
 248             // Window could go away at any time, leaving bits on the screen
 249             // from this GDI call, so make sure window still exists
 250             if (::IsWindowVisible(dstOps->window)) {
 251                 // Could also call StretchDIBits.  Testing showed slight
 252                 // performance advantage of SetDIBits instead, so since we
 253                 // have no need of scaling, might as well use SetDIBits.
 254                 retryingSetDIBitsToDevice(hDC, dstx, dsty, width, height,
 255                     0, 0, 0, height, rasBase,
 256                     (BITMAPINFO*)&bmi, DIB_RGB_COLORS);
 257             }
 258         } else {
 259             // Source scanlines not DWORD-aligned - copy each scanline individually
 260             for (int i = 0; i < height; i += 1) {
 261                 if (::IsWindowVisible(dstOps->window)) {
 262                     retryingSetDIBitsToDevice(hDC, dstx, dsty+i, width, 1,
 263                         0, 0, 0, 1, rasBase,
 264                         (BITMAPINFO*)&bmi, DIB_RGB_COLORS);
 265                     rasBase = (void*)((char*)rasBase + srcInfo.scanStride);
 266                 } else {
 267                     break;
 268                 }
 269             }
 270         }
 271         dstOps->ReleaseDC(env, dstOps, hDC);
 272         SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
 273     }
 274     SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
 275 
 276     return;
 277 }
 278 
 279 } // extern "C"