/*
 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.scenario.effect.impl.state;

import com.sun.javafx.geom.Rectangle;
import com.sun.javafx.geom.transform.BaseTransform;
import com.sun.javafx.geom.transform.NoninvertibleTransformException;
import com.sun.scenario.effect.Color4f;
import com.sun.scenario.effect.Effect;
import com.sun.scenario.effect.FilterContext;
import com.sun.scenario.effect.Filterable;
import com.sun.scenario.effect.ImageData;
import com.sun.scenario.effect.impl.BufferUtil;
import com.sun.scenario.effect.impl.EffectPeer;
import com.sun.scenario.effect.impl.Renderer;
import java.nio.FloatBuffer;

/**
 * The RenderState for a box filter kernel that can be applied using a
 * standard linear convolution kernel.
 * A box filter has a size that represents how large an area around a
 * given pixel should be averaged. If the size is 1.0 then just the pixel
 * itself should be averaged and the operation is a NOP. Values smaller
 * than that are automatically treated as 1.0/NOP.
 * For any odd size, the kernel weights the center pixel and an equal number
 * of pixels on either side of it equally, so the weights for size 2N+1 are:
 * [ {N copies of 1.0} 1.0 {N more copies of 1.0} ]
 * As the size grows past that integer size, we must then add another kernel
 * weight entry on both sides of the existing array of 1.0 weights and give
 * them a fractional weight of half of the amount we exceeded the last odd
 * size, so the weights for some size (2N+1)+e (e for epsilon) are:
 * [ e/2.0 {2N+1 copies of 1.0} e/2.0 ]
 * As the size continues to grow, when it reaches the next even size, we get
 * weights for size 2N+2 to be:
 * [ 0.5 {2N+1 copies of 1.0} 0.5 ]
 * and as the size continues to grow and approaches the next odd number, we
 * see that 2(N+1)+1 == 2N+2+1 == 2N+1 + 2, so (e) approaches 2 and the
 * numbers on each end of the weights array approach e/2.0 == 1.0 and we end
 * up back at the pattern for an odd size again:
 * [ 1.0 {2N+1 copies of 1.0} 1.0 ]
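 *
 * A worked example (illustrative numbers only): for size 4.2 we have
 * 2N+1 == 3 (N == 1) and e == 1.2, so the raw (un-normalized) weights are:
 * [ 0.6 1.0 1.0 1.0 0.6 ]
 * These raw weights are later normalized by their sum (4.2 here) so that
 * the kernel averages rather than brightens; see validateWeights() below.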
 *
 * ***************************
 * SOFTWARE LIMITATION CAVEAT:
 * ***************************
 *
 * Note that the highly optimized software filters for BoxBlur/Shadow will
 * actually do a very optimized "running sum" operation that is only currently
 * implemented for equal weighted kernels. Also, until recently we had always
 * been rounding down the size by casting it to an integer at a high level (in
 * the FX layer peer synchronization code), so for now the software filters
 * may only implement a subset of the above theory and new optimized loops that
 * allow partial sums on the first and last values will need to be written.
 * Until then we will be rounding the sizes to an odd size, but only in the
 * sw loops.
 */
public class BoxRenderState extends LinearConvolveRenderState {
    public static final int MAX_BOX_SIZES[] = {
        getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 0),
        getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 1),
        getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 2),
        getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 3),
    };

    private final boolean isShadow;
    private final int blurPasses;
    private final float spread;
    private Color4f shadowColor;

    private EffectCoordinateSpace space;
    private BaseTransform inputtx;
    private BaseTransform resulttx;
    private final float inputSizeH;
    private final float inputSizeV;
    private final int spreadPass;
    private float samplevectors[];

    private int validatedPass;
    private float passSize;
    private FloatBuffer weights;
    private float weightsValidSize;
    private float weightsValidSpread;
    private boolean swCompatible; // true if we can use the sw peers

    public static int getMaxSizeForKernelSize(int kernelSize, int blurPasses) {
        if (blurPasses == 0) {
            return Integer.MAX_VALUE;
        }
        // Kernel sizes are always odd, so if the supplied ksize is even then
        // we need to use ksize-1 to compute the max as that is actually the
        // largest kernel we will be able to produce that is no larger than
        // ksize for any given pass size.
        int passSize = (kernelSize - 1) | 1;
        passSize = ((passSize - 1) / blurPasses) | 1;
        assert getKernelSize(passSize, blurPasses) <= kernelSize;
        return passSize;
    }

    public static int getKernelSize(int passSize, int blurPasses) {
        int kernelSize = (passSize < 1) ? 1 : passSize;
        kernelSize = (kernelSize - 1) * blurPasses + 1;
        kernelSize |= 1;
        return kernelSize;
    }
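
    /*
     * A quick numeric check of the two helpers above (illustrative numbers
     * only; the real bound comes from MAX_KERNEL_SIZE):
     *
     *   getKernelSize(5, 3)            ==> (5-1)*3 + 1 = 13
     *   getMaxSizeForKernelSize(64, 3) ==> ((63-1)/3) | 1 = 21,
     *       and getKernelSize(21, 3) == 61 <= 64, as the assert verifies.
     */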

    public BoxRenderState(float hsize, float vsize, int blurPasses, float spread,
                          boolean isShadow, Color4f shadowColor, BaseTransform filtertx)
    {
        /*
         * The operation starts as a description of the size of a (pair of)
         * box filter kernels measured relative to that user space coordinate
         * system and to be applied horizontally and vertically in that same
         * space. The presence of a filter transform can mean that the
         * direction we apply the box convolutions could change as well
         * as the new size of the box summations relative to the pixels
         * produced under that transform.
         *
         * Since the box filter is best described by the summation of a range
         * of discrete pixels horizontally and vertically, and since the
         * software algorithms vastly prefer applying the sums horizontally
         * and vertically to groups of whole pixels using an incremental "add
         * the next pixel at the front edge of the box and subtract the pixel
         * that is at the back edge of the box" technique, we will constrain
         * our box size to an integer size and attempt to force the inputs
         * to produce an axis aligned intermediate image. But, in the end,
         * we must be prepared for an arbitrary transform on the input image
         * which essentially means being able to back off to an arbitrary
         * invocation on the associated LinearConvolvePeer from the software
         * hand-written Box peers.
         *
         * We will track the direction and size of the box as we traverse
         * different coordinate spaces with the intent that eventually we
         * will perform the math of the convolution with weights calculated
         * for one sample per pixel in the indicated direction and applied as
         * closely to the intended final filter transform as we can achieve
         * with the following caveats (very similar to the caveats for the
         * more general GaussianRenderState):
         *
         * - There is a maximum kernel size that the hardware pixel shaders
         * can apply so we will try to keep the scaling of the filtered
         * pixels low enough that we do not exceed that data limitation.
         *
         * - Software vastly prefers to apply these weights along horizontal
         * and vertical vectors, but can apply them in an arbitrary direction
         * if need be by backing off to the generic LinearConvolvePeer.
         *
         * - If the box is large enough, then applying a smaller box kernel
         * to a downscaled input is close enough to applying the larger box
         * to a larger scaled input. Our maximum kernel size is large enough
         * for this effect to be hidden if we max out the kernel.
         *
         * - We can tell the inputs what transform we want them to use, but
         * they can always produce output under a different transform and
         * then return a result with a "post-processing" transform to be
         * applied (as we are doing here ourselves). Thus, we can plan
         * how we want to apply the convolution weights and samples here,
         * but we will have to reevaluate our actions when the actual
         * input pixels are created later.
         *
         * - We will try to blur at a nice axis-aligned orientation (which is
         * preferred for the software versions of the shaders) and perform
         * any rotation and skewing in the final post-processing result
         * transform as that amount of blurring will quite effectively cover
         * up any distortion that would occur by not rendering at the
         * appropriate angles.
         *
         * To achieve this we start out with untransformed sample vectors
         * which are unit vectors along the X and Y axes. We transform them
         * into the requested filter space, adjust the kernel size and see
         * if we can support that kernel size. If the projected kernel is
         * too large, then we request the input at a smaller scale
         * and perform a maximum kernel convolution on it and then indicate
         * that this result will need to be scaled by the caller. When this
         * method is done we will have computed what we need to do to the
         * input pixels when they come in if the inputtx was honored, otherwise
         * we may have to adjust the values further in {@link #validatePassInput}.
         */
        this.isShadow = isShadow;
        this.shadowColor = shadowColor;
        this.spread = spread;
        this.blurPasses = blurPasses;
        if (filtertx == null) filtertx = BaseTransform.IDENTITY_TRANSFORM;
        double txScaleX = Math.hypot(filtertx.getMxx(), filtertx.getMyx());
        double txScaleY = Math.hypot(filtertx.getMxy(), filtertx.getMyy());
        float fSizeH = (float) (hsize * txScaleX);
        float fSizeV = (float) (vsize * txScaleY);
        int maxPassSize = MAX_BOX_SIZES[blurPasses];
        if (fSizeH > maxPassSize) {
            txScaleX = maxPassSize / hsize;
            fSizeH = maxPassSize;
        }
        if (fSizeV > maxPassSize) {
            txScaleY = maxPassSize / vsize;
            fSizeV = maxPassSize;
        }
        this.inputSizeH = fSizeH;
        this.inputSizeV = fSizeV;
        this.spreadPass = (fSizeV > 1) ? 1 : 0;
        // We always want to use an unrotated space to do our filtering, so
        // we interpose our scaled-only space in all cases, but we do check
        // if it happens to be equivalent (ignoring translations) to the
        // original filtertx so we can avoid introducing extra layers of
        // transforms.
        boolean custom = (txScaleX != filtertx.getMxx() ||
                          0.0 != filtertx.getMyx() ||
                          txScaleY != filtertx.getMyy() ||
                          0.0 != filtertx.getMxy());
        if (custom) {
            this.space = EffectCoordinateSpace.CustomSpace;
            this.inputtx = BaseTransform.getScaleInstance(txScaleX, txScaleY);
            this.resulttx = filtertx
                .copy()
                .deriveWithScale(1.0 / txScaleX, 1.0 / txScaleY, 1.0);
        } else {
            this.space = EffectCoordinateSpace.RenderSpace;
            this.inputtx = filtertx;
            this.resulttx = BaseTransform.IDENTITY_TRANSFORM;
        }
        // assert inputtx.mxy == inputtx.myx == 0.0
    }
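
    /*
     * Clamping arithmetic above, by example (illustrative numbers only):
     * hsize = 10 in user space under a filter transform that scales X by 4
     * gives fSizeH = 40. If maxPassSize were 21, we would clamp fSizeH to
     * 21 and reduce txScaleX to 21/10 = 2.1, so that a maximal 21-pixel box
     * applied to the smaller intermediate image approximates the 40-pixel
     * box that the full transform implied.
     */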

    public int getBoxPixelSize(int pass) {
        float size = passSize;
        if (size < 1.0f) size = 1.0f;
        int boxsize = ((int) Math.ceil(size)) | 1;
        return boxsize;
    }

    public int getBlurPasses() {
        return blurPasses;
    }

    public float getSpread() {
        return spread;
    }

    @Override
    public boolean isShadow() {
        return isShadow;
    }

    @Override
    public Color4f getShadowColor() {
        return shadowColor;
    }

    @Override
    public float[] getPassShadowColorComponents() {
        return (validatedPass == 0)
            ? BLACK_COMPONENTS
            : shadowColor.getPremultipliedRGBComponents();
    }

    @Override
    public EffectCoordinateSpace getEffectTransformSpace() {
        return space;
    }

    @Override
    public BaseTransform getInputTransform(BaseTransform filterTransform) {
        return inputtx;
    }

    @Override
    public BaseTransform getResultTransform(BaseTransform filterTransform) {
        return resulttx;
    }

    @Override
    public EffectPeer getPassPeer(Renderer r, FilterContext fctx) {
        if (isPassNop()) {
            return null;
        }
        int ksize = getPassKernelSize();
        int psize = getPeerSize(ksize);
        Effect.AccelType actype = r.getAccelType();
        String name;
        switch (actype) {
            case NONE:
            case SIMD:
                if (swCompatible && spread == 0.0f) {
                    name = isShadow() ? "BoxShadow" : "BoxBlur";
                    break;
                }
                /* FALLS THROUGH */
            default:
                name = isShadow() ? "LinearConvolveShadow" : "LinearConvolve";
                break;
        }
        EffectPeer peer = r.getPeerInstance(fctx, name, psize);
        return peer;
    }

    @Override
    public Rectangle getInputClip(int i, Rectangle filterClip) {
        if (filterClip != null) {
            int klenh = getInputKernelSize(0);
            int klenv = getInputKernelSize(1);
            if ((klenh | klenv) > 1) {
                filterClip = new Rectangle(filterClip);
                // We actually want to grow them by (klen-1)/2, but since we
                // have forced the klen sizes to be odd above, a simple integer
                // divide by 2 is enough...
                filterClip.grow(klenh / 2, klenv / 2);
            }
        }
        return filterClip;
    }
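
    /*
     * For example (illustrative): with input kernel sizes klenh = 5 and
     * klenv = 7, the clip above is grown by (2, 3) pixels on each side;
     * since getInputKernelSize() always returns an odd value, klen/2 is
     * exactly (klen-1)/2.
     */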

    @Override
    public ImageData validatePassInput(ImageData src, int pass) {
        this.validatedPass = pass;
        BaseTransform srcTx = src.getTransform();
        samplevectors = new float[2];
        samplevectors[pass] = 1.0f;
        float iSize = (pass == 0) ? inputSizeH : inputSizeV;
        if (srcTx.isTranslateOrIdentity()) {
            this.swCompatible = true;
            this.passSize = iSize;
        } else {
            // The input produced a texture that requires transformation,
            // reevaluate our box sizes.
            // First (inverse) transform our sample vectors from the intended
            // srcTx space back into the actual pixel space of the src texture.
            // Then evaluate their length and attempt to absorb as much of any
            // implicit scaling that would happen into our final pixelSizes,
            // but if we overflow the maximum supportable pass size then we will
            // just have to sample sparsely with a longer than unit vector.
            // REMIND: we should also downsample the texture by powers of
            // 2 if our sampling will be more sparse than 1 sample per 2
            // pixels.
            try {
                srcTx.inverseDeltaTransform(samplevectors, 0, samplevectors, 0, 1);
            } catch (NoninvertibleTransformException ex) {
                this.passSize = 0.0f;
                samplevectors[0] = samplevectors[1] = 0.0f;
                this.swCompatible = true;
                return src;
            }
            double srcScale = Math.hypot(samplevectors[0], samplevectors[1]);
            float pSize = (float) (iSize * srcScale);
            int maxPassSize = MAX_BOX_SIZES[blurPasses];
            if (pSize > maxPassSize) {
                pSize = maxPassSize;
                srcScale = maxPassSize / iSize;
            }
            this.passSize = pSize;
            // For a pixelSize that was less than maxPassSize, the following
            // lines renormalize the un-transformed vector back into a unit
            // vector in the proper direction; we absorbed its length
            // into the pixelSize that we will apply for the box filter weights.
            // If we clipped the pixelSize to maxPassSize, then it will not
            // actually end up as a unit vector, but it will represent the
            // proper sampling deltas for the indicated box size (which should
            // be maxPassSize in that case).
            samplevectors[0] /= srcScale;
            samplevectors[1] /= srcScale;
            // If we are still sampling by an axis aligned unit vector, then the
            // optimized software filters can still do their "incremental sum"
            // magic.
            // REMIND: software loops could actually do an infinitely sized
            // kernel with only memory requirements getting in the way, but
            // the values being tested here are constrained by the limits of
            // the hardware peers. It is not clear how to fix this since we
            // have to choose how to proceed before we have enough information
            // to know if the inputs will be cooperative enough to assume
            // software limits, and then once we get here, we may have already
            // constrained ourselves into a situation where we must use the
            // hardware peers. Still, there may be more "fighting" we can do
            // to hold on to compatibility with the software loops perhaps?
            Rectangle srcSize = src.getUntransformedBounds();
            if (pass == 0) {
                this.swCompatible = nearOne(samplevectors[0], srcSize.width)
                                 && nearZero(samplevectors[1], srcSize.width);
            } else {
                this.swCompatible = nearZero(samplevectors[0], srcSize.height)
                                 && nearOne(samplevectors[1], srcSize.height);
            }
        }
        Filterable f = src.getUntransformedImage();
        samplevectors[0] /= f.getPhysicalWidth();
        samplevectors[1] /= f.getPhysicalHeight();
        return src;
    }
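
    /*
     * Renormalization above, by example (illustrative numbers only): if the
     * src texture was produced at half resolution, srcTx scales by 2 and the
     * inverse delta transform of a unit axis vector has length srcScale =
     * 0.5. A requested pass size of iSize = 10 then becomes pSize = 5 in
     * texture pixels, dividing the vector by srcScale restores it to unit
     * length, and the final division by the physical texture dimensions
     * converts it into the texel coordinates that the peers sample with.
     */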

    @Override
    public Rectangle getPassResultBounds(Rectangle srcdimension) {
        // Note that the pass vector and the pass radius may be adjusted for
        // a transformed input, but our output will be in the untransformed
        // "filter" coordinate space so we need to use the "input" values that
        // are in that same coordinate space.
        Rectangle ret = new Rectangle(srcdimension);
        if (validatedPass == 0) {
            ret.grow(getInputKernelSize(0) / 2, 0);
        } else {
            ret.grow(0, getInputKernelSize(1) / 2);
        }
        return ret;
    }

    @Override
    public float[] getPassVector() {
        float xoff = samplevectors[0];
        float yoff = samplevectors[1];
        int ksize = getPassKernelSize();
        int center = ksize / 2;
        float ret[] = new float[4];
        ret[0] = xoff;
        ret[1] = yoff;
        ret[2] = -center * xoff;
        ret[3] = -center * yoff;
        return ret;
    }

    @Override
    public int getPassWeightsArrayLength() {
        validateWeights();
        return weights.limit() / 4;
    }

    @Override
    public FloatBuffer getPassWeights() {
        validateWeights();
        weights.rewind();
        return weights;
    }

    private void validateWeights() {
        float pSize;
        if (blurPasses == 0) {
            pSize = 1.0f;
        } else {
            pSize = passSize;
            // 1.0f is the minimum size and is a NOP (each pixel averaged
            // over itself)
            if (pSize < 1.0f) pSize = 1.0f;
        }
        float passSpread = (validatedPass == spreadPass) ? spread : 0f;
        if (weights != null &&
            weightsValidSize == pSize &&
            weightsValidSpread == passSpread)
        {
            return;
        }

        // Round klen up to a full pixel size and make sure it is odd so
        // that we center the kernel around each pixel center (1.0 of the
        // total size/weight is centered on the current pixel and then
        // the remainder is split (size-1.0)/2 on each side).
        // If the size is 2, then we don't want to average each pair of
        // pixels together (weights: 0.5, 0.5), instead we want to take each
        // pixel and average in half of each of its neighbors with it
        // (weights: 0.25, 0.5, 0.25).
        int klen = ((int) Math.ceil(pSize)) | 1;
        int totalklen = klen;
        for (int p = 1; p < blurPasses; p++) {
            totalklen += klen - 1;
        }
        double ik[] = new double[totalklen];
        for (int i = 0; i < klen; i++) {
            ik[i] = 1.0;
        }
        // The sum of the ik[] array is now klen, but we want the sum to
        // be size. The worst case difference will be less than 2.0 since
        // the klen length is the ceil of the actual size possibly bumped up
        // to an odd number. Thus it can have been bumped up by no more than
        // 2.0. If there is an excess, we need to take half of it out of each
        // of the two end weights (first and last).
        double excess = klen - pSize;
        if (excess > 0.0) {
            // assert (excess * 0.5 < 1.0)
            ik[0] = ik[klen - 1] = 1.0 - excess * 0.5;
        }
        int filledklen = klen;
        for (int p = 1; p < blurPasses; p++) {
            filledklen += klen - 1;
            int i = filledklen - 1;
            while (i >= klen) {
                double sum = ik[i];
                for (int k = 1; k < klen; k++) {
                    sum += ik[i - k];
                }
                ik[i--] = sum;
            }
            while (i > 0) {
                double sum = ik[i];
                for (int k = 0; k < i; k++) {
                    sum += ik[k];
                }
                ik[i--] = sum;
            }
        }
        // assert (filledklen == totalklen == ik.length)
        double sum = 0.0;
        for (int i = 0; i < ik.length; i++) {
            sum += ik[i];
        }
        // We need to apply the spread on only one pass.
        // Prefer pass 1 if its radius is not trivial,
        // otherwise use pass 0 so that the spread doesn't disappear.
        sum += (1.0 - sum) * passSpread;

        if (weights == null) {
            // peersize(MAX_KERNEL_SIZE) rounded up to the next multiple of 4
            int maxbufsize = getPeerSize(MAX_KERNEL_SIZE);
            maxbufsize = (maxbufsize + 3) & (~3);
            weights = BufferUtil.newFloatBuffer(maxbufsize);
        }
        weights.clear();
        for (int i = 0; i < ik.length; i++) {
            weights.put((float) (ik[i] / sum));
        }
        int limit = getPeerSize(ik.length);
        while (weights.position() < limit) {
            weights.put(0f);
        }
        weights.limit(limit);
        weights.rewind();
        // Remember the parameters the cached buffer was built for so the
        // early-out test at the top of this method can take effect.
        weightsValidSize = pSize;
        weightsValidSpread = passSpread;
    }
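
    /*
     * Weight construction above, by example (illustrative): for pSize = 3
     * and blurPasses = 2 the single-pass kernel is [1 1 1], and convolving
     * it with itself in place yields [1 2 3 2 1], which is then normalized
     * by its sum (9). For a fractional pSize = 3.5, klen rounds up to 5
     * and the excess (1.5) is split between the two end weights, giving a
     * single-pass kernel of [0.25 1 1 1 0.25].
     */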

    @Override
    public int getInputKernelSize(int pass) {
        float size = (pass == 0) ? inputSizeH : inputSizeV;
        if (size < 1.0f) size = 1.0f;
        int klen = ((int) Math.ceil(size)) | 1;
        int totalklen = 1;
        for (int p = 0; p < blurPasses; p++) {
            totalklen += klen - 1;
        }
        return totalklen;
    }

    @Override
    public int getPassKernelSize() {
        float size = passSize;
        if (size < 1.0f) size = 1.0f;
        int klen = ((int) Math.ceil(size)) | 1;
        int totalklen = 1;
        for (int p = 0; p < blurPasses; p++) {
            totalklen += klen - 1;
        }
        return totalklen;
    }

    @Override
    public boolean isNop() {
        if (isShadow) return false;
        return (blurPasses == 0
                || (inputSizeH <= 1.0f && inputSizeV <= 1.0f));
    }

    @Override
    public boolean isPassNop() {
        if (isShadow && validatedPass == 1) return false;
        return (blurPasses == 0 || passSize <= 1.0f);
    }
}