1 /* 2 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.scenario.effect.impl.state; 27 28 import com.sun.javafx.geom.Rectangle; 29 import com.sun.javafx.geom.transform.BaseTransform; 30 import com.sun.javafx.geom.transform.NoninvertibleTransformException; 31 import com.sun.scenario.effect.Color4f; 32 import com.sun.scenario.effect.Effect; 33 import com.sun.scenario.effect.FilterContext; 34 import com.sun.scenario.effect.Filterable; 35 import com.sun.scenario.effect.ImageData; 36 import com.sun.scenario.effect.impl.BufferUtil; 37 import com.sun.scenario.effect.impl.EffectPeer; 38 import com.sun.scenario.effect.impl.Renderer; 39 import java.nio.FloatBuffer; 40 41 /** 42 * The RenderState for a box filter kernel that can be applied using a 43 * standard linear convolution kernel. 
 * A box filter has a size that represents how large of an area around a
 * given pixel should be averaged. If the size is 1.0 then just the pixel
 * itself should be averaged and the operation is a NOP. Values smaller
 * than that are automatically treated as 1.0/NOP.
 * For any odd size, the kernel weights the center pixel and an equal number
 * of pixels on either side of it equally, so the weights for size 2N+1 are:
 * [ {N copies of 1.0} 1.0 {N more copies of 1.0} ]
 * As the size grows past that integer size, we must then add another kernel
 * weight entry on both sides of the existing array of 1.0 weights and give
 * them a fractional weight of half of the amount we exceeded the last odd
 * size, so the weights for some size (2N+1)+e (e for epsilon) are:
 * [ e/2.0 {2*N+1 copies of 1.0} e/2.0 ]
 * As the size continues to grow, when it reaches the next even size, we get
 * weights for size 2*N+1+1 to be:
 * [ 0.5 {2*N+1 copies of 1.0} 0.5 ]
 * and as the size continues to grow and approaches the next odd number, we
 * see that 2(N+1)+1 == 2N+2+1 == 2N+1 + 2, so (e) approaches 2 and the
 * numbers on each end of the weights array approach e/2.0 == 1.0 and we end
 * up back at the pattern for an odd size again:
 * [ 1.0 {2*N+1 copies of 1.0} 1.0 ]
 *
 * ***************************
 * SOFTWARE LIMITATION CAVEAT:
 * ***************************
 *
 * Note that the highly optimized software filters for BoxBlur/Shadow will
 * actually do a very optimized "running sum" operation that is only currently
 * implemented for equal weighted kernels. Also, until recently we had always
 * been rounding down the size by casting it to an integer at a high level (in
 * the FX layer peer synchronization code), so for now the software filters
 * may only implement a subset of the above theory and new optimized loops that
 * allow partial sums on the first and last values will need to be written.
76 * Until then we will be rounding the sizes to an odd size, but only in the 77 * sw loops. 78 */ 79 public class BoxRenderState extends LinearConvolveRenderState { 80 public static final int MAX_BOX_SIZES[] = { 81 getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 0), 82 getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 1), 83 getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 2), 84 getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 3), 85 }; 86 87 private final boolean isShadow; 88 private final int blurPasses; 89 private final float spread; 90 private Color4f shadowColor; 91 92 private EffectCoordinateSpace space; 93 private BaseTransform inputtx; 94 private BaseTransform resulttx; 95 private final float inputSizeH; 96 private final float inputSizeV; 97 private final int spreadPass; 98 private float samplevectors[]; 99 100 private int validatedPass; 101 private float passSize; 102 private FloatBuffer weights; 103 private float weightsValidSize; 104 private float weightsValidSpread; 105 private boolean swCompatible; // true if we can use the sw peers 106 107 public static int getMaxSizeForKernelSize(int kernelSize, int blurPasses) { 108 if (blurPasses == 0) { 109 return Integer.MAX_VALUE; 110 } 111 // Kernel sizes are always odd, so if the supplied ksize is even then 112 // we need to use ksize-1 to compute the max as that is actually the 113 // largest kernel we will be able to produce that is no larger than 114 // ksize for any given pass size. 115 int passSize = (kernelSize - 1) | 1; 116 passSize = ((passSize - 1) / blurPasses) | 1; 117 assert getKernelSize(passSize, blurPasses) <= kernelSize; 118 return passSize; 119 } 120 121 public static int getKernelSize(int passSize, int blurPasses) { 122 int kernelSize = (passSize < 1) ? 
1 : passSize; 123 kernelSize = (kernelSize-1) * blurPasses + 1; 124 kernelSize |= 1; 125 return kernelSize; 126 } 127 128 public BoxRenderState(float hsize, float vsize, int blurPasses, float spread, 129 boolean isShadow, Color4f shadowColor, BaseTransform filtertx) 130 { 131 /* 132 * The operation starts as a description of the size of a (pair of) 133 * box filter kernels measured relative to that user space coordinate 134 * system and to be applied horizontally and vertically in that same 135 * space. The presence of a filter transform can mean that the 136 * direction we apply the box convolutions could change as well 137 * as the new size of the box summations relative to the pixels 138 * produced under that transform. 139 * 140 * Since the box filter is best described by the summation of a range 141 * of discrete pixels horizontally and vertically, and since the 142 * software algorithms vastly prefer applying the sums horizontally 143 * and vertically to groups of whole pixels using an incremental "add 144 * the next pixel at the front edge of the box and subtract the pixel 145 * that is at the back edge of the box" technique, we will constrain 146 * our box size to an integer size and attempt to force the inputs 147 * to produce an axis aligned intermediate image. But, in the end, 148 * we must be prepared for an arbitrary transform on the input image 149 * which essentially means being able to back off to an arbitrary 150 * invocation on the associated LinearConvolvePeer from the software 151 * hand-written Box peers. 
152 * 153 * We will track the direction and size of the box as we traverse 154 * different coordinate spaces with the intent that eventually we 155 * will perform the math of the convolution with weights calculated 156 * for one sample per pixel in the indicated direction and applied as 157 * closely to the intended final filter transform as we can achieve 158 * with the following caveats (very similar to the caveats for the 159 * more general GaussianRenderState): 160 * 161 * - There is a maximum kernel size that the hardware pixel shaders 162 * can apply so we will try to keep the scaling of the filtered 163 * pixels low enough that we do not exceed that data limitation. 164 * 165 * - Software vastly prefers to apply these weights along horizontal 166 * and vertical vectors, but can apply them in an arbitrary direction 167 * if need be by backing off to the generic LinearConvolvePeer. 168 * 169 * - If the box is large enough, then applying a smaller box kernel 170 * to a downscaled input is close enough to applying the larger box 171 * to a larger scaled input. Our maximum kernel size is large enough 172 * for this effect to be hidden if we max out the kernel. 173 * 174 * - We can tell the inputs what transform we want them to use, but 175 * they can always produce output under a different transform and 176 * then return a result with a "post-processing" trasnform to be 177 * applied (as we are doing here ourselves). Thus, we can plan 178 * how we want to apply the convolution weights and samples here, 179 * but we will have to reevaluate our actions when the actual 180 * input pixels are created later. 
181 * 182 * - We will try to blur at a nice axis-aligned orientation (which is 183 * preferred for the software versions of the shaders) and perform 184 * any rotation and skewing in the final post-processing result 185 * transform as that amount of blurring will quite effectively cover 186 * up any distortion that would occur by not rendering at the 187 * appropriate angles. 188 * 189 * To achieve this we start out with untransformed sample vectors 190 * which are unit vectors along the X and Y axes. We transform them 191 * into the requested filter space, adjust the kernel size and see 192 * if we can support that kernel size. If it is too large of a 193 * projected kernel, then we request the input at a smaller scale 194 * and perform a maximum kernel convolution on it and then indicate 195 * that this result will need to be scaled by the caller. When this 196 * method is done we will have computed what we need to do to the 197 * input pixels when they come in if the inputtx was honored, otherwise 198 * we may have to adjust the values further in {@link @validateInput()}. 199 */ 200 this.isShadow = isShadow; 201 this.shadowColor = shadowColor; 202 this.spread = spread; 203 this.blurPasses = blurPasses; 204 double txScaleX = Math.hypot(filtertx.getMxx(), filtertx.getMyx()); 205 double txScaleY = Math.hypot(filtertx.getMxy(), filtertx.getMyy()); 206 float fSizeH = (float) (hsize * txScaleX); 207 float fSizeV = (float) (vsize * txScaleY); 208 int maxPassSize = MAX_BOX_SIZES[blurPasses]; 209 if (fSizeH > maxPassSize) { 210 txScaleX = maxPassSize / hsize; 211 fSizeH = maxPassSize; 212 } 213 if (fSizeV > maxPassSize) { 214 txScaleY = maxPassSize / vsize; 215 fSizeV = maxPassSize; 216 } 217 this.inputSizeH = fSizeH; 218 this.inputSizeV = fSizeV; 219 this.spreadPass = (fSizeV > 1) ? 
1 : 0; 220 // We always want to use an unrotated space to do our filtering, so 221 // we interpose our scaled-only space in all cases, but we do check 222 // if it happens to be equivalent (ignoring translations) to the 223 // original filtertx so we can avoid introducing extra layers of 224 // transforms. 225 boolean custom = (txScaleX != filtertx.getMxx() || 226 0.0 != filtertx.getMyx() || 227 txScaleY != filtertx.getMyy() || 228 0.0 != filtertx.getMxy()); 229 if (custom) { 230 this.space = EffectCoordinateSpace.CustomSpace; 231 this.inputtx = BaseTransform.getScaleInstance(txScaleX, txScaleY); 232 this.resulttx = filtertx 233 .copy() 234 .deriveWithScale(1.0 / txScaleX, 1.0 / txScaleY, 1.0); 235 } else { 236 this.space = EffectCoordinateSpace.RenderSpace; 237 this.inputtx = filtertx; 238 this.resulttx = BaseTransform.IDENTITY_TRANSFORM; 239 } 240 // assert inputtx.mxy == inputtx.myx == 0.0 241 } 242 243 public int getBoxPixelSize(int pass) { 244 float size = passSize; 245 if (size < 1.0f) size = 1.0f; 246 int boxsize = ((int) Math.ceil(size)) | 1; 247 return boxsize; 248 } 249 250 public int getBlurPasses() { 251 return blurPasses; 252 } 253 254 public float getSpread() { 255 return spread; 256 } 257 258 @Override 259 public boolean isShadow() { 260 return isShadow; 261 } 262 263 @Override 264 public Color4f getShadowColor() { 265 return shadowColor; 266 } 267 268 @Override 269 public float[] getPassShadowColorComponents() { 270 return (validatedPass == 0) 271 ? 
BLACK_COMPONENTS 272 : shadowColor.getPremultipliedRGBComponents(); 273 } 274 275 @Override 276 public EffectCoordinateSpace getEffectTransformSpace() { 277 return space; 278 } 279 280 @Override 281 public BaseTransform getInputTransform(BaseTransform filterTransform) { 282 return inputtx; 283 } 284 285 @Override 286 public BaseTransform getResultTransform(BaseTransform filterTransform) { 287 return resulttx; 288 } 289 290 @Override 291 public EffectPeer getPassPeer(Renderer r, FilterContext fctx) { 292 if (isPassNop()) { 293 return null; 294 } 295 int ksize = getPassKernelSize(); 296 int psize = getPeerSize(ksize); 297 Effect.AccelType actype = r.getAccelType(); 298 String name; 299 switch (actype) { 300 case NONE: 301 case SIMD: 302 if (swCompatible && spread == 0.0f) { 303 name = isShadow() ? "BoxShadow" : "BoxBlur"; 304 break; 305 } 306 /* FALLS THROUGH */ 307 default: 308 name = isShadow() ? "LinearConvolveShadow" : "LinearConvolve"; 309 break; 310 } 311 EffectPeer peer = r.getPeerInstance(fctx, name, psize); 312 return peer; 313 } 314 315 @Override 316 public Rectangle getInputClip(int i, Rectangle filterClip) { 317 if (filterClip != null) { 318 int klenh = ((int) Math.ceil(Math.max(inputSizeH, 1.0))) | 1; 319 int klenv = ((int) Math.ceil(Math.max(inputSizeV, 1.0))) | 1; 320 if ((klenh | klenv) > 1) { 321 filterClip = new Rectangle(filterClip); 322 // We actually want to grow them by (klen-1)/2, but since we 323 // have forced the klen sizes to be odd above, a simple integer 324 // divide by 2 is enough... 325 filterClip.grow(klenh/2, klenv/2); 326 } 327 } 328 return filterClip; 329 } 330 331 @Override 332 public ImageData validatePassInput(ImageData src, int pass) { 333 this.validatedPass = pass; 334 BaseTransform srcTx = src.getTransform(); 335 samplevectors = new float[2]; 336 samplevectors[pass] = 1.0f; 337 float iSize = (pass == 0) ? 
inputSizeH : inputSizeV; 338 if (srcTx.isTranslateOrIdentity()) { 339 this.swCompatible = true; 340 this.passSize = iSize; 341 } else { 342 // The input produced a texture that requires transformation, 343 // reevaluate our box sizes. 344 // First (inverse) transform our sample vectors from the intended 345 // srcTx space back into the actual pixel space of the src texture. 346 // Then evaluate their length and attempt to absorb as much of any 347 // implicit scaling that would happen into our final pixelSizes, 348 // but if we overflow the maximum supportable pass size then we will 349 // just have to sample sparsely with a longer than unit vector. 350 // REMIND: we should also downsample the texture by powers of 351 // 2 if our sampling will be more sparse than 1 sample per 2 352 // pixels. 353 try { 354 srcTx.inverseDeltaTransform(samplevectors, 0, samplevectors, 0, 1); 355 } catch (NoninvertibleTransformException ex) { 356 this.passSize = 0.0f; 357 samplevectors[0] = samplevectors[1] = 0.0f; 358 this.swCompatible = true; 359 return src; 360 } 361 double srcScale = Math.hypot(samplevectors[0], samplevectors[1]); 362 float pSize = (float) (iSize * srcScale); 363 pSize *= srcScale; 364 int maxPassSize = MAX_BOX_SIZES[blurPasses]; 365 if (pSize > maxPassSize) { 366 pSize = maxPassSize; 367 srcScale = maxPassSize / iSize; 368 } 369 this.passSize = pSize; 370 // For a pixelSize that was less than maxPassSize, the following 371 // lines renormalize the un-transformed vector back into a unit 372 // vector in the proper direction and we absorbed its length 373 // into the pixelSize that we will apply for the box filter weights. 374 // If we clipped the pixelSize to maxPassSize, then it will not 375 // actually end up as a unit vector, but it will represent the 376 // proper sampling deltas for the indicated box size (which should 377 // be maxPassSize in that case). 
378 samplevectors[0] /= srcScale; 379 samplevectors[1] /= srcScale; 380 // If we are still sampling by an axis aligned unit vector, then the 381 // optimized software filters can still do their "incremental sum" 382 // magic. 383 // REMIND: software loops could actually do an infinitely sized 384 // kernel with only memory requirements getting in the way, but 385 // the values being tested here are constrained by the limits of 386 // the hardware peers. It is not clear how to fix this since we 387 // have to choose how to proceed before we have enough information 388 // to know if the inputs will be cooperative enough to assume 389 // software limits, and then once we get here, we may have already 390 // constrained ourselves into a situation where we must use the 391 // hardware peers. Still, there may be more "fighting" we can do 392 // to hold on to compatibility with the software loops perhaps? 393 Rectangle srcSize = src.getUntransformedBounds(); 394 if (pass == 0) { 395 this.swCompatible = nearOne(samplevectors[0], srcSize.width) 396 && nearZero(samplevectors[1], srcSize.width); 397 } else { 398 this.swCompatible = nearZero(samplevectors[0], srcSize.height) 399 && nearOne(samplevectors[1], srcSize.height); 400 } 401 } 402 Filterable f = src.getUntransformedImage(); 403 samplevectors[0] /= f.getPhysicalWidth(); 404 samplevectors[1] /= f.getPhysicalHeight(); 405 return src; 406 } 407 408 @Override 409 public Rectangle getPassResultBounds(Rectangle srcdimension) { 410 // Note that the pass vector and the pass radius may be adjusted for 411 // a transformed input, but our output will be in the untransformed 412 // "filter" coordinate space so we need to use the "input" values that 413 // are in that same coordinate space. 
414 Rectangle ret = new Rectangle(srcdimension); 415 if (validatedPass == 0) { 416 ret.grow(getInputKernelSize(0) / 2, 0); 417 } else { 418 ret.grow(0, getInputKernelSize(1) / 2); 419 } 420 return ret; 421 } 422 423 @Override 424 public float[] getPassVector() { 425 float xoff = samplevectors[0]; 426 float yoff = samplevectors[1]; 427 int ksize = getPassKernelSize(); 428 int center = ksize / 2; 429 float ret[] = new float[4]; 430 ret[0] = xoff; 431 ret[1] = yoff; 432 ret[2] = -center * xoff; 433 ret[3] = -center * yoff; 434 return ret; 435 } 436 437 @Override 438 public int getPassWeightsArrayLength() { 439 validateWeights(); 440 return weights.limit() / 4; 441 } 442 443 @Override 444 public FloatBuffer getPassWeights() { 445 validateWeights(); 446 weights.rewind(); 447 return weights; 448 } 449 450 private void validateWeights() { 451 float pSize; 452 if (blurPasses == 0) { 453 pSize = 1.0f; 454 } else { 455 pSize = passSize; 456 // 1.0f is the minimum size and is a NOP (each pixel averaged 457 // over itself) 458 if (pSize < 1.0f) pSize = 1.0f; 459 } 460 float passSpread = (validatedPass == spreadPass) ? spread : 0f; 461 if (weights != null && 462 weightsValidSize == pSize && 463 weightsValidSpread == passSpread) 464 { 465 return; 466 } 467 468 // round klen up to a full pixel size and make sure it is odd so 469 // that we center the kernel around each pixel center (1.0 of the 470 // total size/weight is centered on the current pixel and then 471 // the remainder is split (size-1.0)/2 on each side. 472 // If the size is 2, then we don't want to average each pair of 473 // pixels together (weights: 0.5, 0.5), instead we want to take each 474 // pixel and average in half of each of its neighbors with it 475 // (weights: 0.25, 0.5, 0.25). 
476 int klen = ((int) Math.ceil(pSize)) | 1; 477 int totalklen = klen; 478 for (int p = 1; p < blurPasses; p++) { 479 totalklen += klen - 1; 480 } 481 double ik[] = new double[totalklen]; 482 for (int i = 0; i < klen; i++) { 483 ik[i] = 1.0; 484 } 485 // The sum of the ik[] array is now klen, but we want the sum to 486 // be size. The worst case difference will be less than 2.0 since 487 // the klen length is the ceil of the actual size possibly bumped up 488 // to an odd number. Thus it can have been bumped up by no more than 489 // 2.0. If there is an excess, we need to take half of it out of each 490 // of the two end weights (first and last). 491 double excess = klen - pSize; 492 if (excess > 0.0) { 493 // assert (excess * 0.5 < 1.0) 494 ik[0] = ik[klen-1] = 1.0 - excess * 0.5; 495 } 496 int filledklen = klen; 497 for (int p = 1; p < blurPasses; p++) { 498 filledklen += klen - 1; 499 int i = filledklen - 1; 500 while (i > klen) { 501 double sum = ik[i]; 502 for (int k = 1; k < klen; k++) { 503 sum += ik[i-k]; 504 } 505 ik[i--] = sum; 506 } 507 while (i > 0) { 508 double sum = ik[i]; 509 for (int k = 0; k < i; k++) { 510 sum += ik[k]; 511 } 512 ik[i--] = sum; 513 } 514 } 515 // assert (filledklen == totalklen == ik.length) 516 double sum = 0.0; 517 for (int i = 0; i < ik.length; i++) { 518 sum += ik[i]; 519 } 520 // We need to apply the spread on only one pass 521 // Prefer pass1 if r1 is not trivial 522 // Otherwise use pass 0 so that it doesn't disappear 523 sum += (1.0 - sum) * passSpread; 524 525 if (weights == null) { 526 // peersize(MAX_KERNEL_SIZE) rounded up to the next multiple of 4 527 int maxbufsize = getPeerSize(MAX_KERNEL_SIZE); 528 maxbufsize = (maxbufsize + 3) & (~3); 529 weights = BufferUtil.newFloatBuffer(maxbufsize); 530 } 531 weights.clear(); 532 for (int i = 0; i < ik.length; i++) { 533 weights.put((float) (ik[i] / sum)); 534 } 535 int limit = getPeerSize(ik.length); 536 while (weights.position() < limit) { 537 weights.put(0f); 538 } 539 
weights.limit(limit); 540 weights.rewind(); 541 } 542 543 @Override 544 public int getInputKernelSize(int pass) { 545 float size = (pass == 0) ? inputSizeH : inputSizeV; 546 if (size < 1.0f) size = 1.0f; 547 int klen = ((int) Math.ceil(size)) | 1; 548 int totalklen = 1; 549 for (int p = 0; p < blurPasses; p++) { 550 totalklen += klen - 1; 551 } 552 return totalklen; 553 } 554 555 @Override 556 public int getPassKernelSize() { 557 float size = passSize; 558 if (size < 1.0f) size = 1.0f; 559 int klen = ((int) Math.ceil(size)) | 1; 560 int totalklen = 1; 561 for (int p = 0; p < blurPasses; p++) { 562 totalklen += klen - 1; 563 } 564 return totalklen; 565 } 566 567 @Override 568 public boolean isNop() { 569 if (isShadow) return false; 570 return (blurPasses == 0 571 || (inputSizeH <= 1.0f && inputSizeV <= 1.0f)); 572 } 573 574 @Override 575 public boolean isPassNop() { 576 if (isShadow && validatedPass == 1) return false; 577 return (blurPasses == 0 || (passSize) <= 1.0f); 578 } 579 }