/*
 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.scenario.effect.impl.state;

import com.sun.javafx.geom.Rectangle;
import com.sun.javafx.geom.transform.BaseTransform;
import com.sun.javafx.geom.transform.NoninvertibleTransformException;
import com.sun.scenario.effect.Color4f;
import com.sun.scenario.effect.Effect;
import com.sun.scenario.effect.FilterContext;
import com.sun.scenario.effect.Filterable;
import com.sun.scenario.effect.ImageData;
import com.sun.scenario.effect.impl.BufferUtil;
import com.sun.scenario.effect.impl.EffectPeer;
import com.sun.scenario.effect.impl.Renderer;
import java.nio.FloatBuffer;

/**
 * The RenderState for a box filter kernel that can be applied using a
 * standard linear convolution kernel.
 * A box filter has a size that represents how large an area around a
 * given pixel should be averaged.  If the size is 1.0 then just the pixel
 * itself is averaged and the operation is a NOP.  Values smaller than
 * that are automatically treated as 1.0/NOP.
 * For any odd size, the kernel weights the center pixel and an equal number
 * of pixels on either side of it equally, so the weights for size 2N+1 are:
 *     [ {N copies of 1.0} 1.0 {N more copies of 1.0} ]
 * As the size grows past that integer size, we must then add another kernel
 * weight entry on both sides of the existing array of 1.0 weights and give
 * them a fractional weight of half of the amount by which we exceeded the
 * last odd size, so the weights for some size (2N+1)+e (e for epsilon) are:
 *     [ e/2.0 {2*N+1 copies of 1.0} e/2.0 ]
 * As the size continues to grow, when it reaches the next even size 2N+2,
 * we get the weights:
 *     [ 0.5 {2*N+1 copies of 1.0} 0.5 ]
 * and as the size continues to grow and approaches the next odd number, we
 * see that 2(N+1)+1 == 2N+2+1 == (2N+1) + 2, so (e) approaches 2 and the
 * numbers on each end of the weights array approach e/2.0 == 1.0 and we end
 * up back at the pattern for an odd size again:
 *     [ 1.0 {2*N+1 copies of 1.0} 1.0 ]
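 *
 * As a concrete example of the rule above, a box of size 4.5 has N=1 and
 * e=1.5, producing the 5-entry kernel:
 *     [ 0.75 1.0 1.0 1.0 0.75 ]
 * whose weights sum to 4.5 before they are normalized to a total of 1.0
 * (see validateWeights() below).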
 *
 * ***************************
 * SOFTWARE LIMITATION CAVEAT:
 * ***************************
 *
 * Note that the highly optimized software filters for BoxBlur/Shadow
 * actually perform a very optimized "running sum" operation that is
 * currently implemented only for equal weighted kernels.  Also, until
 * recently we had always been rounding down the size by casting it to an
 * integer at a high level (in the FX layer peer synchronization code), so
 * for now the software filters may only implement a subset of the above
 * theory, and new optimized loops that allow partial sums on the first and
 * last values will need to be written.  Until then we will be rounding the
 * sizes to an odd size, but only in the sw loops.
 */
public class BoxRenderState extends LinearConvolveRenderState {
    public static final int MAX_BOX_SIZES[] = {
        getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 0),
        getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 1),
        getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 2),
        getMaxSizeForKernelSize(MAX_KERNEL_SIZE, 3),
    };

    private final boolean isShadow;
    private final int blurPasses;
    private final float spread;
    private Color4f shadowColor;
    private EffectCoordinateSpace space;
    private BaseTransform inputtx;
    private BaseTransform resulttx;
    private final float inputSizeH;
    private final float inputSizeV;
    private final int spreadPass;
    private float samplevectors[];

    private int validatedPass;
    private float passSize;
    private FloatBuffer weights;
    private float weightsValidSize;
    private float weightsValidSpread;
    private boolean swCompatible;  // true if we can use the sw peers

    public static int getMaxSizeForKernelSize(int kernelSize, int blurPasses) {
        if (blurPasses == 0) {
            return Integer.MAX_VALUE;
        }
        // Kernel sizes are always odd, so if the supplied ksize is even then
        // we need to use ksize-1 to compute the max as that is actually the
        // largest kernel we will be able to produce that is no larger than
        // ksize for any given pass size.
        int passSize = (kernelSize - 1) | 1;
        passSize = ((passSize - 1) / blurPasses) | 1;
        assert getKernelSize(passSize, blurPasses) <= kernelSize;
        return passSize;
    }

    public static int getKernelSize(int passSize, int blurPasses) {
        int kernelSize = (passSize < 1) ? 1 : passSize;
        kernelSize = (kernelSize - 1) * blurPasses + 1;
        kernelSize |= 1;
        return kernelSize;
    }
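
    // A worked example of the round trip between the two helpers above:
    // getKernelSize(5, 3) == (5-1)*3 + 1 == 13 taps, and in the other
    // direction getMaxSizeForKernelSize(13, 3) == ((12/3) | 1) == 5, so a
    // 13-tap kernel budget supports a per-pass box size of 5 over 3 passes.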

    public BoxRenderState(float hsize, float vsize, int blurPasses, float spread,
                          boolean isShadow, Color4f shadowColor,
                          BaseTransform filtertx)
    {
        /*
         * The operation starts as a description of the size of a (pair of)
         * box filter kernels measured relative to the user space coordinate
         * system and to be applied horizontally and vertically in that same
         * space.  The presence of a filter transform can mean that the
         * direction in which we apply the box convolutions could change, as
         * well as the new size of the box summations relative to the pixels
         * produced under that transform.
         *
         * Since the box filter is best described by the summation of a range
         * of discrete pixels horizontally and vertically, and since the
         * software algorithms vastly prefer applying the sums horizontally
         * and vertically to groups of whole pixels using an incremental "add
         * the next pixel at the front edge of the box and subtract the pixel
         * that is at the back edge of the box" technique, we will constrain
         * our box size to an integer size and attempt to force the inputs
         * to produce an axis aligned intermediate image.  But, in the end,
         * we must be prepared for an arbitrary transform on the input image,
         * which essentially means being able to back off to an arbitrary
         * invocation on the associated LinearConvolvePeer from the software
         * hand-written Box peers.
         *
         * We will track the direction and size of the box as we traverse
         * different coordinate spaces with the intent that eventually we
         * will perform the math of the convolution with weights calculated
         * for one sample per pixel in the indicated direction and applied as
         * closely to the intended final filter transform as we can achieve
         * with the following caveats (very similar to the caveats for the
         * more general GaussianRenderState):
         *
         * - There is a maximum kernel size that the hardware pixel shaders
         *   can apply, so we will try to keep the scaling of the filtered
         *   pixels low enough that we do not exceed that data limitation.
         *
         * - Software vastly prefers to apply these weights along horizontal
         *   and vertical vectors, but can apply them in an arbitrary
         *   direction if need be by backing off to the generic
         *   LinearConvolvePeer.
         *
         * - If the box is large enough, then applying a smaller box kernel
         *   to a downscaled input is close enough to applying the larger box
         *   to a larger scaled input.  Our maximum kernel size is large
         *   enough for this effect to be hidden if we max out the kernel.
         *
         * - We can tell the inputs what transform we want them to use, but
         *   they can always produce output under a different transform and
         *   then return a result with a "post-processing" transform to be
         *   applied (as we are doing here ourselves).  Thus, we can plan
         *   how we want to apply the convolution weights and samples here,
         *   but we will have to reevaluate our actions when the actual
         *   input pixels are created later.
         *
         * - We will try to blur at a nice axis-aligned orientation (which is
         *   preferred for the software versions of the shaders) and perform
         *   any rotation and skewing in the final post-processing result
         *   transform, as that amount of blurring will quite effectively
         *   cover up any distortion that would occur by not rendering at the
         *   appropriate angles.
         *
         * To achieve this we start out with untransformed sample vectors
         * which are unit vectors along the X and Y axes.  We transform them
         * into the requested filter space, adjust the kernel size and see
         * if we can support that kernel size.  If the projected kernel is
         * too large, then we request the input at a smaller scale and
         * perform a maximum kernel convolution on it and then indicate that
         * this result will need to be scaled by the caller.  When this
         * method is done we will have computed what we need to do to the
         * input pixels when they come in if the inputtx was honored,
         * otherwise we may have to adjust the values further in
         * {@link #validatePassInput(ImageData, int)}.
         */
        this.isShadow = isShadow;
        this.shadowColor = shadowColor;
        this.spread = spread;
        this.blurPasses = blurPasses;
        double txScaleX = Math.hypot(filtertx.getMxx(), filtertx.getMyx());
        double txScaleY = Math.hypot(filtertx.getMxy(), filtertx.getMyy());
        float fSizeH = (float) (hsize * txScaleX);
        float fSizeV = (float) (vsize * txScaleY);
        int maxPassSize = MAX_BOX_SIZES[blurPasses];
        if (fSizeH > maxPassSize) {
            txScaleX = maxPassSize / hsize;
            fSizeH = maxPassSize;
        }
        if (fSizeV > maxPassSize) {
            txScaleY = maxPassSize / vsize;
            fSizeV = maxPassSize;
        }
        this.inputSizeH = fSizeH;
        this.inputSizeV = fSizeV;
        this.spreadPass = (fSizeV > 1) ? 1 : 0;
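        // A worked example of the clamping above, using hypothetical numbers
        // (the real maxPassSize depends on MAX_KERNEL_SIZE and blurPasses):
        // if hsize == 300 but maxPassSize == 100, then txScaleX becomes
        // 100/300, the input is requested at one-third scale where the box
        // spans only 100 pixels, and resulttx (computed below) scales the
        // blurred result back up by 3x for the caller.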
        // We always want to use an unrotated space to do our filtering, so
        // we interpose our scaled-only space in all cases, but we do check
        // if it happens to be equivalent (ignoring translations) to the
        // original filtertx so we can avoid introducing extra layers of
        // transforms.
        boolean custom = (txScaleX != filtertx.getMxx() ||
                          0.0      != filtertx.getMyx() ||
                          txScaleY != filtertx.getMyy() ||
                          0.0      != filtertx.getMxy());
        if (custom) {
            this.space = EffectCoordinateSpace.CustomSpace;
            this.inputtx = BaseTransform.getScaleInstance(txScaleX, txScaleY);
            this.resulttx = filtertx
                .copy()
                .deriveWithScale(1.0 / txScaleX, 1.0 / txScaleY, 1.0);
        } else {
            this.space = EffectCoordinateSpace.RenderSpace;
            this.inputtx = filtertx;
            this.resulttx = BaseTransform.IDENTITY_TRANSFORM;
        }
        // assert inputtx.mxy == inputtx.myx == 0.0
    }

    public int getBoxPixelSize(int pass) {
        float size = passSize;
        if (size < 1.0f) size = 1.0f;
        int boxsize = ((int) Math.ceil(size)) | 1;
        return boxsize;
    }

    public int getBlurPasses() {
        return blurPasses;
    }

    public float getSpread() {
        return spread;
    }

    @Override
    public boolean isShadow() {
        return isShadow;
    }

    @Override
    public Color4f getShadowColor() {
        return shadowColor;
    }

    @Override
    public float[] getPassShadowColorComponents() {
        return (validatedPass == 0)
            ? BLACK_COMPONENTS
            : shadowColor.getPremultipliedRGBComponents();
    }

    @Override
    public EffectCoordinateSpace getEffectTransformSpace() {
        return space;
    }

    @Override
    public BaseTransform getInputTransform(BaseTransform filterTransform) {
        return inputtx;
    }

    @Override
    public BaseTransform getResultTransform(BaseTransform filterTransform) {
        return resulttx;
    }

    @Override
    public EffectPeer getPassPeer(Renderer r, FilterContext fctx) {
        if (isPassNop()) {
            return null;
        }
        int ksize = getPassKernelSize();
        int psize = getPeerSize(ksize);
        Effect.AccelType actype = r.getAccelType();
        String name;
        switch (actype) {
            case NONE:
            case SIMD:
                if (swCompatible && spread == 0.0f) {
                    name = isShadow() ? "BoxShadow" : "BoxBlur";
                    break;
                }
                /* FALLS THROUGH */
            default:
                name = isShadow() ? "LinearConvolveShadow" : "LinearConvolve";
                break;
        }
        EffectPeer peer = r.getPeerInstance(fctx, name, psize);
        return peer;
    }

    @Override
    public Rectangle getInputClip(int i, Rectangle filterClip) {
        if (filterClip != null) {
            int klenh = ((int) Math.ceil(Math.max(inputSizeH, 1.0))) | 1;
            int klenv = ((int) Math.ceil(Math.max(inputSizeV, 1.0))) | 1;
            if ((klenh | klenv) > 1) {
                filterClip = new Rectangle(filterClip);
                // We actually want to grow them by (klen-1)/2, but since we
                // have forced the klen sizes to be odd above, a simple
                // integer divide by 2 is enough...
                filterClip.grow(klenh / 2, klenv / 2);
            }
        }
        return filterClip;
    }
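
    // A worked example of the clip padding above: an inputSizeH of 4.5
    // rounds up to klenh == ceil(4.5) | 1 == 5, so the horizontal clip
    // grows by 5/2 == 2 pixels on each side, which is exactly the
    // (klen-1)/2 reach of a 5-tap kernel.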

    @Override
    public ImageData validatePassInput(ImageData src, int pass) {
        this.validatedPass = pass;
        BaseTransform srcTx = src.getTransform();
        samplevectors = new float[2];
        samplevectors[pass] = 1.0f;
        float iSize = (pass == 0) ? inputSizeH : inputSizeV;
        if (srcTx.isTranslateOrIdentity()) {
            this.swCompatible = true;
            this.passSize = iSize;
        } else {
            // The input produced a texture that requires transformation,
            // reevaluate our box sizes.
            // First (inverse) transform our sample vectors from the intended
            // srcTx space back into the actual pixel space of the src
            // texture.  Then evaluate their length and attempt to absorb as
            // much of any implicit scaling that would happen into our final
            // pixelSizes, but if we overflow the maximum supportable pass
            // size then we will just have to sample sparsely with a longer
            // than unit vector.
            // REMIND: we should also downsample the texture by powers of
            // 2 if our sampling will be more sparse than 1 sample per 2
            // pixels.
            try {
                srcTx.inverseDeltaTransform(samplevectors, 0, samplevectors, 0, 1);
            } catch (NoninvertibleTransformException ex) {
                this.passSize = 0.0f;
                samplevectors[0] = samplevectors[1] = 0.0f;
                this.swCompatible = true;
                return src;
            }
            double srcScale = Math.hypot(samplevectors[0], samplevectors[1]);
            float pSize = (float) (iSize * srcScale);
            int maxPassSize = MAX_BOX_SIZES[blurPasses];
            if (pSize > maxPassSize) {
                pSize = maxPassSize;
                srcScale = maxPassSize / iSize;
            }
            this.passSize = pSize;
            // For a pixelSize that was less than maxPassSize, the following
            // lines renormalize the un-transformed vector back into a unit
            // vector in the proper direction, and we absorbed its length
            // into the pixelSize that we will apply for the box filter
            // weights.  If we clipped the pixelSize to maxPassSize, then it
            // will not actually end up as a unit vector, but it will
            // represent the proper sampling deltas for the indicated box
            // size (which should be maxPassSize in that case).
            samplevectors[0] /= srcScale;
            samplevectors[1] /= srcScale;
            // If we are still sampling by an axis aligned unit vector, then
            // the optimized software filters can still do their
            // "incremental sum" magic.
            // REMIND: software loops could actually do an infinitely sized
            // kernel with only memory requirements getting in the way, but
            // the values being tested here are constrained by the limits of
            // the hardware peers.  It is not clear how to fix this since we
            // have to choose how to proceed before we have enough
            // information to know if the inputs will be cooperative enough
            // to assume software limits, and then once we get here, we may
            // have already constrained ourselves into a situation where we
            // must use the hardware peers.  Still, there may be more
            // "fighting" we can do to hold on to compatibility with the
            // software loops perhaps?
            Rectangle srcSize = src.getUntransformedBounds();
            if (pass == 0) {
                this.swCompatible = nearOne(samplevectors[0], srcSize.width)
                                 && nearZero(samplevectors[1], srcSize.width);
            } else {
                this.swCompatible = nearZero(samplevectors[0], srcSize.height)
                                 && nearOne(samplevectors[1], srcSize.height);
            }
        }
        Filterable f = src.getUntransformedImage();
        samplevectors[0] /= f.getPhysicalWidth();
        samplevectors[1] /= f.getPhysicalHeight();
        return src;
    }

    @Override
    public Rectangle getPassResultBounds(Rectangle srcdimension) {
        // Note that the pass vector and the pass radius may be adjusted for
        // a transformed input, but our output will be in the untransformed
        // "filter" coordinate space, so we need to use the "input" values
        // that are in that same coordinate space.
        Rectangle ret = new Rectangle(srcdimension);
        if (validatedPass == 0) {
            ret.grow(getInputKernelSize(0) / 2, 0);
        } else {
            ret.grow(0, getInputKernelSize(1) / 2);
        }
        return ret;
    }

    @Override
    public float[] getPassVector() {
        float xoff = samplevectors[0];
        float yoff = samplevectors[1];
        int ksize = getPassKernelSize();
        int center = ksize / 2;
        float ret[] = new float[4];
        ret[0] = xoff;
        ret[1] = yoff;
        ret[2] = -center * xoff;
        ret[3] = -center * yoff;
        return ret;
    }

    @Override
    public int getPassWeightsArrayLength() {
        validateWeights();
        return weights.limit() / 4;
    }

    @Override
    public FloatBuffer getPassWeights() {
        validateWeights();
        weights.rewind();
        return weights;
    }
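
    // A worked example of the kernel construction in validateWeights()
    // below: for passSize == 3.0 and blurPasses == 2, klen == 3 and
    // totalklen == 5, so ik starts as [1 1 1 0 0] and the second pass
    // convolves it in place with the 3-wide box to produce [1 2 3 2 1],
    // which is then normalized by its sum (9) into the final weights.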
    private void validateWeights() {
        float pSize;
        if (blurPasses == 0) {
            pSize = 1.0f;
        } else {
            pSize = passSize;
            // 1.0f is the minimum size and is a NOP (each pixel averaged
            // over itself)
            if (pSize < 1.0f) pSize = 1.0f;
        }
        float passSpread = (validatedPass == spreadPass) ? spread : 0f;
        if (weights != null &&
            weightsValidSize == pSize &&
            weightsValidSpread == passSpread)
        {
            return;
        }

        // Round klen up to a full pixel size and make sure it is odd so
        // that we center the kernel around each pixel center (1.0 of the
        // total size/weight is centered on the current pixel and then the
        // remainder is split (size-1.0)/2 on each side).
        // If the size is 2, then we don't want to average each pair of
        // pixels together (weights: 0.5, 0.5); instead we want to take each
        // pixel and average in half of each of its neighbors with it
        // (weights: 0.25, 0.5, 0.25).
        int klen = ((int) Math.ceil(pSize)) | 1;
        int totalklen = klen;
        for (int p = 1; p < blurPasses; p++) {
            totalklen += klen - 1;
        }
        double ik[] = new double[totalklen];
        for (int i = 0; i < klen; i++) {
            ik[i] = 1.0;
        }
        // The sum of the ik[] array is now klen, but we want the sum to be
        // size.  The worst case difference will be less than 2.0 since the
        // klen length is the ceil of the actual size possibly bumped up to
        // an odd number, so it can have been bumped up by no more than 2.0.
        // If there is an excess, we need to take half of it out of each of
        // the two end weights (first and last).
        double excess = klen - pSize;
        if (excess > 0.0) {
            // assert (excess * 0.5 < 1.0)
            ik[0] = ik[klen - 1] = 1.0 - excess * 0.5;
        }
        int filledklen = klen;
        for (int p = 1; p < blurPasses; p++) {
            filledklen += klen - 1;
            int i = filledklen - 1;
            while (i >= klen) {
                double sum = ik[i];
                for (int k = 1; k < klen; k++) {
                    sum += ik[i - k];
                }
                ik[i--] = sum;
            }
            while (i > 0) {
                double sum = ik[i];
                for (int k = 0; k < i; k++) {
                    sum += ik[k];
                }
                ik[i--] = sum;
            }
        }
        // assert (filledklen == totalklen == ik.length)
        double sum = 0.0;
        for (int i = 0; i < ik.length; i++) {
            sum += ik[i];
        }
        // We need to apply the spread on only one pass.
        // Prefer pass 1 if its size is not trivial, otherwise use pass 0
        // so that the spread doesn't disappear (see the spreadPass field).
        sum += (1.0 - sum) * passSpread;
        if (weights == null) {
            // peersize(MAX_KERNEL_SIZE) rounded up to the next multiple of 4
            int maxbufsize = getPeerSize(MAX_KERNEL_SIZE);
            maxbufsize = (maxbufsize + 3) & (~3);
            weights = BufferUtil.newFloatBuffer(maxbufsize);
        }
        weights.clear();
        for (int i = 0; i < ik.length; i++) {
            weights.put((float) (ik[i] / sum));
        }
        int limit = getPeerSize(ik.length);
        while (weights.position() < limit) {
            weights.put(0f);
        }
        weights.limit(limit);
        weights.rewind();
        // Remember what we validated against so we can skip the rebuild the
        // next time nothing has changed.
        this.weightsValidSize = pSize;
        this.weightsValidSpread = passSpread;
    }

    @Override
    public int getInputKernelSize(int pass) {
        float size = (pass == 0) ? inputSizeH : inputSizeV;
        if (size < 1.0f) size = 1.0f;
        int klen = ((int) Math.ceil(size)) | 1;
        int totalklen = 1;
        for (int p = 0; p < blurPasses; p++) {
            totalklen += klen - 1;
        }
        return totalklen;
    }

    @Override
    public int getPassKernelSize() {
        float size = passSize;
        if (size < 1.0f) size = 1.0f;
        int klen = ((int) Math.ceil(size)) | 1;
        int totalklen = 1;
        for (int p = 0; p < blurPasses; p++) {
            totalklen += klen - 1;
        }
        return totalklen;
    }

    @Override
    public boolean isNop() {
        if (isShadow) return false;
        return (blurPasses == 0 ||
                (inputSizeH <= 1.0f && inputSizeV <= 1.0f));
    }

    @Override
    public boolean isPassNop() {
        if (isShadow && validatedPass == 1) return false;
        return (blurPasses == 0 || passSize <= 1.0f);
    }
}