1 /* 2 * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 25 package org.graalvm.compiler.loop.phases; 26 27 import static org.graalvm.compiler.core.common.GraalOptions.MaximumDesiredSize; 28 29 import java.util.ArrayList; 30 import java.util.Iterator; 31 import java.util.List; 32 33 import jdk.internal.vm.compiler.collections.EconomicMap; 34 import org.graalvm.compiler.core.common.RetryableBailoutException; 35 import org.graalvm.compiler.core.common.calc.CanonicalCondition; 36 import org.graalvm.compiler.debug.DebugContext; 37 import org.graalvm.compiler.graph.Graph.Mark; 38 import org.graalvm.compiler.graph.Node; 39 import org.graalvm.compiler.graph.Position; 40 import org.graalvm.compiler.loop.CountedLoopInfo; 41 import org.graalvm.compiler.loop.InductionVariable.Direction; 42 import org.graalvm.compiler.loop.LoopEx; 43 import org.graalvm.compiler.loop.LoopFragmentInside; 44 import org.graalvm.compiler.loop.LoopFragmentWhole; 45 import org.graalvm.compiler.nodeinfo.InputType; 46 import org.graalvm.compiler.nodes.AbstractBeginNode; 47 import org.graalvm.compiler.nodes.AbstractEndNode; 48 import org.graalvm.compiler.nodes.AbstractMergeNode; 49 import org.graalvm.compiler.nodes.BeginNode; 50 import org.graalvm.compiler.nodes.ControlSplitNode; 51 import org.graalvm.compiler.nodes.EndNode; 52 import org.graalvm.compiler.nodes.FixedNode; 53 import org.graalvm.compiler.nodes.FixedWithNextNode; 54 import org.graalvm.compiler.nodes.IfNode; 55 import org.graalvm.compiler.nodes.LogicNode; 56 import org.graalvm.compiler.nodes.LoopBeginNode; 57 import org.graalvm.compiler.nodes.LoopExitNode; 58 import org.graalvm.compiler.nodes.NodeView; 59 import org.graalvm.compiler.nodes.PhiNode; 60 import org.graalvm.compiler.nodes.SafepointNode; 61 import org.graalvm.compiler.nodes.StructuredGraph; 62 import org.graalvm.compiler.nodes.ValueNode; 63 import org.graalvm.compiler.nodes.calc.AddNode; 64 import org.graalvm.compiler.nodes.calc.CompareNode; 65 import org.graalvm.compiler.nodes.calc.ConditionalNode; 66 import org.graalvm.compiler.nodes.calc.IntegerLessThanNode; 67 import org.graalvm.compiler.nodes.extended.OpaqueNode; 68 import org.graalvm.compiler.nodes.extended.SwitchNode; 69 import org.graalvm.compiler.phases.common.CanonicalizerPhase; 70 import org.graalvm.compiler.phases.tiers.PhaseContext; 71 72 public abstract class LoopTransformations { 73 74 private LoopTransformations() { 75 // does not need to be instantiated 76 } 77 78 public static void peel(LoopEx loop) { 79 loop.inside().duplicate().insertBefore(loop); 80 loop.loopBegin().setLoopFrequency(Math.max(0.0, loop.loopBegin().loopFrequency() - 1)); 81 } 82 83 public static void fullUnroll(LoopEx loop, PhaseContext context, CanonicalizerPhase canonicalizer) { 84 // assert loop.isCounted(); //TODO (gd) strengthen : counted with known trip count 85 LoopBeginNode loopBegin = loop.loopBegin(); 86 StructuredGraph graph = loopBegin.graph(); 87 int initialNodeCount = graph.getNodeCount(); 88 while (!loopBegin.isDeleted()) { 89 Mark mark = graph.getMark(); 90 peel(loop); 91 canonicalizer.applyIncremental(graph, context, mark); 92 loop.invalidateFragments(); 93 if (graph.getNodeCount() > initialNodeCount + MaximumDesiredSize.getValue(graph.getOptions()) * 2) { 94 throw new RetryableBailoutException("FullUnroll : Graph seems to grow out of proportion"); 95 } 96 } 97 } 98 99 public static void unswitch(LoopEx loop, List<ControlSplitNode> controlSplitNodeSet) { 100 ControlSplitNode firstNode = controlSplitNodeSet.iterator().next(); 101 LoopFragmentWhole originalLoop = loop.whole(); 102 StructuredGraph graph = firstNode.graph(); 103 104 loop.loopBegin().incrementUnswitches(); 105 106 // create new control split out of loop 107 ControlSplitNode newControlSplit = (ControlSplitNode) firstNode.copyWithInputs(); 108 originalLoop.entryPoint().replaceAtPredecessor(newControlSplit); 109 110 /* 111 * The code below assumes that all of the control split nodes have the same successor 112 * structure, which should have been enforced by findUnswitchable. 113 */ 114 Iterator<Position> successors = firstNode.successorPositions().iterator(); 115 assert successors.hasNext(); 116 // original loop is used as first successor 117 Position firstPosition = successors.next(); 118 AbstractBeginNode originalLoopBegin = BeginNode.begin(originalLoop.entryPoint()); 119 firstPosition.set(newControlSplit, originalLoopBegin); 120 originalLoopBegin.setNodeSourcePosition(firstPosition.get(firstNode).getNodeSourcePosition()); 121 122 while (successors.hasNext()) { 123 Position position = successors.next(); 124 // create a new loop duplicate and connect it. 125 LoopFragmentWhole duplicateLoop = originalLoop.duplicate(); 126 AbstractBeginNode newBegin = BeginNode.begin(duplicateLoop.entryPoint()); 127 newBegin.setNodeSourcePosition(position.get(firstNode).getNodeSourcePosition()); 128 position.set(newControlSplit, newBegin); 129 130 // For each cloned ControlSplitNode, simplify the proper path 131 for (ControlSplitNode controlSplitNode : controlSplitNodeSet) { 132 ControlSplitNode duplicatedControlSplit = duplicateLoop.getDuplicatedNode(controlSplitNode); 133 if (duplicatedControlSplit.isAlive()) { 134 AbstractBeginNode survivingSuccessor = (AbstractBeginNode) position.get(duplicatedControlSplit); 135 survivingSuccessor.replaceAtUsages(InputType.Guard, newBegin); 136 graph.removeSplitPropagate(duplicatedControlSplit, survivingSuccessor); 137 } 138 } 139 } 140 // original loop is simplified last to avoid deleting controlSplitNode too early 141 for (ControlSplitNode controlSplitNode : controlSplitNodeSet) { 142 if (controlSplitNode.isAlive()) { 143 AbstractBeginNode survivingSuccessor = (AbstractBeginNode) firstPosition.get(controlSplitNode); 144 survivingSuccessor.replaceAtUsages(InputType.Guard, originalLoopBegin); 145 graph.removeSplitPropagate(controlSplitNode, survivingSuccessor); 146 } 147 } 148 149 // TODO (gd) probabilities need some amount of fixup.. (probably also in other transforms) 150 } 151 152 public static void partialUnroll(LoopEx loop, EconomicMap<LoopBeginNode, OpaqueNode> opaqueUnrolledStrides) { 153 assert loop.loopBegin().isMainLoop(); 154 loop.loopBegin().graph().getDebug().log("LoopPartialUnroll %s", loop); 155 156 LoopFragmentInside newSegment = loop.inside().duplicate(); 157 newSegment.insertWithinAfter(loop, opaqueUnrolledStrides); 158 159 } 160 161 // This function splits candidate loops into pre, main and post loops, 162 // dividing the iteration space to facilitate the majority of iterations 163 // being executed in a main loop, which will have RCE implemented upon it. 164 // The initial loop form is constrained to single entry/exit, but can have 165 // flow. The translation looks like: 166 // 167 // @formatter:off 168 // 169 // (Simple Loop entry) (Pre Loop Entry) 170 // | | 171 // (LoopBeginNode) (LoopBeginNode) 172 // | | 173 // (Loop Control Test)<------ ==> (Loop control Test)<------ 174 // / \ \ / \ \ 175 // (Loop Exit) (Loop Body) | (Loop Exit) (Loop Body) | 176 // | | | | | | 177 // (continue code) (Loop End) | if (M < length)* (Loop End) | 178 // \ / / \ \ / 179 // -----> / | -----> 180 // / if ( ... )* 181 // / / \ 182 // / / \ 183 // / / \ 184 // | / (Main Loop Entry) 185 // | | | 186 // | | (LoopBeginNode) 187 // | | | 188 // | | (Loop Control Test)<------ 189 // | | / \ \ 190 // | | (Loop Exit) (Loop Body) | 191 // \ \ | | | 192 // \ \ | (Loop End) | 193 // \ \ | \ / 194 // \ \ | ------> 195 // \ \ | 196 // (Main Loop Merge)* 197 // | 198 // (Post Loop Entry) 199 // | 200 // (LoopBeginNode) 201 // | 202 // (Loop Control Test)<----- 203 // / \ \ 204 // (Loop Exit) (Loop Body) | 205 // | | | 206 // (continue code) (Loop End) | 207 // \ / 208 // -----> 209 // 210 // Key: "*" = optional. 211 // @formatter:on 212 // 213 // The value "M" is the maximal value of the loop trip for the original 214 // loop. The value of "length" is applicable to the number of arrays found 215 // in the loop but is reduced if some or all of the arrays are known to be 216 // the same length as "M". The maximum number of tests can be equal to the 217 // number of arrays in the loop, where multiple instances of an array are 218 // subsumed into a single test for that arrays length. 219 // 220 // If the optional main loop entry tests are absent, the Pre Loop exit 221 // connects to the Main loops entry and there is no merge hanging off the 222 // main loops exit to converge flow from said tests. All split use data 223 // flow is mitigated through phi(s) in the main merge if present and 224 // passed through the main and post loop phi(s) from the originating pre 225 // loop with final phi(s) and data flow patched to the "continue code". 226 // The pre loop is constrained to one iteration for now and will likely 227 // be updated to produce vector alignment if applicable. 228 229 public static LoopBeginNode insertPrePostLoops(LoopEx loop) { 230 StructuredGraph graph = loop.loopBegin().graph(); 231 graph.getDebug().log("LoopTransformations.insertPrePostLoops %s", loop); 232 LoopFragmentWhole preLoop = loop.whole(); 233 CountedLoopInfo preCounted = loop.counted(); 234 IfNode preLimit = preCounted.getLimitTest(); 235 assert preLimit != null; 236 LoopBeginNode preLoopBegin = loop.loopBegin(); 237 LoopExitNode preLoopExitNode = preLoopBegin.getSingleLoopExit(); 238 FixedNode continuationNode = preLoopExitNode.next(); 239 240 // Each duplication is inserted after the original, ergo create the post loop first 241 LoopFragmentWhole mainLoop = preLoop.duplicate(); 242 LoopFragmentWhole postLoop = preLoop.duplicate(); 243 preLoopBegin.incrementSplits(); 244 preLoopBegin.incrementSplits(); 245 preLoopBegin.setPreLoop(); 246 graph.getDebug().dump(DebugContext.VERBOSE_LEVEL, graph, "After duplication"); 247 LoopBeginNode mainLoopBegin = mainLoop.getDuplicatedNode(preLoopBegin); 248 mainLoopBegin.setMainLoop(); 249 LoopBeginNode postLoopBegin = postLoop.getDuplicatedNode(preLoopBegin); 250 postLoopBegin.setPostLoop(); 251 252 EndNode postEndNode = getBlockEndAfterLoopExit(postLoopBegin); 253 AbstractMergeNode postMergeNode = postEndNode.merge(); 254 LoopExitNode postLoopExitNode = postLoopBegin.getSingleLoopExit(); 255 256 // Update the main loop phi initialization to carry from the pre loop 257 for (PhiNode prePhiNode : preLoopBegin.phis()) { 258 PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode); 259 mainPhiNode.setValueAt(0, prePhiNode); 260 } 261 262 EndNode mainEndNode = getBlockEndAfterLoopExit(mainLoopBegin); 263 AbstractMergeNode mainMergeNode = mainEndNode.merge(); 264 AbstractEndNode postEntryNode = postLoopBegin.forwardEnd(); 265 266 // In the case of no Bounds tests, we just flow right into the main loop 267 AbstractBeginNode mainLandingNode = BeginNode.begin(postEntryNode); 268 LoopExitNode mainLoopExitNode = mainLoopBegin.getSingleLoopExit(); 269 mainLoopExitNode.setNext(mainLandingNode); 270 preLoopExitNode.setNext(mainLoopBegin.forwardEnd()); 271 272 // Add and update any phi edges as per merge usage as needed and update usages 273 processPreLoopPhis(loop, mainLoop, postLoop); 274 continuationNode.predecessor().clearSuccessors(); 275 postLoopExitNode.setNext(continuationNode); 276 cleanupMerge(postMergeNode, postLoopExitNode); 277 cleanupMerge(mainMergeNode, mainLandingNode); 278 279 // Change the preLoop to execute one iteration for now 280 updatePreLoopLimit(preCounted); 281 preLoopBegin.setLoopFrequency(1); 282 mainLoopBegin.setLoopFrequency(Math.max(0.0, mainLoopBegin.loopFrequency() - 2)); 283 postLoopBegin.setLoopFrequency(Math.max(0.0, postLoopBegin.loopFrequency() - 1)); 284 285 // The pre and post loops don't require safepoints at all 286 for (SafepointNode safepoint : preLoop.nodes().filter(SafepointNode.class)) { 287 graph.removeFixed(safepoint); 288 } 289 for (SafepointNode safepoint : postLoop.nodes().filter(SafepointNode.class)) { 290 graph.removeFixed(safepoint); 291 } 292 graph.getDebug().dump(DebugContext.DETAILED_LEVEL, graph, "InsertPrePostLoops %s", loop); 293 return mainLoopBegin; 294 } 295 296 /** 297 * Cleanup the merge and remove the predecessors too. 298 */ 299 private static void cleanupMerge(AbstractMergeNode mergeNode, AbstractBeginNode landingNode) { 300 for (EndNode end : mergeNode.cfgPredecessors().snapshot()) { 301 mergeNode.removeEnd(end); 302 end.safeDelete(); 303 } 304 mergeNode.prepareDelete(landingNode); 305 mergeNode.safeDelete(); 306 } 307 308 private static void processPreLoopPhis(LoopEx preLoop, LoopFragmentWhole mainLoop, LoopFragmentWhole postLoop) { 309 // process phis for the post loop 310 LoopBeginNode preLoopBegin = preLoop.loopBegin(); 311 for (PhiNode prePhiNode : preLoopBegin.phis()) { 312 PhiNode postPhiNode = postLoop.getDuplicatedNode(prePhiNode); 313 PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode); 314 postPhiNode.setValueAt(0, mainPhiNode); 315 316 // Build a work list to update the pre loop phis to the post loops phis 317 for (Node usage : prePhiNode.usages().snapshot()) { 318 if (usage == mainPhiNode) { 319 continue; 320 } 321 if (preLoop.isOutsideLoop(usage)) { 322 usage.replaceFirstInput(prePhiNode, postPhiNode); 323 } 324 } 325 } 326 for (Node node : preLoop.inside().nodes()) { 327 for (Node externalUsage : node.usages().snapshot()) { 328 if (preLoop.isOutsideLoop(externalUsage)) { 329 Node postUsage = postLoop.getDuplicatedNode(node); 330 assert postUsage != null; 331 externalUsage.replaceFirstInput(node, postUsage); 332 } 333 } 334 } 335 } 336 337 /** 338 * Find the end of the block following the LoopExit. 339 */ 340 private static EndNode getBlockEndAfterLoopExit(LoopBeginNode curLoopBegin) { 341 FixedNode node = curLoopBegin.getSingleLoopExit().next(); 342 // Find the last node after the exit blocks starts 343 return getBlockEnd(node); 344 } 345 346 private static EndNode getBlockEnd(FixedNode node) { 347 FixedNode curNode = node; 348 while (curNode instanceof FixedWithNextNode) { 349 curNode = ((FixedWithNextNode) curNode).next(); 350 } 351 return (EndNode) curNode; 352 } 353 354 private static void updatePreLoopLimit(CountedLoopInfo preCounted) { 355 // Update the pre loops limit test 356 // Make new limit one iteration 357 ValueNode newLimit = AddNode.add(preCounted.getStart(), preCounted.getCounter().strideNode(), NodeView.DEFAULT); 358 // Fetch the variable we are not replacing and configure the one we are 359 ValueNode ub = preCounted.getLimit(); 360 LogicNode entryCheck; 361 if (preCounted.getDirection() == Direction.Up) { 362 entryCheck = IntegerLessThanNode.create(newLimit, ub, NodeView.DEFAULT); 363 } else { 364 entryCheck = IntegerLessThanNode.create(ub, newLimit, NodeView.DEFAULT); 365 } 366 newLimit = ConditionalNode.create(entryCheck, newLimit, ub, NodeView.DEFAULT); 367 // Re-wire the condition with the new limit 368 CompareNode compareNode = (CompareNode) preCounted.getLimitTest().condition(); 369 compareNode.replaceFirstInput(ub, compareNode.graph().addOrUniqueWithInputs(newLimit)); 370 } 371 372 public static List<ControlSplitNode> findUnswitchable(LoopEx loop) { 373 List<ControlSplitNode> controls = null; 374 ValueNode invariantValue = null; 375 for (IfNode ifNode : loop.whole().nodes().filter(IfNode.class)) { 376 if (loop.isOutsideLoop(ifNode.condition())) { 377 if (controls == null) { 378 invariantValue = ifNode.condition(); 379 controls = new ArrayList<>(); 380 controls.add(ifNode); 381 } else if (ifNode.condition() == invariantValue) { 382 controls.add(ifNode); 383 } 384 } 385 } 386 if (controls == null) { 387 SwitchNode firstSwitch = null; 388 for (SwitchNode switchNode : loop.whole().nodes().filter(SwitchNode.class)) { 389 if (switchNode.successors().count() > 1 && loop.isOutsideLoop(switchNode.value())) { 390 if (controls == null) { 391 firstSwitch = switchNode; 392 invariantValue = switchNode.value(); 393 controls = new ArrayList<>(); 394 controls.add(switchNode); 395 } else if (switchNode.value() == invariantValue && firstSwitch.structureEquals(switchNode)) { 396 // Only collect switches which test the same values in the same order 397 controls.add(switchNode); 398 } 399 } 400 } 401 } 402 return controls; 403 } 404 405 public static boolean isUnrollableLoop(LoopEx loop) { 406 if (!loop.isCounted() || !loop.counted().getCounter().isConstantStride() || !loop.loop().getChildren().isEmpty()) { 407 return false; 408 } 409 assert loop.counted().getDirection() != null; 410 LoopBeginNode loopBegin = loop.loopBegin(); 411 LogicNode condition = loop.counted().getLimitTest().condition(); 412 if (!(condition instanceof CompareNode)) { 413 return false; 414 } 415 if (((CompareNode) condition).condition() == CanonicalCondition.EQ) { 416 condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s condition unsupported %s ", loopBegin, ((CompareNode) condition).condition()); 417 return false; 418 } 419 long stride = loop.counted().getCounter().constantStride(); 420 try { 421 Math.addExact(stride, stride); 422 } catch (ArithmeticException ae) { 423 condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s doubling the stride overflows %d", loopBegin, stride); 424 return false; 425 } 426 if (loopBegin.isMainLoop() || loopBegin.isSimpleLoop()) { 427 // Flow-less loops to partial unroll for now. 3 blocks corresponds to an if that either 428 // exits or continues the loop. There might be fixed and floating work within the loop 429 // as well. 430 if (loop.loop().getBlocks().size() < 3) { 431 return true; 432 } 433 condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s too large to unroll %s ", loopBegin, loop.loop().getBlocks().size()); 434 } 435 return false; 436 } 437 }