1 /*
   2  * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 package org.graalvm.compiler.loop.phases;
  26 
  27 import static org.graalvm.compiler.core.common.GraalOptions.MaximumDesiredSize;
  28 
  29 import java.util.ArrayList;
  30 import java.util.Iterator;
  31 import java.util.List;
  32 
  33 import jdk.internal.vm.compiler.collections.EconomicMap;
  34 import org.graalvm.compiler.core.common.RetryableBailoutException;
  35 import org.graalvm.compiler.core.common.calc.CanonicalCondition;
  36 import org.graalvm.compiler.debug.DebugContext;
  37 import org.graalvm.compiler.graph.Graph.Mark;
  38 import org.graalvm.compiler.graph.Node;
  39 import org.graalvm.compiler.graph.Position;
  40 import org.graalvm.compiler.loop.CountedLoopInfo;
  41 import org.graalvm.compiler.loop.InductionVariable.Direction;
  42 import org.graalvm.compiler.loop.LoopEx;
  43 import org.graalvm.compiler.loop.LoopFragmentInside;
  44 import org.graalvm.compiler.loop.LoopFragmentWhole;
  45 import org.graalvm.compiler.nodeinfo.InputType;
  46 import org.graalvm.compiler.nodes.AbstractBeginNode;
  47 import org.graalvm.compiler.nodes.AbstractEndNode;
  48 import org.graalvm.compiler.nodes.AbstractMergeNode;
  49 import org.graalvm.compiler.nodes.BeginNode;
  50 import org.graalvm.compiler.nodes.ControlSplitNode;
  51 import org.graalvm.compiler.nodes.EndNode;
  52 import org.graalvm.compiler.nodes.FixedNode;
  53 import org.graalvm.compiler.nodes.FixedWithNextNode;
  54 import org.graalvm.compiler.nodes.IfNode;
  55 import org.graalvm.compiler.nodes.LogicNode;
  56 import org.graalvm.compiler.nodes.LoopBeginNode;
  57 import org.graalvm.compiler.nodes.NodeView;
  58 import org.graalvm.compiler.nodes.PhiNode;
  59 import org.graalvm.compiler.nodes.SafepointNode;
  60 import org.graalvm.compiler.nodes.StructuredGraph;
  61 import org.graalvm.compiler.nodes.ValueNode;
  62 import org.graalvm.compiler.nodes.calc.AddNode;
  63 import org.graalvm.compiler.nodes.calc.CompareNode;
  64 import org.graalvm.compiler.nodes.calc.ConditionalNode;
  65 import org.graalvm.compiler.nodes.calc.IntegerLessThanNode;
  66 import org.graalvm.compiler.nodes.extended.OpaqueNode;
  67 import org.graalvm.compiler.nodes.extended.SwitchNode;
  68 import org.graalvm.compiler.phases.common.CanonicalizerPhase;
  69 import org.graalvm.compiler.phases.tiers.PhaseContext;
  70 
  71 public abstract class LoopTransformations {
  72 
  73     private LoopTransformations() {
  74         // does not need to be instantiated
  75     }
  76 
  77     public static void peel(LoopEx loop) {
  78         loop.inside().duplicate().insertBefore(loop);
  79         loop.loopBegin().setLoopFrequency(Math.max(0.0, loop.loopBegin().loopFrequency() - 1));
  80     }
  81 
  82     public static void fullUnroll(LoopEx loop, PhaseContext context, CanonicalizerPhase canonicalizer) {
  83         // assert loop.isCounted(); //TODO (gd) strengthen : counted with known trip count
  84         LoopBeginNode loopBegin = loop.loopBegin();
  85         StructuredGraph graph = loopBegin.graph();
  86         int initialNodeCount = graph.getNodeCount();
  87         while (!loopBegin.isDeleted()) {
  88             Mark mark = graph.getMark();
  89             peel(loop);
  90             canonicalizer.applyIncremental(graph, context, mark);
  91             loop.invalidateFragments();
  92             if (graph.getNodeCount() > initialNodeCount + MaximumDesiredSize.getValue(graph.getOptions()) * 2) {
  93                 throw new RetryableBailoutException("FullUnroll : Graph seems to grow out of proportion");
  94             }
  95         }
  96     }
  97 
  98     public static void unswitch(LoopEx loop, List<ControlSplitNode> controlSplitNodeSet) {
  99         ControlSplitNode firstNode = controlSplitNodeSet.iterator().next();
 100         LoopFragmentWhole originalLoop = loop.whole();
 101         StructuredGraph graph = firstNode.graph();
 102 
 103         loop.loopBegin().incrementUnswitches();
 104 
 105         // create new control split out of loop
 106         ControlSplitNode newControlSplit = (ControlSplitNode) firstNode.copyWithInputs();
 107         originalLoop.entryPoint().replaceAtPredecessor(newControlSplit);
 108 
 109         /*
 110          * The code below assumes that all of the control split nodes have the same successor
 111          * structure, which should have been enforced by findUnswitchable.
 112          */
 113         Iterator<Position> successors = firstNode.successorPositions().iterator();
 114         assert successors.hasNext();
 115         // original loop is used as first successor
 116         Position firstPosition = successors.next();
 117         AbstractBeginNode originalLoopBegin = BeginNode.begin(originalLoop.entryPoint());
 118         firstPosition.set(newControlSplit, originalLoopBegin);
 119         originalLoopBegin.setNodeSourcePosition(firstPosition.get(firstNode).getNodeSourcePosition());
 120 
 121         while (successors.hasNext()) {
 122             Position position = successors.next();
 123             // create a new loop duplicate and connect it.
 124             LoopFragmentWhole duplicateLoop = originalLoop.duplicate();
 125             AbstractBeginNode newBegin = BeginNode.begin(duplicateLoop.entryPoint());
 126             newBegin.setNodeSourcePosition(position.get(firstNode).getNodeSourcePosition());
 127             position.set(newControlSplit, newBegin);
 128 
 129             // For each cloned ControlSplitNode, simplify the proper path
 130             for (ControlSplitNode controlSplitNode : controlSplitNodeSet) {
 131                 ControlSplitNode duplicatedControlSplit = duplicateLoop.getDuplicatedNode(controlSplitNode);
 132                 if (duplicatedControlSplit.isAlive()) {
 133                     AbstractBeginNode survivingSuccessor = (AbstractBeginNode) position.get(duplicatedControlSplit);
 134                     survivingSuccessor.replaceAtUsages(InputType.Guard, newBegin);
 135                     graph.removeSplitPropagate(duplicatedControlSplit, survivingSuccessor);
 136                 }
 137             }
 138         }
 139         // original loop is simplified last to avoid deleting controlSplitNode too early
 140         for (ControlSplitNode controlSplitNode : controlSplitNodeSet) {
 141             if (controlSplitNode.isAlive()) {
 142                 AbstractBeginNode survivingSuccessor = (AbstractBeginNode) firstPosition.get(controlSplitNode);
 143                 survivingSuccessor.replaceAtUsages(InputType.Guard, originalLoopBegin);
 144                 graph.removeSplitPropagate(controlSplitNode, survivingSuccessor);
 145             }
 146         }
 147 
 148         // TODO (gd) probabilities need some amount of fixup.. (probably also in other transforms)
 149     }
 150 
 151     public static void partialUnroll(LoopEx loop, EconomicMap<LoopBeginNode, OpaqueNode> opaqueUnrolledStrides) {
 152         assert loop.loopBegin().isMainLoop();
 153         loop.loopBegin().graph().getDebug().log("LoopPartialUnroll %s", loop);
 154 
 155         LoopFragmentInside newSegment = loop.inside().duplicate();
 156         newSegment.insertWithinAfter(loop, opaqueUnrolledStrides);
 157 
 158     }
 159 
 160     // This function splits candidate loops into pre, main and post loops,
 161     // dividing the iteration space to facilitate the majority of iterations
 162     // being executed in a main loop, which will have RCE implemented upon it.
 163     // The initial loop form is constrained to single entry/exit, but can have
 164     // flow. The translation looks like:
 165     //
 166     //  @formatter:off
 167     //
 168     //       (Simple Loop entry)                   (Pre Loop Entry)
 169     //                |                                  |
 170     //         (LoopBeginNode)                    (LoopBeginNode)
 171     //                |                                  |
 172     //       (Loop Control Test)<------   ==>  (Loop control Test)<------
 173     //         /               \       \         /               \       \
 174     //    (Loop Exit)      (Loop Body) |    (Loop Exit)      (Loop Body) |
 175     //        |                |       |        |                |       |
 176     // (continue code)     (Loop End)  |  if (M < length)*   (Loop End)  |
 177     //                         \       /       /      \           \      /
 178     //                          ----->        /       |            ----->
 179     //                                       /  if ( ... )*
 180     //                                      /     /       \
 181     //                                     /     /         \
 182     //                                    /     /           \
 183     //                                   |     /     (Main Loop Entry)
 184     //                                   |    |             |
 185     //                                   |    |      (LoopBeginNode)
 186     //                                   |    |             |
 187     //                                   |    |     (Loop Control Test)<------
 188     //                                   |    |      /               \        \
 189     //                                   |    |  (Loop Exit)      (Loop Body) |
 190     //                                    \   \      |                |       |
 191     //                                     \   \     |            (Loop End)  |
 192     //                                      \   \    |                \       /
 193     //                                       \   \   |                 ------>
 194     //                                        \   \  |
 195     //                                      (Main Loop Merge)*
 196     //                                               |
 197     //                                      (Post Loop Entry)
 198     //                                               |
 199     //                                        (LoopBeginNode)
 200     //                                               |
 201     //                                       (Loop Control Test)<-----
 202     //                                        /               \       \
 203     //                                    (Loop Exit)     (Loop Body) |
 204     //                                        |               |       |
 205     //                                 (continue code)    (Loop End)  |
 206     //                                                         \      /
 207     //                                                          ----->
 208     //
 209     // Key: "*" = optional.
 210     // @formatter:on
 211     //
 212     // The value "M" is the maximal value of the loop trip for the original
 213     // loop. The value of "length" is applicable to the number of arrays found
 214     // in the loop but is reduced if some or all of the arrays are known to be
 215     // the same length as "M". The maximum number of tests can be equal to the
 216     // number of arrays in the loop, where multiple instances of an array are
 217     // subsumed into a single test for that arrays length.
 218     //
 219     // If the optional main loop entry tests are absent, the Pre Loop exit
 220     // connects to the Main loops entry and there is no merge hanging off the
 221     // main loops exit to converge flow from said tests. All split use data
 222     // flow is mitigated through phi(s) in the main merge if present and
 223     // passed through the main and post loop phi(s) from the originating pre
 224     // loop with final phi(s) and data flow patched to the "continue code".
 225     // The pre loop is constrained to one iteration for now and will likely
 226     // be updated to produce vector alignment if applicable.
 227 
 228     public static LoopBeginNode insertPrePostLoops(LoopEx loop) {
 229         StructuredGraph graph = loop.loopBegin().graph();
 230         graph.getDebug().log("LoopTransformations.insertPrePostLoops %s", loop);
 231         LoopFragmentWhole preLoop = loop.whole();
 232         CountedLoopInfo preCounted = loop.counted();
 233         LoopBeginNode preLoopBegin = loop.loopBegin();
 234         AbstractBeginNode preLoopExitNode = preCounted.getCountedExit();
 235 
 236         assert preLoop.nodes().contains(preLoopBegin);
 237         assert preLoop.nodes().contains(preLoopExitNode);
 238 
 239         // Each duplication is inserted after the original, ergo create the post loop first
 240         LoopFragmentWhole mainLoop = preLoop.duplicate();
 241         LoopFragmentWhole postLoop = preLoop.duplicate();
 242         preLoopBegin.incrementSplits();
 243         preLoopBegin.incrementSplits();
 244         preLoopBegin.setPreLoop();
 245         graph.getDebug().dump(DebugContext.VERBOSE_LEVEL, graph, "After duplication");
 246         LoopBeginNode mainLoopBegin = mainLoop.getDuplicatedNode(preLoopBegin);
 247         mainLoopBegin.setMainLoop();
 248         LoopBeginNode postLoopBegin = postLoop.getDuplicatedNode(preLoopBegin);
 249         postLoopBegin.setPostLoop();
 250 
 251         AbstractBeginNode postLoopExitNode = postLoop.getDuplicatedNode(preLoopExitNode);
 252         EndNode postEndNode = getBlockEndAfterLoopExit(postLoopExitNode);
 253         AbstractMergeNode postMergeNode = postEndNode.merge();
 254 
 255         // Update the main loop phi initialization to carry from the pre loop
 256         for (PhiNode prePhiNode : preLoopBegin.phis()) {
 257             PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode);
 258             mainPhiNode.setValueAt(0, prePhiNode);
 259         }
 260 
 261         AbstractBeginNode mainLoopExitNode = mainLoop.getDuplicatedNode(preLoopExitNode);
 262         EndNode mainEndNode = getBlockEndAfterLoopExit(mainLoopExitNode);
 263         AbstractMergeNode mainMergeNode = mainEndNode.merge();
 264         AbstractEndNode postEntryNode = postLoopBegin.forwardEnd();
 265 
 266         // Exits have been merged, find the continuation below the merge
 267         FixedNode continuationNode = mainMergeNode.next();
 268 
 269         // In the case of no Bounds tests, we just flow right into the main loop
 270         AbstractBeginNode mainLandingNode = BeginNode.begin(postEntryNode);
 271         mainLoopExitNode.setNext(mainLandingNode);
 272         preLoopExitNode.setNext(mainLoopBegin.forwardEnd());
 273 
 274         // Add and update any phi edges as per merge usage as needed and update usages
 275         processPreLoopPhis(loop, mainLoop, postLoop);
 276         continuationNode.predecessor().clearSuccessors();
 277         postLoopExitNode.setNext(continuationNode);
 278         cleanupMerge(postMergeNode, postLoopExitNode);
 279         cleanupMerge(mainMergeNode, mainLandingNode);
 280 
 281         // Change the preLoop to execute one iteration for now
 282         updatePreLoopLimit(preCounted);
 283         preLoopBegin.setLoopFrequency(1);
 284         mainLoopBegin.setLoopFrequency(Math.max(0.0, mainLoopBegin.loopFrequency() - 2));
 285         postLoopBegin.setLoopFrequency(Math.max(0.0, postLoopBegin.loopFrequency() - 1));
 286 
 287         // The pre and post loops don't require safepoints at all
 288         for (SafepointNode safepoint : preLoop.nodes().filter(SafepointNode.class)) {
 289             graph.removeFixed(safepoint);
 290         }
 291         for (SafepointNode safepoint : postLoop.nodes().filter(SafepointNode.class)) {
 292             graph.removeFixed(safepoint);
 293         }
 294         graph.getDebug().dump(DebugContext.DETAILED_LEVEL, graph, "InsertPrePostLoops %s", loop);
 295         return mainLoopBegin;
 296     }
 297 
 298     /**
 299      * Cleanup the merge and remove the predecessors too.
 300      */
 301     private static void cleanupMerge(AbstractMergeNode mergeNode, AbstractBeginNode landingNode) {
 302         for (EndNode end : mergeNode.cfgPredecessors().snapshot()) {
 303             mergeNode.removeEnd(end);
 304             end.safeDelete();
 305         }
 306         mergeNode.prepareDelete(landingNode);
 307         mergeNode.safeDelete();
 308     }
 309 
 310     private static void processPreLoopPhis(LoopEx preLoop, LoopFragmentWhole mainLoop, LoopFragmentWhole postLoop) {
 311         // process phis for the post loop
 312         LoopBeginNode preLoopBegin = preLoop.loopBegin();
 313         for (PhiNode prePhiNode : preLoopBegin.phis()) {
 314             PhiNode postPhiNode = postLoop.getDuplicatedNode(prePhiNode);
 315             PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode);
 316             postPhiNode.setValueAt(0, mainPhiNode);
 317 
 318             // Build a work list to update the pre loop phis to the post loops phis
 319             for (Node usage : prePhiNode.usages().snapshot()) {
 320                 if (usage == mainPhiNode) {
 321                     continue;
 322                 }
 323                 if (preLoop.isOutsideLoop(usage)) {
 324                     usage.replaceFirstInput(prePhiNode, postPhiNode);
 325                 }
 326             }
 327         }
 328         for (Node node : preLoop.inside().nodes()) {
 329             for (Node externalUsage : node.usages().snapshot()) {
 330                 if (preLoop.isOutsideLoop(externalUsage)) {
 331                     Node postUsage = postLoop.getDuplicatedNode(node);
 332                     assert postUsage != null;
 333                     externalUsage.replaceFirstInput(node, postUsage);
 334                 }
 335             }
 336         }
 337     }
 338 
 339     /**
 340      * Find the end of the block following the LoopExit.
 341      */
 342     private static EndNode getBlockEndAfterLoopExit(AbstractBeginNode exit) {
 343         FixedNode node = exit.next();
 344         // Find the last node after the exit blocks starts
 345         return getBlockEnd(node);
 346     }
 347 
 348     private static EndNode getBlockEnd(FixedNode node) {
 349         FixedNode curNode = node;
 350         while (curNode instanceof FixedWithNextNode) {
 351             curNode = ((FixedWithNextNode) curNode).next();
 352         }
 353         return (EndNode) curNode;
 354     }
 355 
 356     private static void updatePreLoopLimit(CountedLoopInfo preCounted) {
 357         // Update the pre loops limit test
 358         // Make new limit one iteration
 359         ValueNode newLimit = AddNode.add(preCounted.getStart(), preCounted.getCounter().strideNode(), NodeView.DEFAULT);
 360         // Fetch the variable we are not replacing and configure the one we are
 361         ValueNode ub = preCounted.getLimit();
 362         LogicNode entryCheck;
 363         if (preCounted.getDirection() == Direction.Up) {
 364             entryCheck = IntegerLessThanNode.create(newLimit, ub, NodeView.DEFAULT);
 365         } else {
 366             entryCheck = IntegerLessThanNode.create(ub, newLimit, NodeView.DEFAULT);
 367         }
 368         newLimit = ConditionalNode.create(entryCheck, newLimit, ub, NodeView.DEFAULT);
 369         // Re-wire the condition with the new limit
 370         CompareNode compareNode = (CompareNode) preCounted.getLimitTest().condition();
 371         compareNode.replaceFirstInput(ub, compareNode.graph().addOrUniqueWithInputs(newLimit));
 372     }
 373 
 374     public static List<ControlSplitNode> findUnswitchable(LoopEx loop) {
 375         List<ControlSplitNode> controls = null;
 376         ValueNode invariantValue = null;
 377         for (IfNode ifNode : loop.whole().nodes().filter(IfNode.class)) {
 378             if (loop.isOutsideLoop(ifNode.condition())) {
 379                 if (controls == null) {
 380                     invariantValue = ifNode.condition();
 381                     controls = new ArrayList<>();
 382                     controls.add(ifNode);
 383                 } else if (ifNode.condition() == invariantValue) {
 384                     controls.add(ifNode);
 385                 }
 386             }
 387         }
 388         if (controls == null) {
 389             SwitchNode firstSwitch = null;
 390             for (SwitchNode switchNode : loop.whole().nodes().filter(SwitchNode.class)) {
 391                 if (switchNode.successors().count() > 1 && loop.isOutsideLoop(switchNode.value())) {
 392                     if (controls == null) {
 393                         firstSwitch = switchNode;
 394                         invariantValue = switchNode.value();
 395                         controls = new ArrayList<>();
 396                         controls.add(switchNode);
 397                     } else if (switchNode.value() == invariantValue && firstSwitch.structureEquals(switchNode)) {
 398                         // Only collect switches which test the same values in the same order
 399                         controls.add(switchNode);
 400                     }
 401                 }
 402             }
 403         }
 404         return controls;
 405     }
 406 
 407     public static boolean isUnrollableLoop(LoopEx loop) {
 408         if (!loop.isCounted() || !loop.counted().getCounter().isConstantStride() || !loop.loop().getChildren().isEmpty()) {
 409             return false;
 410         }
 411         assert loop.counted().getDirection() != null;
 412         LoopBeginNode loopBegin = loop.loopBegin();
 413         LogicNode condition = loop.counted().getLimitTest().condition();
 414         if (!(condition instanceof CompareNode)) {
 415             return false;
 416         }
 417         if (((CompareNode) condition).condition() == CanonicalCondition.EQ) {
 418             condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s condition unsupported %s ", loopBegin, ((CompareNode) condition).condition());
 419             return false;
 420         }
 421         long stride = loop.counted().getCounter().constantStride();
 422         try {
 423             Math.addExact(stride, stride);
 424         } catch (ArithmeticException ae) {
 425             condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s doubling the stride overflows %d", loopBegin, stride);
 426             return false;
 427         }
 428         if (!loop.canDuplicateLoop()) {
 429             return false;
 430         }
 431         if (loopBegin.isMainLoop() || loopBegin.isSimpleLoop()) {
 432             // Flow-less loops to partial unroll for now. 3 blocks corresponds to an if that either
 433             // exits or continues the loop. There might be fixed and floating work within the loop
 434             // as well.
 435             if (loop.loop().getBlocks().size() < 3) {
 436                 return true;
 437             }
 438             condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s too large to unroll %s ", loopBegin, loop.loop().getBlocks().size());
 439         }
 440         return false;
 441     }
 442 }