1 /*
   2  * Copyright (c) 2012, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package org.graalvm.compiler.loop.phases;
  24 
  25 import org.graalvm.compiler.core.common.RetryableBailoutException;
  26 import org.graalvm.compiler.debug.DebugContext;
  27 import org.graalvm.compiler.debug.GraalError;
  28 import org.graalvm.compiler.graph.Graph.Mark;
  29 import org.graalvm.compiler.graph.Node;
  30 import org.graalvm.compiler.graph.Position;
  31 import org.graalvm.compiler.loop.CountedLoopInfo;
  32 import org.graalvm.compiler.loop.InductionVariable;
  33 import org.graalvm.compiler.loop.InductionVariable.Direction;
  34 import org.graalvm.compiler.loop.LoopEx;
  35 import org.graalvm.compiler.loop.LoopFragmentInside;
  36 import org.graalvm.compiler.loop.LoopFragmentWhole;
  37 import org.graalvm.compiler.nodeinfo.InputType;
  38 import org.graalvm.compiler.nodes.AbstractBeginNode;
  39 import org.graalvm.compiler.nodes.AbstractEndNode;
  40 import org.graalvm.compiler.nodes.AbstractMergeNode;
  41 import org.graalvm.compiler.nodes.BeginNode;
  42 import org.graalvm.compiler.nodes.ConstantNode;
  43 import org.graalvm.compiler.nodes.ControlSplitNode;
  44 import org.graalvm.compiler.nodes.EndNode;
  45 import org.graalvm.compiler.nodes.FixedNode;
  46 import org.graalvm.compiler.nodes.FixedWithNextNode;
  47 import org.graalvm.compiler.nodes.IfNode;
  48 import org.graalvm.compiler.nodes.LogicNode;
  49 import org.graalvm.compiler.nodes.LoopBeginNode;
  50 import org.graalvm.compiler.nodes.LoopExitNode;
  51 import org.graalvm.compiler.nodes.PhiNode;
  52 import org.graalvm.compiler.nodes.SafepointNode;
  53 import org.graalvm.compiler.nodes.StructuredGraph;
  54 import org.graalvm.compiler.nodes.ValueNode;
  55 import org.graalvm.compiler.nodes.calc.CompareNode;
  56 import org.graalvm.compiler.nodes.calc.ConditionalNode;
  57 import org.graalvm.compiler.nodes.calc.IntegerLessThanNode;
  58 import org.graalvm.compiler.nodes.extended.SwitchNode;
  59 import org.graalvm.compiler.nodes.util.GraphUtil;
  60 import org.graalvm.compiler.phases.common.CanonicalizerPhase;
  61 import org.graalvm.compiler.phases.tiers.PhaseContext;
  62 
  63 import java.util.ArrayList;
  64 import java.util.Iterator;
  65 import java.util.List;
  66 
  67 import static org.graalvm.compiler.core.common.GraalOptions.MaximumDesiredSize;
  68 import static org.graalvm.compiler.loop.MathUtil.add;
  69 import static org.graalvm.compiler.loop.MathUtil.sub;
  70 
  71 public abstract class LoopTransformations {
  72 
  73     private LoopTransformations() {
  74         // does not need to be instantiated
  75     }
  76 
  77     public static void peel(LoopEx loop) {
  78         loop.inside().duplicate().insertBefore(loop);
  79         loop.loopBegin().setLoopFrequency(Math.max(0.0, loop.loopBegin().loopFrequency() - 1));
  80     }
  81 
  82     public static void fullUnroll(LoopEx loop, PhaseContext context, CanonicalizerPhase canonicalizer) {
  83         // assert loop.isCounted(); //TODO (gd) strengthen : counted with known trip count
  84         LoopBeginNode loopBegin = loop.loopBegin();
  85         StructuredGraph graph = loopBegin.graph();
  86         int initialNodeCount = graph.getNodeCount();
  87         while (!loopBegin.isDeleted()) {
  88             Mark mark = graph.getMark();
  89             peel(loop);
  90             canonicalizer.applyIncremental(graph, context, mark);
  91             loop.invalidateFragments();
  92             if (graph.getNodeCount() > initialNodeCount + MaximumDesiredSize.getValue(graph.getOptions()) * 2) {
  93                 throw new RetryableBailoutException("FullUnroll : Graph seems to grow out of proportion");
  94             }
  95         }
  96     }
  97 
  98     public static void unswitch(LoopEx loop, List<ControlSplitNode> controlSplitNodeSet) {
  99         ControlSplitNode firstNode = controlSplitNodeSet.iterator().next();
 100         LoopFragmentWhole originalLoop = loop.whole();
 101         StructuredGraph graph = firstNode.graph();
 102 
 103         loop.loopBegin().incrementUnswitches();
 104 
 105         // create new control split out of loop
 106         ControlSplitNode newControlSplit = (ControlSplitNode) firstNode.copyWithInputs();
 107         originalLoop.entryPoint().replaceAtPredecessor(newControlSplit);
 108 
 109         /*
 110          * The code below assumes that all of the control split nodes have the same successor
 111          * structure, which should have been enforced by findUnswitchable.
 112          */
 113         Iterator<Position> successors = firstNode.successorPositions().iterator();
 114         assert successors.hasNext();
 115         // original loop is used as first successor
 116         Position firstPosition = successors.next();
 117         AbstractBeginNode originalLoopBegin = BeginNode.begin(originalLoop.entryPoint());
 118         firstPosition.set(newControlSplit, originalLoopBegin);
 119 
 120         while (successors.hasNext()) {
 121             Position position = successors.next();
 122             // create a new loop duplicate and connect it.
 123             LoopFragmentWhole duplicateLoop = originalLoop.duplicate();
 124             AbstractBeginNode newBegin = BeginNode.begin(duplicateLoop.entryPoint());
 125             position.set(newControlSplit, newBegin);
 126 
 127             // For each cloned ControlSplitNode, simplify the proper path
 128             for (ControlSplitNode controlSplitNode : controlSplitNodeSet) {
 129                 ControlSplitNode duplicatedControlSplit = duplicateLoop.getDuplicatedNode(controlSplitNode);
 130                 if (duplicatedControlSplit.isAlive()) {
 131                     AbstractBeginNode survivingSuccessor = (AbstractBeginNode) position.get(duplicatedControlSplit);
 132                     survivingSuccessor.replaceAtUsages(InputType.Guard, newBegin);
 133                     graph.removeSplitPropagate(duplicatedControlSplit, survivingSuccessor);
 134                 }
 135             }
 136         }
 137         // original loop is simplified last to avoid deleting controlSplitNode too early
 138         for (ControlSplitNode controlSplitNode : controlSplitNodeSet) {
 139             if (controlSplitNode.isAlive()) {
 140                 AbstractBeginNode survivingSuccessor = (AbstractBeginNode) firstPosition.get(controlSplitNode);
 141                 survivingSuccessor.replaceAtUsages(InputType.Guard, originalLoopBegin);
 142                 graph.removeSplitPropagate(controlSplitNode, survivingSuccessor);
 143             }
 144         }
 145 
 146         // TODO (gd) probabilities need some amount of fixup.. (probably also in other transforms)
 147     }
 148 
 149     public static void partialUnroll(LoopEx loop, StructuredGraph graph) {
 150         assert loop.loopBegin().isMainLoop();
 151         graph.getDebug().log("LoopPartialUnroll %s", loop);
 152 
 153         LoopFragmentInside newSegment = loop.inside().duplicate();
 154         newSegment.insertWithinAfter(loop);
 155 
 156     }
 157 
 158     // This function splits candidate loops into pre, main and post loops,
 159     // dividing the iteration space to facilitate the majority of iterations
 160     // being executed in a main loop, which will have RCE implemented upon it.
 161     // The initial loop form is constrained to single entry/exit, but can have
 162     // flow. The translation looks like:
 163     //
 164     //  @formatter:off
 165     //
 166     //       (Simple Loop entry)                   (Pre Loop Entry)
 167     //                |                                  |
 168     //         (LoopBeginNode)                    (LoopBeginNode)
 169     //                |                                  |
 170     //       (Loop Control Test)<------   ==>  (Loop control Test)<------
 171     //         /               \       \         /               \       \
 172     //    (Loop Exit)      (Loop Body) |    (Loop Exit)      (Loop Body) |
 173     //        |                |       |        |                |       |
 174     // (continue code)     (Loop End)  |  if (M < length)*   (Loop End)  |
 175     //                         \       /       /      \           \      /
 176     //                          ----->        /       |            ----->
 177     //                                       /  if ( ... )*
 178     //                                      /     /       \
 179     //                                     /     /         \
 180     //                                    /     /           \
 181     //                                   |     /     (Main Loop Entry)
 182     //                                   |    |             |
 183     //                                   |    |      (LoopBeginNode)
 184     //                                   |    |             |
 185     //                                   |    |     (Loop Control Test)<------
 186     //                                   |    |      /               \        \
 187     //                                   |    |  (Loop Exit)      (Loop Body) |
 188     //                                    \   \      |                |       |
 189     //                                     \   \     |            (Loop End)  |
 190     //                                      \   \    |                \       /
 191     //                                       \   \   |                 ------>
 192     //                                        \   \  |
 193     //                                      (Main Loop Merge)*
 194     //                                               |
 195     //                                      (Post Loop Entry)
 196     //                                               |
 197     //                                        (LoopBeginNode)
 198     //                                               |
 199     //                                       (Loop Control Test)<-----
 200     //                                        /               \       \
 201     //                                    (Loop Exit)     (Loop Body) |
 202     //                                        |               |       |
 203     //                                 (continue code)    (Loop End)  |
 204     //                                                         \      /
 205     //                                                          ----->
 206     //
 207     // Key: "*" = optional.
 208     // @formatter:on
 209     //
 210     // The value "M" is the maximal value of the loop trip for the original
 211     // loop. The value of "length" is applicable to the number of arrays found
 212     // in the loop but is reduced if some or all of the arrays are known to be
 213     // the same length as "M". The maximum number of tests can be equal to the
 214     // number of arrays in the loop, where multiple instances of an array are
 215     // subsumed into a single test for that arrays length.
 216     //
 217     // If the optional main loop entry tests are absent, the Pre Loop exit
 218     // connects to the Main loops entry and there is no merge hanging off the
 219     // main loops exit to converge flow from said tests. All split use data
 220     // flow is mitigated through phi(s) in the main merge if present and
 221     // passed through the main and post loop phi(s) from the originating pre
 222     // loop with final phi(s) and data flow patched to the "continue code".
 223     // The pre loop is constrained to one iteration for now and will likely
 224     // be updated to produce vector alignment if applicable.
 225 
 226     public static void insertPrePostLoops(LoopEx loop, StructuredGraph graph) {
 227         graph.getDebug().log("LoopTransformations.insertPrePostLoops %s", loop);
 228         LoopFragmentWhole preLoop = loop.whole();
 229         CountedLoopInfo preCounted = loop.counted();
 230         IfNode preLimit = preCounted.getLimitTest();
 231         if (preLimit != null) {
 232             LoopBeginNode preLoopBegin = loop.loopBegin();
 233             InductionVariable preIv = preCounted.getCounter();
 234             LoopExitNode preLoopExitNode = preLoopBegin.getSingleLoopExit();
 235             FixedNode continuationNode = preLoopExitNode.next();
 236 
 237             // Each duplication is inserted after the original, ergo create the post loop first
 238             LoopFragmentWhole mainLoop = preLoop.duplicate();
 239             LoopFragmentWhole postLoop = preLoop.duplicate();
 240             preLoopBegin.incrementSplits();
 241             preLoopBegin.incrementSplits();
 242             preLoopBegin.setPreLoop();
 243             graph.getDebug().dump(DebugContext.VERBOSE_LEVEL, graph, "After duplication");
 244             LoopBeginNode mainLoopBegin = mainLoop.getDuplicatedNode(preLoopBegin);
 245             mainLoopBegin.setMainLoop();
 246             LoopBeginNode postLoopBegin = postLoop.getDuplicatedNode(preLoopBegin);
 247             postLoopBegin.setPostLoop();
 248 
 249             EndNode postEndNode = getBlockEndAfterLoopExit(postLoopBegin);
 250             AbstractMergeNode postMergeNode = postEndNode.merge();
 251             LoopExitNode postLoopExitNode = postLoopBegin.getSingleLoopExit();
 252 
 253             // Update the main loop phi initialization to carry from the pre loop
 254             for (PhiNode prePhiNode : preLoopBegin.phis()) {
 255                 PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode);
 256                 mainPhiNode.setValueAt(0, prePhiNode);
 257             }
 258 
 259             EndNode mainEndNode = getBlockEndAfterLoopExit(mainLoopBegin);
 260             AbstractMergeNode mainMergeNode = mainEndNode.merge();
 261             AbstractEndNode postEntryNode = postLoopBegin.forwardEnd();
 262 
 263             // In the case of no Bounds tests, we just flow right into the main loop
 264             AbstractBeginNode mainLandingNode = BeginNode.begin(postEntryNode);
 265             LoopExitNode mainLoopExitNode = mainLoopBegin.getSingleLoopExit();
 266             mainLoopExitNode.setNext(mainLandingNode);
 267             preLoopExitNode.setNext(mainLoopBegin.forwardEnd());
 268 
 269             // Add and update any phi edges as per merge usage as needed and update usages
 270             processPreLoopPhis(loop, mainLoop, postLoop);
 271             continuationNode.predecessor().clearSuccessors();
 272             postLoopExitNode.setNext(continuationNode);
 273             cleanupMerge(postMergeNode, postLoopExitNode);
 274             cleanupMerge(mainMergeNode, mainLandingNode);
 275 
 276             // Change the preLoop to execute one iteration for now
 277             updateMainLoopLimit(preLimit, preIv, mainLoop);
 278             updatePreLoopLimit(preLimit, preIv, preCounted);
 279             preLoopBegin.setLoopFrequency(1);
 280             mainLoopBegin.setLoopFrequency(Math.max(0.0, mainLoopBegin.loopFrequency() - 2));
 281             postLoopBegin.setLoopFrequency(Math.max(0.0, postLoopBegin.loopFrequency() - 1));
 282 
 283             // The pre and post loops don't require safepoints at all
 284             for (SafepointNode safepoint : preLoop.nodes().filter(SafepointNode.class)) {
 285                 GraphUtil.removeFixedWithUnusedInputs(safepoint);
 286             }
 287             for (SafepointNode safepoint : postLoop.nodes().filter(SafepointNode.class)) {
 288                 GraphUtil.removeFixedWithUnusedInputs(safepoint);
 289             }
 290         }
 291         graph.getDebug().dump(DebugContext.DETAILED_LEVEL, graph, "InsertPrePostLoops %s", loop);
 292     }
 293 
 294     /**
 295      * Cleanup the merge and remove the predecessors too.
 296      */
 297     private static void cleanupMerge(AbstractMergeNode mergeNode, AbstractBeginNode landingNode) {
 298         for (EndNode end : mergeNode.cfgPredecessors().snapshot()) {
 299             mergeNode.removeEnd(end);
 300             end.safeDelete();
 301         }
 302         mergeNode.prepareDelete(landingNode);
 303         mergeNode.safeDelete();
 304     }
 305 
 306     private static void processPreLoopPhis(LoopEx preLoop, LoopFragmentWhole mainLoop, LoopFragmentWhole postLoop) {
 307         // process phis for the post loop
 308         LoopBeginNode preLoopBegin = preLoop.loopBegin();
 309         for (PhiNode prePhiNode : preLoopBegin.phis()) {
 310             PhiNode postPhiNode = postLoop.getDuplicatedNode(prePhiNode);
 311             PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode);
 312             postPhiNode.setValueAt(0, mainPhiNode);
 313 
 314             // Build a work list to update the pre loop phis to the post loops phis
 315             for (Node usage : prePhiNode.usages().snapshot()) {
 316                 if (usage == mainPhiNode) {
 317                     continue;
 318                 }
 319                 if (preLoop.isOutsideLoop(usage)) {
 320                     usage.replaceFirstInput(prePhiNode, postPhiNode);
 321                 }
 322             }
 323         }
 324         for (Node node : preLoop.inside().nodes()) {
 325             for (Node externalUsage : node.usages().snapshot()) {
 326                 if (preLoop.isOutsideLoop(externalUsage)) {
 327                     Node postUsage = postLoop.getDuplicatedNode(node);
 328                     assert postUsage != null;
 329                     externalUsage.replaceFirstInput(node, postUsage);
 330                 }
 331             }
 332         }
 333     }
 334 
 335     /**
 336      * Find the end of the block following the LoopExit.
 337      */
 338     private static EndNode getBlockEndAfterLoopExit(LoopBeginNode curLoopBegin) {
 339         FixedNode node = curLoopBegin.getSingleLoopExit().next();
 340         // Find the last node after the exit blocks starts
 341         return getBlockEnd(node);
 342     }
 343 
 344     private static EndNode getBlockEnd(FixedNode node) {
 345         FixedNode curNode = node;
 346         while (curNode instanceof FixedWithNextNode) {
 347             curNode = ((FixedWithNextNode) curNode).next();
 348         }
 349         return (EndNode) curNode;
 350     }
 351 
 352     private static void updateMainLoopLimit(IfNode preLimit, InductionVariable preIv, LoopFragmentWhole mainLoop) {
 353         // Update the main loops limit test to be different than the post loop
 354         StructuredGraph graph = preLimit.graph();
 355         IfNode mainLimit = mainLoop.getDuplicatedNode(preLimit);
 356         LogicNode ifTest = mainLimit.condition();
 357         CompareNode compareNode = (CompareNode) ifTest;
 358         ValueNode prePhi = preIv.valueNode();
 359         ValueNode mainPhi = mainLoop.getDuplicatedNode(prePhi);
 360         ValueNode preStride = preIv.strideNode();
 361         ValueNode mainStride;
 362         if (preStride instanceof ConstantNode) {
 363             mainStride = preStride;
 364         } else {
 365             mainStride = mainLoop.getDuplicatedNode(preStride);
 366         }
 367         // Fetch the bounds to pose lowering the range by one
 368         ValueNode ub = null;
 369         if (compareNode.getX() == mainPhi) {
 370             ub = compareNode.getY();
 371         } else if (compareNode.getY() == mainPhi) {
 372             ub = compareNode.getX();
 373         } else {
 374             throw GraalError.shouldNotReachHere();
 375         }
 376 
 377         // Preloop always performs at least once iteration, so remove that from the main loop.
 378         ValueNode newLimit = sub(graph, ub, mainStride);
 379 
 380         // Re-wire the condition with the new limit
 381         compareNode.replaceFirstInput(ub, newLimit);
 382     }
 383 
 384     private static void updatePreLoopLimit(IfNode preLimit, InductionVariable preIv, CountedLoopInfo preCounted) {
 385         // Update the pre loops limit test
 386         StructuredGraph graph = preLimit.graph();
 387         LogicNode ifTest = preLimit.condition();
 388         CompareNode compareNode = (CompareNode) ifTest;
 389         ValueNode prePhi = preIv.valueNode();
 390         // Make new limit one iteration
 391         ValueNode initIv = preCounted.getStart();
 392         ValueNode newLimit = add(graph, initIv, preIv.strideNode());
 393 
 394         // Fetch the variable we are not replacing and configure the one we are
 395         ValueNode ub;
 396         if (compareNode.getX() == prePhi) {
 397             ub = compareNode.getY();
 398         } else if (compareNode.getY() == prePhi) {
 399             ub = compareNode.getX();
 400         } else {
 401             throw GraalError.shouldNotReachHere();
 402         }
 403         // Re-wire the condition with the new limit
 404         if (preIv.direction() == Direction.Up) {
 405             compareNode.replaceFirstInput(ub, graph.unique(new ConditionalNode(graph.unique(new IntegerLessThanNode(newLimit, ub)), newLimit, ub)));
 406         } else {
 407             compareNode.replaceFirstInput(ub, graph.unique(new ConditionalNode(graph.unique(new IntegerLessThanNode(ub, newLimit)), newLimit, ub)));
 408         }
 409     }
 410 
 411     public static List<ControlSplitNode> findUnswitchable(LoopEx loop) {
 412         List<ControlSplitNode> controls = null;
 413         ValueNode invariantValue = null;
 414         for (IfNode ifNode : loop.whole().nodes().filter(IfNode.class)) {
 415             if (loop.isOutsideLoop(ifNode.condition())) {
 416                 if (controls == null) {
 417                     invariantValue = ifNode.condition();
 418                     controls = new ArrayList<>();
 419                     controls.add(ifNode);
 420                 } else if (ifNode.condition() == invariantValue) {
 421                     controls.add(ifNode);
 422                 }
 423             }
 424         }
 425         if (controls == null) {
 426             SwitchNode firstSwitch = null;
 427             for (SwitchNode switchNode : loop.whole().nodes().filter(SwitchNode.class)) {
 428                 if (switchNode.successors().count() > 1 && loop.isOutsideLoop(switchNode.value())) {
 429                     if (controls == null) {
 430                         firstSwitch = switchNode;
 431                         invariantValue = switchNode.value();
 432                         controls = new ArrayList<>();
 433                         controls.add(switchNode);
 434                     } else if (switchNode.value() == invariantValue && firstSwitch.structureEquals(switchNode)) {
 435                         // Only collect switches which test the same values in the same order
 436                         controls.add(switchNode);
 437                     }
 438                 }
 439             }
 440         }
 441         return controls;
 442     }
 443 
 444     public static boolean isUnrollableLoop(LoopEx loop) {
 445         if (!loop.isCounted() || !loop.counted().getCounter().isConstantStride()) {
 446             return false;
 447         }
 448         LoopBeginNode loopBegin = loop.loopBegin();
 449         if (loopBegin.isMainLoop() || loopBegin.isSimpleLoop()) {
 450             // Flow-less loops to partial unroll for now. 3 blocks corresponds to an if that either
 451             // exits or continues the loop. There might be fixed and floating work within the loop
 452             // as well.
 453             if (loop.loop().getBlocks().size() < 3) {
 454                 return true;
 455             }
 456         }
 457         return false;
 458     }
 459 }