1 /*
   2  * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 package org.graalvm.compiler.loop.phases;
  26 
  27 import static org.graalvm.compiler.core.common.GraalOptions.MaximumDesiredSize;
  28 
  29 import java.util.ArrayList;
  30 import java.util.Iterator;
  31 import java.util.List;
  32 
  33 import jdk.internal.vm.compiler.collections.EconomicMap;
  34 import org.graalvm.compiler.core.common.RetryableBailoutException;
  35 import org.graalvm.compiler.core.common.calc.CanonicalCondition;
  36 import org.graalvm.compiler.debug.DebugContext;
  37 import org.graalvm.compiler.graph.Graph.Mark;
  38 import org.graalvm.compiler.graph.Node;
  39 import org.graalvm.compiler.graph.Position;
  40 import org.graalvm.compiler.loop.CountedLoopInfo;
  41 import org.graalvm.compiler.loop.InductionVariable.Direction;
  42 import org.graalvm.compiler.loop.LoopEx;
  43 import org.graalvm.compiler.loop.LoopFragmentInside;
  44 import org.graalvm.compiler.loop.LoopFragmentWhole;
  45 import org.graalvm.compiler.nodeinfo.InputType;
  46 import org.graalvm.compiler.nodes.AbstractBeginNode;
  47 import org.graalvm.compiler.nodes.AbstractEndNode;
  48 import org.graalvm.compiler.nodes.AbstractMergeNode;
  49 import org.graalvm.compiler.nodes.BeginNode;
  50 import org.graalvm.compiler.nodes.ControlSplitNode;
  51 import org.graalvm.compiler.nodes.EndNode;
  52 import org.graalvm.compiler.nodes.FixedNode;
  53 import org.graalvm.compiler.nodes.FixedWithNextNode;
  54 import org.graalvm.compiler.nodes.IfNode;
  55 import org.graalvm.compiler.nodes.LogicNode;
  56 import org.graalvm.compiler.nodes.LoopBeginNode;
  57 import org.graalvm.compiler.nodes.LoopExitNode;
  58 import org.graalvm.compiler.nodes.NodeView;
  59 import org.graalvm.compiler.nodes.PhiNode;
  60 import org.graalvm.compiler.nodes.SafepointNode;
  61 import org.graalvm.compiler.nodes.StructuredGraph;
  62 import org.graalvm.compiler.nodes.ValueNode;
  63 import org.graalvm.compiler.nodes.calc.AddNode;
  64 import org.graalvm.compiler.nodes.calc.CompareNode;
  65 import org.graalvm.compiler.nodes.calc.ConditionalNode;
  66 import org.graalvm.compiler.nodes.calc.IntegerLessThanNode;
  67 import org.graalvm.compiler.nodes.extended.OpaqueNode;
  68 import org.graalvm.compiler.nodes.extended.SwitchNode;
  69 import org.graalvm.compiler.phases.common.CanonicalizerPhase;
  70 import org.graalvm.compiler.phases.tiers.PhaseContext;
  71 
  72 public abstract class LoopTransformations {
  73 
  74     private LoopTransformations() {
  75         // does not need to be instantiated
  76     }
  77 
  78     public static void peel(LoopEx loop) {
  79         loop.inside().duplicate().insertBefore(loop);
  80         loop.loopBegin().setLoopFrequency(Math.max(0.0, loop.loopBegin().loopFrequency() - 1));
  81     }
  82 
  83     public static void fullUnroll(LoopEx loop, PhaseContext context, CanonicalizerPhase canonicalizer) {
  84         // assert loop.isCounted(); //TODO (gd) strengthen : counted with known trip count
  85         LoopBeginNode loopBegin = loop.loopBegin();
  86         StructuredGraph graph = loopBegin.graph();
  87         int initialNodeCount = graph.getNodeCount();
  88         while (!loopBegin.isDeleted()) {
  89             Mark mark = graph.getMark();
  90             peel(loop);
  91             canonicalizer.applyIncremental(graph, context, mark);
  92             loop.invalidateFragments();
  93             if (graph.getNodeCount() > initialNodeCount + MaximumDesiredSize.getValue(graph.getOptions()) * 2) {
  94                 throw new RetryableBailoutException("FullUnroll : Graph seems to grow out of proportion");
  95             }
  96         }
  97     }
  98 
  99     public static void unswitch(LoopEx loop, List<ControlSplitNode> controlSplitNodeSet) {
 100         ControlSplitNode firstNode = controlSplitNodeSet.iterator().next();
 101         LoopFragmentWhole originalLoop = loop.whole();
 102         StructuredGraph graph = firstNode.graph();
 103 
 104         loop.loopBegin().incrementUnswitches();
 105 
 106         // create new control split out of loop
 107         ControlSplitNode newControlSplit = (ControlSplitNode) firstNode.copyWithInputs();
 108         originalLoop.entryPoint().replaceAtPredecessor(newControlSplit);
 109 
 110         /*
 111          * The code below assumes that all of the control split nodes have the same successor
 112          * structure, which should have been enforced by findUnswitchable.
 113          */
 114         Iterator<Position> successors = firstNode.successorPositions().iterator();
 115         assert successors.hasNext();
 116         // original loop is used as first successor
 117         Position firstPosition = successors.next();
 118         AbstractBeginNode originalLoopBegin = BeginNode.begin(originalLoop.entryPoint());
 119         firstPosition.set(newControlSplit, originalLoopBegin);
 120         originalLoopBegin.setNodeSourcePosition(firstPosition.get(firstNode).getNodeSourcePosition());
 121 
 122         while (successors.hasNext()) {
 123             Position position = successors.next();
 124             // create a new loop duplicate and connect it.
 125             LoopFragmentWhole duplicateLoop = originalLoop.duplicate();
 126             AbstractBeginNode newBegin = BeginNode.begin(duplicateLoop.entryPoint());
 127             newBegin.setNodeSourcePosition(position.get(firstNode).getNodeSourcePosition());
 128             position.set(newControlSplit, newBegin);
 129 
 130             // For each cloned ControlSplitNode, simplify the proper path
 131             for (ControlSplitNode controlSplitNode : controlSplitNodeSet) {
 132                 ControlSplitNode duplicatedControlSplit = duplicateLoop.getDuplicatedNode(controlSplitNode);
 133                 if (duplicatedControlSplit.isAlive()) {
 134                     AbstractBeginNode survivingSuccessor = (AbstractBeginNode) position.get(duplicatedControlSplit);
 135                     survivingSuccessor.replaceAtUsages(InputType.Guard, newBegin);
 136                     graph.removeSplitPropagate(duplicatedControlSplit, survivingSuccessor);
 137                 }
 138             }
 139         }
 140         // original loop is simplified last to avoid deleting controlSplitNode too early
 141         for (ControlSplitNode controlSplitNode : controlSplitNodeSet) {
 142             if (controlSplitNode.isAlive()) {
 143                 AbstractBeginNode survivingSuccessor = (AbstractBeginNode) firstPosition.get(controlSplitNode);
 144                 survivingSuccessor.replaceAtUsages(InputType.Guard, originalLoopBegin);
 145                 graph.removeSplitPropagate(controlSplitNode, survivingSuccessor);
 146             }
 147         }
 148 
 149         // TODO (gd) probabilities need some amount of fixup.. (probably also in other transforms)
 150     }
 151 
 152     public static void partialUnroll(LoopEx loop, EconomicMap<LoopBeginNode, OpaqueNode> opaqueUnrolledStrides) {
 153         assert loop.loopBegin().isMainLoop();
 154         loop.loopBegin().graph().getDebug().log("LoopPartialUnroll %s", loop);
 155 
 156         LoopFragmentInside newSegment = loop.inside().duplicate();
 157         newSegment.insertWithinAfter(loop, opaqueUnrolledStrides);
 158 
 159     }
 160 
 161     // This function splits candidate loops into pre, main and post loops,
 162     // dividing the iteration space to facilitate the majority of iterations
 163     // being executed in a main loop, which will have RCE implemented upon it.
 164     // The initial loop form is constrained to single entry/exit, but can have
 165     // flow. The translation looks like:
 166     //
 167     //  @formatter:off
 168     //
 169     //       (Simple Loop entry)                   (Pre Loop Entry)
 170     //                |                                  |
 171     //         (LoopBeginNode)                    (LoopBeginNode)
 172     //                |                                  |
 173     //       (Loop Control Test)<------   ==>  (Loop control Test)<------
 174     //         /               \       \         /               \       \
 175     //    (Loop Exit)      (Loop Body) |    (Loop Exit)      (Loop Body) |
 176     //        |                |       |        |                |       |
 177     // (continue code)     (Loop End)  |  if (M < length)*   (Loop End)  |
 178     //                         \       /       /      \           \      /
 179     //                          ----->        /       |            ----->
 180     //                                       /  if ( ... )*
 181     //                                      /     /       \
 182     //                                     /     /         \
 183     //                                    /     /           \
 184     //                                   |     /     (Main Loop Entry)
 185     //                                   |    |             |
 186     //                                   |    |      (LoopBeginNode)
 187     //                                   |    |             |
 188     //                                   |    |     (Loop Control Test)<------
 189     //                                   |    |      /               \        \
 190     //                                   |    |  (Loop Exit)      (Loop Body) |
 191     //                                    \   \      |                |       |
 192     //                                     \   \     |            (Loop End)  |
 193     //                                      \   \    |                \       /
 194     //                                       \   \   |                 ------>
 195     //                                        \   \  |
 196     //                                      (Main Loop Merge)*
 197     //                                               |
 198     //                                      (Post Loop Entry)
 199     //                                               |
 200     //                                        (LoopBeginNode)
 201     //                                               |
 202     //                                       (Loop Control Test)<-----
 203     //                                        /               \       \
 204     //                                    (Loop Exit)     (Loop Body) |
 205     //                                        |               |       |
 206     //                                 (continue code)    (Loop End)  |
 207     //                                                         \      /
 208     //                                                          ----->
 209     //
 210     // Key: "*" = optional.
 211     // @formatter:on
 212     //
 213     // The value "M" is the maximal value of the loop trip for the original
 214     // loop. The value of "length" is applicable to the number of arrays found
 215     // in the loop but is reduced if some or all of the arrays are known to be
 216     // the same length as "M". The maximum number of tests can be equal to the
 217     // number of arrays in the loop, where multiple instances of an array are
 218     // subsumed into a single test for that arrays length.
 219     //
 220     // If the optional main loop entry tests are absent, the Pre Loop exit
 221     // connects to the Main loops entry and there is no merge hanging off the
 222     // main loops exit to converge flow from said tests. All split use data
 223     // flow is mitigated through phi(s) in the main merge if present and
 224     // passed through the main and post loop phi(s) from the originating pre
 225     // loop with final phi(s) and data flow patched to the "continue code".
 226     // The pre loop is constrained to one iteration for now and will likely
 227     // be updated to produce vector alignment if applicable.
 228 
 229     public static LoopBeginNode insertPrePostLoops(LoopEx loop) {
 230         StructuredGraph graph = loop.loopBegin().graph();
 231         graph.getDebug().log("LoopTransformations.insertPrePostLoops %s", loop);
 232         LoopFragmentWhole preLoop = loop.whole();
 233         CountedLoopInfo preCounted = loop.counted();
 234         IfNode preLimit = preCounted.getLimitTest();
 235         assert preLimit != null;
 236         LoopBeginNode preLoopBegin = loop.loopBegin();
 237         LoopExitNode preLoopExitNode = preLoopBegin.getSingleLoopExit();
 238         FixedNode continuationNode = preLoopExitNode.next();
 239 
 240         // Each duplication is inserted after the original, ergo create the post loop first
 241         LoopFragmentWhole mainLoop = preLoop.duplicate();
 242         LoopFragmentWhole postLoop = preLoop.duplicate();
 243         preLoopBegin.incrementSplits();
 244         preLoopBegin.incrementSplits();
 245         preLoopBegin.setPreLoop();
 246         graph.getDebug().dump(DebugContext.VERBOSE_LEVEL, graph, "After duplication");
 247         LoopBeginNode mainLoopBegin = mainLoop.getDuplicatedNode(preLoopBegin);
 248         mainLoopBegin.setMainLoop();
 249         LoopBeginNode postLoopBegin = postLoop.getDuplicatedNode(preLoopBegin);
 250         postLoopBegin.setPostLoop();
 251 
 252         EndNode postEndNode = getBlockEndAfterLoopExit(postLoopBegin);
 253         AbstractMergeNode postMergeNode = postEndNode.merge();
 254         LoopExitNode postLoopExitNode = postLoopBegin.getSingleLoopExit();
 255 
 256         // Update the main loop phi initialization to carry from the pre loop
 257         for (PhiNode prePhiNode : preLoopBegin.phis()) {
 258             PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode);
 259             mainPhiNode.setValueAt(0, prePhiNode);
 260         }
 261 
 262         EndNode mainEndNode = getBlockEndAfterLoopExit(mainLoopBegin);
 263         AbstractMergeNode mainMergeNode = mainEndNode.merge();
 264         AbstractEndNode postEntryNode = postLoopBegin.forwardEnd();
 265 
 266         // In the case of no Bounds tests, we just flow right into the main loop
 267         AbstractBeginNode mainLandingNode = BeginNode.begin(postEntryNode);
 268         LoopExitNode mainLoopExitNode = mainLoopBegin.getSingleLoopExit();
 269         mainLoopExitNode.setNext(mainLandingNode);
 270         preLoopExitNode.setNext(mainLoopBegin.forwardEnd());
 271 
 272         // Add and update any phi edges as per merge usage as needed and update usages
 273         processPreLoopPhis(loop, mainLoop, postLoop);
 274         continuationNode.predecessor().clearSuccessors();
 275         postLoopExitNode.setNext(continuationNode);
 276         cleanupMerge(postMergeNode, postLoopExitNode);
 277         cleanupMerge(mainMergeNode, mainLandingNode);
 278 
 279         // Change the preLoop to execute one iteration for now
 280         updatePreLoopLimit(preCounted);
 281         preLoopBegin.setLoopFrequency(1);
 282         mainLoopBegin.setLoopFrequency(Math.max(0.0, mainLoopBegin.loopFrequency() - 2));
 283         postLoopBegin.setLoopFrequency(Math.max(0.0, postLoopBegin.loopFrequency() - 1));
 284 
 285         // The pre and post loops don't require safepoints at all
 286         for (SafepointNode safepoint : preLoop.nodes().filter(SafepointNode.class)) {
 287             graph.removeFixed(safepoint);
 288         }
 289         for (SafepointNode safepoint : postLoop.nodes().filter(SafepointNode.class)) {
 290             graph.removeFixed(safepoint);
 291         }
 292         graph.getDebug().dump(DebugContext.DETAILED_LEVEL, graph, "InsertPrePostLoops %s", loop);
 293         return mainLoopBegin;
 294     }
 295 
 296     /**
 297      * Cleanup the merge and remove the predecessors too.
 298      */
 299     private static void cleanupMerge(AbstractMergeNode mergeNode, AbstractBeginNode landingNode) {
 300         for (EndNode end : mergeNode.cfgPredecessors().snapshot()) {
 301             mergeNode.removeEnd(end);
 302             end.safeDelete();
 303         }
 304         mergeNode.prepareDelete(landingNode);
 305         mergeNode.safeDelete();
 306     }
 307 
 308     private static void processPreLoopPhis(LoopEx preLoop, LoopFragmentWhole mainLoop, LoopFragmentWhole postLoop) {
 309         // process phis for the post loop
 310         LoopBeginNode preLoopBegin = preLoop.loopBegin();
 311         for (PhiNode prePhiNode : preLoopBegin.phis()) {
 312             PhiNode postPhiNode = postLoop.getDuplicatedNode(prePhiNode);
 313             PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode);
 314             postPhiNode.setValueAt(0, mainPhiNode);
 315 
 316             // Build a work list to update the pre loop phis to the post loops phis
 317             for (Node usage : prePhiNode.usages().snapshot()) {
 318                 if (usage == mainPhiNode) {
 319                     continue;
 320                 }
 321                 if (preLoop.isOutsideLoop(usage)) {
 322                     usage.replaceFirstInput(prePhiNode, postPhiNode);
 323                 }
 324             }
 325         }
 326         for (Node node : preLoop.inside().nodes()) {
 327             for (Node externalUsage : node.usages().snapshot()) {
 328                 if (preLoop.isOutsideLoop(externalUsage)) {
 329                     Node postUsage = postLoop.getDuplicatedNode(node);
 330                     assert postUsage != null;
 331                     externalUsage.replaceFirstInput(node, postUsage);
 332                 }
 333             }
 334         }
 335     }
 336 
 337     /**
 338      * Find the end of the block following the LoopExit.
 339      */
 340     private static EndNode getBlockEndAfterLoopExit(LoopBeginNode curLoopBegin) {
 341         FixedNode node = curLoopBegin.getSingleLoopExit().next();
 342         // Find the last node after the exit blocks starts
 343         return getBlockEnd(node);
 344     }
 345 
 346     private static EndNode getBlockEnd(FixedNode node) {
 347         FixedNode curNode = node;
 348         while (curNode instanceof FixedWithNextNode) {
 349             curNode = ((FixedWithNextNode) curNode).next();
 350         }
 351         return (EndNode) curNode;
 352     }
 353 
 354     private static void updatePreLoopLimit(CountedLoopInfo preCounted) {
 355         // Update the pre loops limit test
 356         // Make new limit one iteration
 357         ValueNode newLimit = AddNode.add(preCounted.getStart(), preCounted.getCounter().strideNode(), NodeView.DEFAULT);
 358         // Fetch the variable we are not replacing and configure the one we are
 359         ValueNode ub = preCounted.getLimit();
 360         LogicNode entryCheck;
 361         if (preCounted.getDirection() == Direction.Up) {
 362             entryCheck = IntegerLessThanNode.create(newLimit, ub, NodeView.DEFAULT);
 363         } else {
 364             entryCheck = IntegerLessThanNode.create(ub, newLimit, NodeView.DEFAULT);
 365         }
 366         newLimit = ConditionalNode.create(entryCheck, newLimit, ub, NodeView.DEFAULT);
 367         // Re-wire the condition with the new limit
 368         CompareNode compareNode = (CompareNode) preCounted.getLimitTest().condition();
 369         compareNode.replaceFirstInput(ub, compareNode.graph().addOrUniqueWithInputs(newLimit));
 370     }
 371 
 372     public static List<ControlSplitNode> findUnswitchable(LoopEx loop) {
 373         List<ControlSplitNode> controls = null;
 374         ValueNode invariantValue = null;
 375         for (IfNode ifNode : loop.whole().nodes().filter(IfNode.class)) {
 376             if (loop.isOutsideLoop(ifNode.condition())) {
 377                 if (controls == null) {
 378                     invariantValue = ifNode.condition();
 379                     controls = new ArrayList<>();
 380                     controls.add(ifNode);
 381                 } else if (ifNode.condition() == invariantValue) {
 382                     controls.add(ifNode);
 383                 }
 384             }
 385         }
 386         if (controls == null) {
 387             SwitchNode firstSwitch = null;
 388             for (SwitchNode switchNode : loop.whole().nodes().filter(SwitchNode.class)) {
 389                 if (switchNode.successors().count() > 1 && loop.isOutsideLoop(switchNode.value())) {
 390                     if (controls == null) {
 391                         firstSwitch = switchNode;
 392                         invariantValue = switchNode.value();
 393                         controls = new ArrayList<>();
 394                         controls.add(switchNode);
 395                     } else if (switchNode.value() == invariantValue && firstSwitch.structureEquals(switchNode)) {
 396                         // Only collect switches which test the same values in the same order
 397                         controls.add(switchNode);
 398                     }
 399                 }
 400             }
 401         }
 402         return controls;
 403     }
 404 
 405     public static boolean isUnrollableLoop(LoopEx loop) {
 406         if (!loop.isCounted() || !loop.counted().getCounter().isConstantStride() || !loop.loop().getChildren().isEmpty()) {
 407             return false;
 408         }
 409         assert loop.counted().getDirection() != null;
 410         LoopBeginNode loopBegin = loop.loopBegin();
 411         LogicNode condition = loop.counted().getLimitTest().condition();
 412         if (!(condition instanceof CompareNode)) {
 413             return false;
 414         }
 415         if (((CompareNode) condition).condition() == CanonicalCondition.EQ) {
 416             condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s condition unsupported %s ", loopBegin, ((CompareNode) condition).condition());
 417             return false;
 418         }
 419         long stride = loop.counted().getCounter().constantStride();
 420         try {
 421             Math.addExact(stride, stride);
 422         } catch (ArithmeticException ae) {
 423             condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s doubling the stride overflows %d", loopBegin, stride);
 424             return false;
 425         }
 426         if (loopBegin.isMainLoop() || loopBegin.isSimpleLoop()) {
 427             // Flow-less loops to partial unroll for now. 3 blocks corresponds to an if that either
 428             // exits or continues the loop. There might be fixed and floating work within the loop
 429             // as well.
 430             if (loop.loop().getBlocks().size() < 3) {
 431                 return true;
 432             }
 433             condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s too large to unroll %s ", loopBegin, loop.loop().getBlocks().size());
 434         }
 435         return false;
 436     }
 437 }