5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23 package org.graalvm.compiler.loop.phases;
24
25 import static org.graalvm.compiler.core.common.GraalOptions.MaximumDesiredSize;
26 import static org.graalvm.compiler.loop.MathUtil.add;
27 import static org.graalvm.compiler.loop.MathUtil.sub;
28
29 import java.util.ArrayList;
30 import java.util.Iterator;
31 import java.util.List;
32
33 import org.graalvm.compiler.core.common.RetryableBailoutException;
34 import org.graalvm.compiler.core.common.type.Stamp;
35 import org.graalvm.compiler.debug.DebugContext;
36 import org.graalvm.compiler.debug.GraalError;
37 import org.graalvm.compiler.graph.Graph.Mark;
38 import org.graalvm.compiler.graph.Node;
39 import org.graalvm.compiler.graph.NodeWorkList;
40 import org.graalvm.compiler.graph.Position;
41 import org.graalvm.compiler.loop.BasicInductionVariable;
42 import org.graalvm.compiler.loop.CountedLoopInfo;
43 import org.graalvm.compiler.loop.DerivedInductionVariable;
44 import org.graalvm.compiler.loop.InductionVariable;
45 import org.graalvm.compiler.loop.InductionVariable.Direction;
46 import org.graalvm.compiler.loop.LoopEx;
47 import org.graalvm.compiler.loop.LoopFragmentInside;
48 import org.graalvm.compiler.loop.LoopFragmentWhole;
49 import org.graalvm.compiler.nodeinfo.InputType;
50 import org.graalvm.compiler.nodes.AbstractBeginNode;
51 import org.graalvm.compiler.nodes.AbstractEndNode;
52 import org.graalvm.compiler.nodes.AbstractMergeNode;
53 import org.graalvm.compiler.nodes.BeginNode;
54 import org.graalvm.compiler.nodes.ConstantNode;
55 import org.graalvm.compiler.nodes.ControlSplitNode;
56 import org.graalvm.compiler.nodes.EndNode;
57 import org.graalvm.compiler.nodes.FixedNode;
58 import org.graalvm.compiler.nodes.FixedWithNextNode;
59 import org.graalvm.compiler.nodes.IfNode;
60 import org.graalvm.compiler.nodes.LogicNode;
61 import org.graalvm.compiler.nodes.LoopBeginNode;
62 import org.graalvm.compiler.nodes.LoopEndNode;
63 import org.graalvm.compiler.nodes.LoopExitNode;
64 import org.graalvm.compiler.nodes.PhiNode;
65 import org.graalvm.compiler.nodes.StructuredGraph;
66 import org.graalvm.compiler.nodes.ValueNode;
67 import org.graalvm.compiler.nodes.ValuePhiNode;
68 import org.graalvm.compiler.nodes.calc.AddNode;
69 import org.graalvm.compiler.nodes.calc.BinaryArithmeticNode;
70 import org.graalvm.compiler.nodes.calc.CompareNode;
71 import org.graalvm.compiler.nodes.calc.ConditionalNode;
72 import org.graalvm.compiler.nodes.calc.IntegerLessThanNode;
73 import org.graalvm.compiler.nodes.calc.SubNode;
74 import org.graalvm.compiler.nodes.extended.SwitchNode;
75 import org.graalvm.compiler.nodes.util.GraphUtil;
76 import org.graalvm.compiler.phases.common.CanonicalizerPhase;
77 import org.graalvm.compiler.phases.tiers.PhaseContext;
78
79 public abstract class LoopTransformations {
80
81 private LoopTransformations() {
82 // does not need to be instantiated
83 }
84
85 public static void peel(LoopEx loop) {
86 loop.inside().duplicate().insertBefore(loop);
87 loop.loopBegin().setLoopFrequency(Math.max(0.0, loop.loopBegin().loopFrequency() - 1));
88 }
89
90 public static void fullUnroll(LoopEx loop, PhaseContext context, CanonicalizerPhase canonicalizer) {
91 // assert loop.isCounted(); //TODO (gd) strengthen : counted with known trip count
92 LoopBeginNode loopBegin = loop.loopBegin();
93 StructuredGraph graph = loopBegin.graph();
94 int initialNodeCount = graph.getNodeCount();
95 while (!loopBegin.isDeleted()) {
96 Mark mark = graph.getMark();
97 peel(loop);
98 canonicalizer.applyIncremental(graph, context, mark);
137 ControlSplitNode duplicatedControlSplit = duplicateLoop.getDuplicatedNode(controlSplitNode);
138 if (duplicatedControlSplit.isAlive()) {
139 AbstractBeginNode survivingSuccessor = (AbstractBeginNode) position.get(duplicatedControlSplit);
140 survivingSuccessor.replaceAtUsages(InputType.Guard, newBegin);
141 graph.removeSplitPropagate(duplicatedControlSplit, survivingSuccessor);
142 }
143 }
144 }
145 // original loop is simplified last to avoid deleting controlSplitNode too early
146 for (ControlSplitNode controlSplitNode : controlSplitNodeSet) {
147 if (controlSplitNode.isAlive()) {
148 AbstractBeginNode survivingSuccessor = (AbstractBeginNode) firstPosition.get(controlSplitNode);
149 survivingSuccessor.replaceAtUsages(InputType.Guard, originalLoopBegin);
150 graph.removeSplitPropagate(controlSplitNode, survivingSuccessor);
151 }
152 }
153
154 // TODO (gd) probabilities need some amount of fixup.. (probably also in other transforms)
155 }
156
157 public static boolean partialUnroll(LoopEx loop, StructuredGraph graph) {
158 assert loop.loopBegin().isMainLoop();
159 graph.getDebug().log("LoopPartialUnroll %s", loop);
160 boolean changed = false;
161 CountedLoopInfo mainCounted = loop.counted();
162 LoopBeginNode mainLoopBegin = loop.loopBegin();
163 InductionVariable iv = mainCounted.getCounter();
164 IfNode mainLimit = mainCounted.getLimitTest();
165 LogicNode ifTest = mainLimit.condition();
166 CompareNode compareNode = (CompareNode) ifTest;
167 ValueNode compareBound = null;
168 ValueNode curPhi = iv.valueNode();
169 if (compareNode.getX() == curPhi) {
170 compareBound = compareNode.getY();
171 } else if (compareNode.getY() == curPhi) {
172 compareBound = compareNode.getX();
173 }
174 LoopFragmentInside newSegment = loop.inside().duplicate();
175 newSegment.insertWithinAfter(loop);
176 graph.getDebug().dump(DebugContext.VERBOSE_LEVEL, graph, "After duplication inside %s", mainLoopBegin);
177 ValueNode inductionNode = iv.valueNode();
178 Node newStrideNode = null;
179 for (PhiNode mainPhiNode : mainLoopBegin.phis()) {
180 Node segmentOrigOp = null;
181 Node replacementOp = null;
182 changed = false;
183 // Rework each phi with a loop carried dependence
184 for (Node phiUsage : mainPhiNode.usages()) {
185 if (!loop.isOutsideLoop(phiUsage)) {
186 for (int i = 1; i < mainPhiNode.valueCount(); i++) {
187 ValueNode v = mainPhiNode.valueAt(i);
188 if (mainPhiNode != inductionNode) {
189 if (closureOnPhiInputToPhiUse(v, phiUsage, loop, graph)) {
190 segmentOrigOp = v;
191 Node node = newSegment.getDuplicatedNode(v);
192 replacementOp = updateUnrollSegmentValue(mainPhiNode, inductionNode, phiUsage, v, newSegment);
193
194 // Update the induction phi with new stride node
195 mainPhiNode.setValueAt(i, (ValueNode) node);
196 // This is for induction variables not referenced in the loop body
197 if (inductionNode == v) {
198 newStrideNode = node;
199 }
200 changed = true;
201 break;
202 }
203 } else if (v == phiUsage) {
204 segmentOrigOp = phiUsage;
205 Node node = newSegment.getDuplicatedNode(phiUsage);
206 newStrideNode = node;
207 replacementOp = updateUnrollSegmentValue(mainPhiNode, inductionNode, phiUsage, phiUsage, newSegment);
208
209 // Update the induction phi with new stride node
210 mainPhiNode.setValueAt(i, (ValueNode) node);
211 changed = true;
212 break;
213 }
214 }
215 }
216 if (changed) {
217 break;
218 }
219 }
220
221 if (changed) {
222 // Patch the new segments induction uses of replacementOp with the old stride node
223 for (Node usage : mainPhiNode.usages()) {
224 if (usage != segmentOrigOp) {
225 if (!loop.isOutsideLoop(usage)) {
226 Node node = newSegment.getDuplicatedNode(usage);
227 if (node instanceof CompareNode) {
228 continue;
229 }
230 node.replaceFirstInput(replacementOp, segmentOrigOp);
231 }
232 }
233 }
234 }
235 }
236
237 if (changed && newStrideNode == null) {
238 throw GraalError.shouldNotReachHere("Can't find stride node");
239 }
240 if (newStrideNode != null) {
241 // If merge the duplicate code into the loop and remove redundant code
242 placeNewSegmentAndCleanup(mainCounted, mainLoopBegin, newSegment);
243 int unrollFactor = mainLoopBegin.getUnrollFactor();
244 // First restore the old pattern of the loop exit condition so we can update it one way
245 if (unrollFactor > 1) {
246 if (compareBound instanceof SubNode) {
247 SubNode newLimit = (SubNode) compareBound;
248 ValueNode oldcompareBound = newLimit.getX();
249 compareNode.replaceFirstInput(newLimit, oldcompareBound);
250 newLimit.safeDelete();
251 compareBound = oldcompareBound;
252 } else if (compareBound instanceof AddNode) {
253 AddNode newLimit = (AddNode) compareBound;
254 ValueNode oldcompareBound = newLimit.getX();
255 compareNode.replaceFirstInput(newLimit, oldcompareBound);
256 newLimit.safeDelete();
257 compareBound = oldcompareBound;
258 }
259 }
260 unrollFactor *= 2;
261 mainLoopBegin.setUnrollFactor(unrollFactor);
262 // Reset stride to include new segment in loop control.
263 long oldStride = iv.constantStride() * 2;
264 // Now update the induction op and the exit condition
265 if (iv instanceof BasicInductionVariable) {
266 BasicInductionVariable biv = (BasicInductionVariable) iv;
267 BinaryArithmeticNode<?> newOp = (BinaryArithmeticNode<?>) newStrideNode;
268 Stamp strideStamp = newOp.stamp();
269 ConstantNode newStrideVal = graph.unique(ConstantNode.forIntegerStamp(strideStamp, oldStride));
270 newOp.setY(newStrideVal);
271 biv.setOP(newOp);
272 // Now use the current unrollFactor to update the exit condition to power of two
273 if (unrollFactor > 1) {
274 if (iv.direction() == Direction.Up) {
275 int modulas = (unrollFactor - 1);
276 ConstantNode aboveVal = graph.unique(ConstantNode.forIntegerStamp(strideStamp, modulas));
277 ValueNode newLimit = graph.addWithoutUnique(new SubNode(compareBound, aboveVal));
278 compareNode.replaceFirstInput(compareBound, newLimit);
279 } else if (iv.direction() == Direction.Down) {
280 int modulas = (unrollFactor - 1);
281 ConstantNode aboveVal = graph.unique(ConstantNode.forIntegerStamp(strideStamp, modulas));
282 ValueNode newLimit = graph.addWithoutUnique(new AddNode(compareBound, aboveVal));
283 compareNode.replaceFirstInput(compareBound, newLimit);
284 }
285 }
286 mainLoopBegin.setLoopFrequency(mainLoopBegin.loopFrequency() / 2);
287 }
288 changed = true;
289 }
290 if (changed) {
291 graph.getDebug().dump(DebugContext.DETAILED_LEVEL, graph, "LoopPartialUnroll %s", loop);
292 }
293 return changed;
294 }
295
296 private static Node updateUnrollSegmentValue(PhiNode mainPhiNode, Node inductionNode, Node phiUsage, Node patchNode, LoopFragmentInside newSegment) {
297 Node node = newSegment.getDuplicatedNode(phiUsage);
298 assert node != null : phiUsage;
299 Node replacementOp = null;
300 int inputCnt = 0;
301 for (Node input : phiUsage.inputs()) {
302 inputCnt++;
303 if (input == mainPhiNode) {
304 break;
305 }
306 }
307 int newInputCnt = 0;
308 for (Node input : node.inputs()) {
309 newInputCnt++;
310 if (newInputCnt == inputCnt) {
311 replacementOp = input;
312 if (mainPhiNode == inductionNode) {
313 node.replaceFirstInput(input, mainPhiNode);
314 } else {
315 node.replaceFirstInput(input, patchNode);
316 }
317 break;
318 }
319 }
320 return replacementOp;
321 }
322
323 private static boolean closureOnPhiInputToPhiUse(Node inNode, Node usage, LoopEx loop, StructuredGraph graph) {
324 NodeWorkList nodes = graph.createNodeWorkList();
325 nodes.add(inNode);
326 // Now walk from the inNode to usage if we can find it else we do not have closure
327 for (Node node : nodes) {
328 if (node == usage) {
329 return true;
330 }
331 for (Node input : node.inputs()) {
332 if (!loop.isOutsideLoop(input)) {
333 if (input != usage) {
334 nodes.add(input);
335 } else {
336 return true;
337 // For any reason if we have completed a closure, stop processing more
338 }
339 }
340 }
341 }
342 return false;
343 }
344
345 private static void placeNewSegmentAndCleanup(CountedLoopInfo mainCounted, LoopBeginNode mainLoopBegin, LoopFragmentInside newSegment) {
346 // Discard the segment entry and its flow, after if merging it into the loop
347 StructuredGraph graph = mainLoopBegin.graph();
348 IfNode loopTest = mainCounted.getLimitTest();
349 IfNode newSegmentTest = newSegment.getDuplicatedNode(loopTest);
350 AbstractBeginNode trueSuccessor = loopTest.trueSuccessor();
351 AbstractBeginNode falseSuccessor = loopTest.falseSuccessor();
352 FixedNode firstNode;
353 boolean codeInTrueSide = false;
354 if (trueSuccessor == mainCounted.getBody()) {
355 firstNode = trueSuccessor.next();
356 codeInTrueSide = true;
357 } else {
358 assert (falseSuccessor == mainCounted.getBody());
359 firstNode = falseSuccessor.next();
360 }
361 trueSuccessor = newSegmentTest.trueSuccessor();
362 falseSuccessor = newSegmentTest.falseSuccessor();
363 for (Node usage : falseSuccessor.anchored().snapshot()) {
364 usage.replaceFirstInput(falseSuccessor, loopTest.falseSuccessor());
365 }
366 for (Node usage : trueSuccessor.anchored().snapshot()) {
367 usage.replaceFirstInput(trueSuccessor, loopTest.trueSuccessor());
368 }
369 AbstractBeginNode startBlockNode;
370 if (codeInTrueSide) {
371 startBlockNode = trueSuccessor;
372 } else {
373 graph.getDebug().dump(DebugContext.VERBOSE_LEVEL, mainLoopBegin.graph(), "before");
374 startBlockNode = falseSuccessor;
375 }
376 FixedNode lastNode = getBlockEnd(startBlockNode);
377 LoopEndNode loopEndNode = getSingleLoopEndFromLoop(mainLoopBegin);
378 FixedNode lastCodeNode = (FixedNode) loopEndNode.predecessor();
379 FixedNode newSegmentFirstNode = newSegment.getDuplicatedNode(firstNode);
380 FixedNode newSegmentLastNode = newSegment.getDuplicatedNode(lastCodeNode);
381 graph.getDebug().dump(DebugContext.DETAILED_LEVEL, loopEndNode.graph(), "Before placing segment");
382 if (firstNode instanceof LoopEndNode) {
383 GraphUtil.killCFG(newSegment.getDuplicatedNode(mainLoopBegin));
384 } else {
385 newSegmentLastNode.clearSuccessors();
386 startBlockNode.setNext(lastNode);
387 lastCodeNode.replaceFirstSuccessor(loopEndNode, newSegmentFirstNode);
388 newSegmentLastNode.replaceFirstSuccessor(lastNode, loopEndNode);
389 FixedWithNextNode oldLastNode = (FixedWithNextNode) lastCodeNode;
390 oldLastNode.setNext(newSegmentFirstNode);
391 FixedWithNextNode newLastNode = (FixedWithNextNode) newSegmentLastNode;
392 newLastNode.setNext(loopEndNode);
393 startBlockNode.clearSuccessors();
394 lastNode.safeDelete();
395 Node newSegmentTestStart = newSegmentTest.predecessor();
396 LogicNode newSegmentIfTest = newSegmentTest.condition();
397 newSegmentTestStart.clearSuccessors();
398 newSegmentTest.safeDelete();
399 newSegmentIfTest.safeDelete();
400 trueSuccessor.safeDelete();
401 falseSuccessor.safeDelete();
402 newSegmentTestStart.safeDelete();
403 }
404 graph.getDebug().dump(DebugContext.DETAILED_LEVEL, loopEndNode.graph(), "After placing segment");
405 }
406
407 // This function splits candidate loops into pre, main and post loops,
408 // dividing the iteration space to facilitate the majority of iterations
409 // being executed in a main loop, which will have RCE implemented upon it.
410 // The initial loop form is constrained to single entry/exit, but can have
411 // flow. The translation looks like:
412 //
413 // @formatter:off
414 //
415 // (Simple Loop entry) (Pre Loop Entry)
416 // | |
417 // (LoopBeginNode) (LoopBeginNode)
418 // | |
419 // (Loop Control Test)<------ ==> (Loop control Test)<------
420 // / \ \ / \ \
421 // (Loop Exit) (Loop Body) | (Loop Exit) (Loop Body) |
422 // | | | | | |
423 // (continue code) (Loop End) | if (M < length)* (Loop End) |
424 // \ / / \ \ /
458 //
459 // The value "M" is the maximal value of the loop trip for the original
460 // loop. The value of "length" is applicable to the number of arrays found
461 // in the loop but is reduced if some or all of the arrays are known to be
462 // the same length as "M". The maximum number of tests can be equal to the
463 // number of arrays in the loop, where multiple instances of an array are
464 // subsumed into a single test for that arrays length.
465 //
466 // If the optional main loop entry tests are absent, the Pre Loop exit
467 // connects to the Main loops entry and there is no merge hanging off the
468 // main loops exit to converge flow from said tests. All split use data
469 // flow is mitigated through phi(s) in the main merge if present and
470 // passed through the main and post loop phi(s) from the originating pre
471 // loop with final phi(s) and data flow patched to the "continue code".
472 // The pre loop is constrained to one iteration for now and will likely
473 // be updated to produce vector alignment if applicable.
474
475 public static void insertPrePostLoops(LoopEx loop, StructuredGraph graph) {
476 graph.getDebug().log("LoopTransformations.insertPrePostLoops %s", loop);
477 LoopFragmentWhole preLoop = loop.whole();
478 CountedLoopInfo preCounted = preLoop.loop().counted();
479 IfNode preLimit = preCounted.getLimitTest();
480 if (preLimit != null) {
481 LoopBeginNode preLoopBegin = loop.loopBegin();
482 InductionVariable preIv = preCounted.getCounter();
483 LoopExitNode preLoopExitNode = getSingleExitFromLoop(preLoopBegin);
484 FixedNode continuationNode = preLoopExitNode.next();
485
486 // Each duplication is inserted after the original, ergo create the post loop first
487 LoopFragmentWhole mainLoop = preLoop.duplicate();
488 LoopFragmentWhole postLoop = preLoop.duplicate();
489 preLoopBegin.incrementSplits();
490 preLoopBegin.incrementSplits();
491 preLoopBegin.setPreLoop();
492 graph.getDebug().dump(DebugContext.VERBOSE_LEVEL, graph, "After duplication");
493 LoopBeginNode mainLoopBegin = mainLoop.getDuplicatedNode(preLoopBegin);
494 mainLoopBegin.setMainLoop();
495 LoopBeginNode postLoopBegin = postLoop.getDuplicatedNode(preLoopBegin);
496 postLoopBegin.setPostLoop();
497
498 EndNode postEndNode = getBlockEndAfterLoopExit(postLoopBegin);
499 AbstractMergeNode postMergeNode = postEndNode.merge();
500 LoopExitNode postLoopExitNode = getSingleExitFromLoop(postLoopBegin);
501
502 // Update the main loop phi initialization to carry from the pre loop
503 for (PhiNode prePhiNode : preLoopBegin.phis()) {
504 PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode);
505 mainPhiNode.setValueAt(0, prePhiNode);
506 }
507
508 EndNode mainEndNode = getBlockEndAfterLoopExit(mainLoopBegin);
509 AbstractMergeNode mainMergeNode = mainEndNode.merge();
510 AbstractEndNode postEntryNode = postLoopBegin.forwardEnd();
511
512 // In the case of no Bounds tests, we just flow right into the main loop
513 AbstractBeginNode mainLandingNode = BeginNode.begin(postEntryNode);
514 LoopExitNode mainLoopExitNode = getSingleExitFromLoop(mainLoopBegin);
515 mainLoopExitNode.setNext(mainLandingNode);
516 preLoopExitNode.setNext(mainLoopBegin.forwardEnd());
517
518 // Add and update any phi edges as per merge usage as needed and update usages
519 processPreLoopPhis(loop, mainLoop, postLoop);
520 continuationNode.predecessor().clearSuccessors();
521 postLoopExitNode.setNext(continuationNode);
522 cleanupMerge(postMergeNode, postLoopExitNode);
523 cleanupMerge(mainMergeNode, mainLandingNode);
524
525 // Change the preLoop to execute one iteration for now
526 updateMainLoopLimit(preLimit, preIv, mainLoop);
527 updatePreLoopLimit(preLimit, preIv, preCounted);
528 preLoopBegin.setLoopFrequency(1);
529 mainLoopBegin.setLoopFrequency(Math.max(0.0, mainLoopBegin.loopFrequency() - 2));
530 postLoopBegin.setLoopFrequency(Math.max(0.0, postLoopBegin.loopFrequency() - 1));
531 }
532 graph.getDebug().dump(DebugContext.DETAILED_LEVEL, graph, "InsertPrePostLoops %s", loop);
533 }
534
535 /**
536 * Cleanup the merge and remove the predecessors too.
537 */
538 private static void cleanupMerge(AbstractMergeNode mergeNode, AbstractBeginNode landingNode) {
539 for (EndNode end : mergeNode.cfgPredecessors().snapshot()) {
540 mergeNode.removeEnd(end);
541 end.safeDelete();
542 }
543 mergeNode.prepareDelete(landingNode);
544 mergeNode.safeDelete();
545 }
546
547 private static void processPreLoopPhis(LoopEx preLoop, LoopFragmentWhole mainLoop, LoopFragmentWhole postLoop) {
548 // process phis for the post loop
549 LoopBeginNode preLoopBegin = preLoop.loopBegin();
550 for (PhiNode prePhiNode : preLoopBegin.phis()) {
556 for (Node usage : prePhiNode.usages().snapshot()) {
557 if (usage == mainPhiNode) {
558 continue;
559 }
560 if (preLoop.isOutsideLoop(usage)) {
561 usage.replaceFirstInput(prePhiNode, postPhiNode);
562 }
563 }
564 }
565 for (Node node : preLoop.inside().nodes()) {
566 for (Node externalUsage : node.usages().snapshot()) {
567 if (preLoop.isOutsideLoop(externalUsage)) {
568 Node postUsage = postLoop.getDuplicatedNode(node);
569 assert postUsage != null;
570 externalUsage.replaceFirstInput(node, postUsage);
571 }
572 }
573 }
574 }
575
576 private static LoopExitNode getSingleExitFromLoop(LoopBeginNode curLoopBegin) {
577 assert curLoopBegin.loopExits().count() == 1;
578 return curLoopBegin.loopExits().first();
579 }
580
581 private static LoopEndNode getSingleLoopEndFromLoop(LoopBeginNode curLoopBegin) {
582 assert curLoopBegin.loopEnds().count() == 1;
583 return curLoopBegin.loopEnds().first();
584 }
585
586 /**
587 * Find the end of the block following the LoopExit.
588 */
589 private static EndNode getBlockEndAfterLoopExit(LoopBeginNode curLoopBegin) {
590 FixedNode node = getSingleExitFromLoop(curLoopBegin).next();
591 // Find the last node after the exit blocks starts
592 return getBlockEnd(node);
593 }
594
595 private static EndNode getBlockEnd(FixedNode node) {
596 FixedNode curNode = node;
597 while (curNode instanceof FixedWithNextNode) {
598 curNode = ((FixedWithNextNode) curNode).next();
599 }
600 return (EndNode) curNode;
601 }
602
603 private static void updateMainLoopLimit(IfNode preLimit, InductionVariable preIv, LoopFragmentWhole mainLoop) {
604 // Update the main loops limit test to be different than the post loop
605 StructuredGraph graph = preLimit.graph();
606 IfNode mainLimit = mainLoop.getDuplicatedNode(preLimit);
607 LogicNode ifTest = mainLimit.condition();
608 CompareNode compareNode = (CompareNode) ifTest;
609 ValueNode prePhi = preIv.valueNode();
610 ValueNode mainPhi = mainLoop.getDuplicatedNode(prePhi);
676 if (controls == null) {
677 SwitchNode firstSwitch = null;
678 for (SwitchNode switchNode : loop.whole().nodes().filter(SwitchNode.class)) {
679 if (switchNode.successors().count() > 1 && loop.isOutsideLoop(switchNode.value())) {
680 if (controls == null) {
681 firstSwitch = switchNode;
682 invariantValue = switchNode.value();
683 controls = new ArrayList<>();
684 controls.add(switchNode);
685 } else if (switchNode.value() == invariantValue && firstSwitch.structureEquals(switchNode)) {
686 // Only collect switches which test the same values in the same order
687 controls.add(switchNode);
688 }
689 }
690 }
691 }
692 return controls;
693 }
694
695 public static boolean isUnrollableLoop(LoopEx loop) {
696 if (!loop.isCounted()) {
697 return false;
698 }
699 LoopBeginNode loopBegin = loop.loopBegin();
700 boolean isCanonical = false;
701 if (loopBegin.isMainLoop() || loopBegin.isSimpleLoop()) {
702 // Flow-less loops to partial unroll for now. 3 blocks corresponds to an if that either
703 // exits or continues the loop. There might be fixed and floating work within the loop
704 // as well.
705 if (loop.loop().getBlocks().size() < 3) {
706 isCanonical = true;
707 }
708 }
709 if (!isCanonical) {
710 return false;
711 }
712 for (ValuePhiNode phi : loopBegin.valuePhis()) {
713 if (phi.usages().filter(x -> loopBegin.isPhiAtMerge(x)).isNotEmpty()) {
714 // Filter out Phis which reference Phis at the same merge until the duplication
715 // logic handles it properly.
716 return false;
717 }
718 InductionVariable iv = loop.getInductionVariables().get(phi);
719 if (iv == null) {
720 continue;
721 }
722 if (iv instanceof DerivedInductionVariable) {
723 return false;
724 } else if (iv instanceof BasicInductionVariable) {
725 BasicInductionVariable biv = (BasicInductionVariable) iv;
726 if (!biv.isConstantStride()) {
727 return false;
728 }
729 } else {
730 return false;
731 }
732 }
733 return true;
734 }
735 }
|
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23 package org.graalvm.compiler.loop.phases;
24
25 import org.graalvm.compiler.core.common.RetryableBailoutException;
26 import org.graalvm.compiler.debug.DebugContext;
27 import org.graalvm.compiler.debug.GraalError;
28 import org.graalvm.compiler.graph.Graph.Mark;
29 import org.graalvm.compiler.graph.Node;
30 import org.graalvm.compiler.graph.Position;
31 import org.graalvm.compiler.loop.CountedLoopInfo;
32 import org.graalvm.compiler.loop.InductionVariable;
33 import org.graalvm.compiler.loop.InductionVariable.Direction;
34 import org.graalvm.compiler.loop.LoopEx;
35 import org.graalvm.compiler.loop.LoopFragmentInside;
36 import org.graalvm.compiler.loop.LoopFragmentWhole;
37 import org.graalvm.compiler.nodeinfo.InputType;
38 import org.graalvm.compiler.nodes.AbstractBeginNode;
39 import org.graalvm.compiler.nodes.AbstractEndNode;
40 import org.graalvm.compiler.nodes.AbstractMergeNode;
41 import org.graalvm.compiler.nodes.BeginNode;
42 import org.graalvm.compiler.nodes.ConstantNode;
43 import org.graalvm.compiler.nodes.ControlSplitNode;
44 import org.graalvm.compiler.nodes.EndNode;
45 import org.graalvm.compiler.nodes.FixedNode;
46 import org.graalvm.compiler.nodes.FixedWithNextNode;
47 import org.graalvm.compiler.nodes.IfNode;
48 import org.graalvm.compiler.nodes.LogicNode;
49 import org.graalvm.compiler.nodes.LoopBeginNode;
50 import org.graalvm.compiler.nodes.LoopExitNode;
51 import org.graalvm.compiler.nodes.PhiNode;
52 import org.graalvm.compiler.nodes.SafepointNode;
53 import org.graalvm.compiler.nodes.StructuredGraph;
54 import org.graalvm.compiler.nodes.ValueNode;
55 import org.graalvm.compiler.nodes.calc.CompareNode;
56 import org.graalvm.compiler.nodes.calc.ConditionalNode;
57 import org.graalvm.compiler.nodes.calc.IntegerLessThanNode;
58 import org.graalvm.compiler.nodes.extended.SwitchNode;
59 import org.graalvm.compiler.nodes.util.GraphUtil;
60 import org.graalvm.compiler.phases.common.CanonicalizerPhase;
61 import org.graalvm.compiler.phases.tiers.PhaseContext;
62
63 import java.util.ArrayList;
64 import java.util.Iterator;
65 import java.util.List;
66
67 import static org.graalvm.compiler.core.common.GraalOptions.MaximumDesiredSize;
68 import static org.graalvm.compiler.loop.MathUtil.add;
69 import static org.graalvm.compiler.loop.MathUtil.sub;
70
71 public abstract class LoopTransformations {
72
73 private LoopTransformations() {
74 // does not need to be instantiated
75 }
76
77 public static void peel(LoopEx loop) {
78 loop.inside().duplicate().insertBefore(loop);
79 loop.loopBegin().setLoopFrequency(Math.max(0.0, loop.loopBegin().loopFrequency() - 1));
80 }
81
82 public static void fullUnroll(LoopEx loop, PhaseContext context, CanonicalizerPhase canonicalizer) {
83 // assert loop.isCounted(); //TODO (gd) strengthen : counted with known trip count
84 LoopBeginNode loopBegin = loop.loopBegin();
85 StructuredGraph graph = loopBegin.graph();
86 int initialNodeCount = graph.getNodeCount();
87 while (!loopBegin.isDeleted()) {
88 Mark mark = graph.getMark();
89 peel(loop);
90 canonicalizer.applyIncremental(graph, context, mark);
129 ControlSplitNode duplicatedControlSplit = duplicateLoop.getDuplicatedNode(controlSplitNode);
130 if (duplicatedControlSplit.isAlive()) {
131 AbstractBeginNode survivingSuccessor = (AbstractBeginNode) position.get(duplicatedControlSplit);
132 survivingSuccessor.replaceAtUsages(InputType.Guard, newBegin);
133 graph.removeSplitPropagate(duplicatedControlSplit, survivingSuccessor);
134 }
135 }
136 }
137 // original loop is simplified last to avoid deleting controlSplitNode too early
138 for (ControlSplitNode controlSplitNode : controlSplitNodeSet) {
139 if (controlSplitNode.isAlive()) {
140 AbstractBeginNode survivingSuccessor = (AbstractBeginNode) firstPosition.get(controlSplitNode);
141 survivingSuccessor.replaceAtUsages(InputType.Guard, originalLoopBegin);
142 graph.removeSplitPropagate(controlSplitNode, survivingSuccessor);
143 }
144 }
145
146 // TODO (gd) probabilities need some amount of fixup.. (probably also in other transforms)
147 }
148
149 public static void partialUnroll(LoopEx loop, StructuredGraph graph) {
150 assert loop.loopBegin().isMainLoop();
151 graph.getDebug().log("LoopPartialUnroll %s", loop);
152
153 LoopFragmentInside newSegment = loop.inside().duplicate();
154 newSegment.insertWithinAfter(loop);
155
156 }
157
158 // This function splits candidate loops into pre, main and post loops,
159 // dividing the iteration space to facilitate the majority of iterations
160 // being executed in a main loop, which will have RCE implemented upon it.
161 // The initial loop form is constrained to single entry/exit, but can have
162 // flow. The translation looks like:
163 //
164 // @formatter:off
165 //
166 // (Simple Loop entry) (Pre Loop Entry)
167 // | |
168 // (LoopBeginNode) (LoopBeginNode)
169 // | |
170 // (Loop Control Test)<------ ==> (Loop control Test)<------
171 // / \ \ / \ \
172 // (Loop Exit) (Loop Body) | (Loop Exit) (Loop Body) |
173 // | | | | | |
174 // (continue code) (Loop End) | if (M < length)* (Loop End) |
175 // \ / / \ \ /
209 //
210 // The value "M" is the maximal value of the loop trip for the original
211 // loop. The value of "length" is applicable to the number of arrays found
212 // in the loop but is reduced if some or all of the arrays are known to be
213 // the same length as "M". The maximum number of tests can be equal to the
214 // number of arrays in the loop, where multiple instances of an array are
215 // subsumed into a single test for that arrays length.
216 //
217 // If the optional main loop entry tests are absent, the Pre Loop exit
218 // connects to the Main loops entry and there is no merge hanging off the
219 // main loops exit to converge flow from said tests. All split use data
220 // flow is mitigated through phi(s) in the main merge if present and
221 // passed through the main and post loop phi(s) from the originating pre
222 // loop with final phi(s) and data flow patched to the "continue code".
223 // The pre loop is constrained to one iteration for now and will likely
224 // be updated to produce vector alignment if applicable.
225
226 public static void insertPrePostLoops(LoopEx loop, StructuredGraph graph) {
227 graph.getDebug().log("LoopTransformations.insertPrePostLoops %s", loop);
228 LoopFragmentWhole preLoop = loop.whole();
229 CountedLoopInfo preCounted = loop.counted();
230 IfNode preLimit = preCounted.getLimitTest();
231 if (preLimit != null) {
232 LoopBeginNode preLoopBegin = loop.loopBegin();
233 InductionVariable preIv = preCounted.getCounter();
234 LoopExitNode preLoopExitNode = preLoopBegin.getSingleLoopExit();
235 FixedNode continuationNode = preLoopExitNode.next();
236
237 // Each duplication is inserted after the original, ergo create the post loop first
238 LoopFragmentWhole mainLoop = preLoop.duplicate();
239 LoopFragmentWhole postLoop = preLoop.duplicate();
240 preLoopBegin.incrementSplits();
241 preLoopBegin.incrementSplits();
242 preLoopBegin.setPreLoop();
243 graph.getDebug().dump(DebugContext.VERBOSE_LEVEL, graph, "After duplication");
244 LoopBeginNode mainLoopBegin = mainLoop.getDuplicatedNode(preLoopBegin);
245 mainLoopBegin.setMainLoop();
246 LoopBeginNode postLoopBegin = postLoop.getDuplicatedNode(preLoopBegin);
247 postLoopBegin.setPostLoop();
248
249 EndNode postEndNode = getBlockEndAfterLoopExit(postLoopBegin);
250 AbstractMergeNode postMergeNode = postEndNode.merge();
251 LoopExitNode postLoopExitNode = postLoopBegin.getSingleLoopExit();
252
253 // Update the main loop phi initialization to carry from the pre loop
254 for (PhiNode prePhiNode : preLoopBegin.phis()) {
255 PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode);
256 mainPhiNode.setValueAt(0, prePhiNode);
257 }
258
259 EndNode mainEndNode = getBlockEndAfterLoopExit(mainLoopBegin);
260 AbstractMergeNode mainMergeNode = mainEndNode.merge();
261 AbstractEndNode postEntryNode = postLoopBegin.forwardEnd();
262
263 // In the case of no Bounds tests, we just flow right into the main loop
264 AbstractBeginNode mainLandingNode = BeginNode.begin(postEntryNode);
265 LoopExitNode mainLoopExitNode = mainLoopBegin.getSingleLoopExit();
266 mainLoopExitNode.setNext(mainLandingNode);
267 preLoopExitNode.setNext(mainLoopBegin.forwardEnd());
268
269 // Add and update any phi edges as per merge usage as needed and update usages
270 processPreLoopPhis(loop, mainLoop, postLoop);
271 continuationNode.predecessor().clearSuccessors();
272 postLoopExitNode.setNext(continuationNode);
273 cleanupMerge(postMergeNode, postLoopExitNode);
274 cleanupMerge(mainMergeNode, mainLandingNode);
275
276 // Change the preLoop to execute one iteration for now
277 updateMainLoopLimit(preLimit, preIv, mainLoop);
278 updatePreLoopLimit(preLimit, preIv, preCounted);
279 preLoopBegin.setLoopFrequency(1);
280 mainLoopBegin.setLoopFrequency(Math.max(0.0, mainLoopBegin.loopFrequency() - 2));
281 postLoopBegin.setLoopFrequency(Math.max(0.0, postLoopBegin.loopFrequency() - 1));
282
283 // The pre and post loops don't require safepoints at all
284 for (SafepointNode safepoint : preLoop.nodes().filter(SafepointNode.class)) {
285 GraphUtil.removeFixedWithUnusedInputs(safepoint);
286 }
287 for (SafepointNode safepoint : postLoop.nodes().filter(SafepointNode.class)) {
288 GraphUtil.removeFixedWithUnusedInputs(safepoint);
289 }
290 }
291 graph.getDebug().dump(DebugContext.DETAILED_LEVEL, graph, "InsertPrePostLoops %s", loop);
292 }
293
294 /**
295 * Cleanup the merge and remove the predecessors too.
296 */
297 private static void cleanupMerge(AbstractMergeNode mergeNode, AbstractBeginNode landingNode) {
298 for (EndNode end : mergeNode.cfgPredecessors().snapshot()) {
299 mergeNode.removeEnd(end);
300 end.safeDelete();
301 }
302 mergeNode.prepareDelete(landingNode);
303 mergeNode.safeDelete();
304 }
305
306 private static void processPreLoopPhis(LoopEx preLoop, LoopFragmentWhole mainLoop, LoopFragmentWhole postLoop) {
307 // process phis for the post loop
308 LoopBeginNode preLoopBegin = preLoop.loopBegin();
309 for (PhiNode prePhiNode : preLoopBegin.phis()) {
315 for (Node usage : prePhiNode.usages().snapshot()) {
316 if (usage == mainPhiNode) {
317 continue;
318 }
319 if (preLoop.isOutsideLoop(usage)) {
320 usage.replaceFirstInput(prePhiNode, postPhiNode);
321 }
322 }
323 }
324 for (Node node : preLoop.inside().nodes()) {
325 for (Node externalUsage : node.usages().snapshot()) {
326 if (preLoop.isOutsideLoop(externalUsage)) {
327 Node postUsage = postLoop.getDuplicatedNode(node);
328 assert postUsage != null;
329 externalUsage.replaceFirstInput(node, postUsage);
330 }
331 }
332 }
333 }
334
335 /**
336 * Find the end of the block following the LoopExit.
337 */
338 private static EndNode getBlockEndAfterLoopExit(LoopBeginNode curLoopBegin) {
339 FixedNode node = curLoopBegin.getSingleLoopExit().next();
340 // Find the last node after the exit blocks starts
341 return getBlockEnd(node);
342 }
343
344 private static EndNode getBlockEnd(FixedNode node) {
345 FixedNode curNode = node;
346 while (curNode instanceof FixedWithNextNode) {
347 curNode = ((FixedWithNextNode) curNode).next();
348 }
349 return (EndNode) curNode;
350 }
351
352 private static void updateMainLoopLimit(IfNode preLimit, InductionVariable preIv, LoopFragmentWhole mainLoop) {
353 // Update the main loops limit test to be different than the post loop
354 StructuredGraph graph = preLimit.graph();
355 IfNode mainLimit = mainLoop.getDuplicatedNode(preLimit);
356 LogicNode ifTest = mainLimit.condition();
357 CompareNode compareNode = (CompareNode) ifTest;
358 ValueNode prePhi = preIv.valueNode();
359 ValueNode mainPhi = mainLoop.getDuplicatedNode(prePhi);
425 if (controls == null) {
426 SwitchNode firstSwitch = null;
427 for (SwitchNode switchNode : loop.whole().nodes().filter(SwitchNode.class)) {
428 if (switchNode.successors().count() > 1 && loop.isOutsideLoop(switchNode.value())) {
429 if (controls == null) {
430 firstSwitch = switchNode;
431 invariantValue = switchNode.value();
432 controls = new ArrayList<>();
433 controls.add(switchNode);
434 } else if (switchNode.value() == invariantValue && firstSwitch.structureEquals(switchNode)) {
435 // Only collect switches which test the same values in the same order
436 controls.add(switchNode);
437 }
438 }
439 }
440 }
441 return controls;
442 }
443
444 public static boolean isUnrollableLoop(LoopEx loop) {
445 if (!loop.isCounted() || !loop.counted().getCounter().isConstantStride()) {
446 return false;
447 }
448 LoopBeginNode loopBegin = loop.loopBegin();
449 if (loopBegin.isMainLoop() || loopBegin.isSimpleLoop()) {
450 // Flow-less loops to partial unroll for now. 3 blocks corresponds to an if that either
451 // exits or continues the loop. There might be fixed and floating work within the loop
452 // as well.
453 if (loop.loop().getBlocks().size() < 3) {
454 return true;
455 }
456 }
457 return false;
458 }
459 }
|