
// Optimization - Graph Style

class Block;
class CFGLoop;
class MachCallNode;
class Matcher;
class RootNode;
class VectorSet;
struct Tarjan;

//------------------------------Block_Array------------------------------------
// Map dense integer indices to Blocks. Uses classic doubling-array trick.
// Abstractly provides an infinite array of Block*'s, initialized to NULL.
// Note that the constructor just zeros things, and since I use Arena
// allocation I do not need a destructor to reclaim storage.
class Block_Array : public ResourceObj {
  friend class VMStructs;
  uint _size;                   // allocated size, as opposed to formal limit
  debug_only(uint _limit;)      // limit to formal domain
  Arena *_arena;                // Arena to allocate in
protected:
  Block **_blocks;
  void grow( uint i );          // Grow array node to fit

public:
  Block_Array(Arena *a) : _arena(a), _size(OptoBlockListSize) {
    debug_only(_limit=0);
    _blocks = NEW_ARENA_ARRAY( a, Block *, OptoBlockListSize );
    for( int i = 0; i < OptoBlockListSize; i++ ) {
      _blocks[i] = NULL;
    }
  }
  Block *lookup( uint i ) const // Lookup, or NULL for not mapped
  { return (i<Max()) ? _blocks[i] : (Block*)NULL; }
  Block *operator[] ( uint i ) const // Lookup, or assert for not mapped
  { assert( i < Max(), "oob" ); return _blocks[i]; }
  // Extend the mapping: index i maps to Block *n.
  void map( uint i, Block *n ) { if( i>=Max() ) grow(i); _blocks[i] = n; }
  uint Max() const { debug_only(return _limit); return _size; }
};
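
// Illustrative sketch, not part of this header: the same doubling-array idea
// in standalone C++. Plain new[] stands in for Arena allocation and the
// names are hypothetical.
#if 0
#include <cstring>
class PtrMap {
  void**   _data;
  unsigned _size;
  void grow(unsigned i) {
    unsigned old = _size;
    while (i >= _size) _size <<= 1;                     // double until index i fits
    void** d = new void*[_size];
    memcpy(d, _data, old * sizeof(void*));              // keep existing entries
    memset(d + old, 0, (_size - old) * sizeof(void*));  // new tail reads as NULL
    delete[] _data;
    _data = d;
  }
public:
  PtrMap() : _data(new void*[8]()), _size(8) {}         // ()-init zeroes the array
  void* lookup(unsigned i) const { return (i < _size) ? _data[i] : NULL; }
  void  map(unsigned i, void* p) { if (i >= _size) grow(i); _data[i] = p; }
};
#endif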


class Block_List : public Block_Array {
  friend class VMStructs;
public:

  // ... (elided; excerpt resumes in class Block, at the tail of end_idx()) ...

    assert(last->is_block_proj() == last || last->is_block_proj() == _nodes[last_idx - _num_succs], "");
    return (last->is_block_proj() == last) ? last_idx : (last_idx - _num_succs);
  }
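  // (E.g. for a two-way branch the IfNode is trailed in _nodes by its two
  //  projections, so it sits at last_idx - _num_succs; a Goto, which has no
  //  projections, is itself the last node.)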

  // Basic blocks have a Node which ends them. This Node determines which
  // basic block follows this one in the program flow. This Node is either an
  // IfNode, a GotoNode, a JmpNode, or a ReturnNode.
  Node *end() const { return _nodes[end_idx()]; }

  // Add an instruction to an existing block. It must go after the head
  // instruction and before the end instruction.
  void add_inst( Node *n ) { _nodes.insert(end_idx(),n); }
  // Find node in block
  uint find_node( const Node *n ) const;
  // Find and remove n from block list
  void find_remove( const Node *n );

  // Helper function that adds caller-save registers to a MachProjNode
  void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe);
  // Schedule a call next in the block
  uint sched_call(Matcher &matcher, PhaseCFG* cfg, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call);

  // Perform basic-block local scheduling
  Node *select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot);
  void set_next_call( Node *n, VectorSet &next_call, PhaseCFG* cfg );
  void needed_for_next_call(Node *this_call, VectorSet &next_call, PhaseCFG* cfg);
  bool schedule_local(PhaseCFG *cfg, Matcher &m, GrowableArray<int> &ready_cnt, VectorSet &next_call);
  // Clean up if any code lands between a Call and its Catch
  void call_catch_cleanup(PhaseCFG* cfg, Compile *C);
  // Detect implicit-null-check opportunities. Basically, find NULL checks
  // with suitable memory ops nearby. Use the memory op to do the NULL check.
  // I can generate a memory op if there is not one nearby.
  void implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons);
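  // (The transform in miniature: an explicit test such as
  //    if (p == NULL) goto throw_path;  v = p->field;
  //  becomes a bare load whose hardware fault is routed to throw_path:
  //    v = p->field;   // the faulting load doubles as the NULL check
  //  provided a suitable memory op sits near the check or can be created.)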

  // Return the empty status of a block
  enum { not_empty, empty_with_goto, completely_empty };
  int is_Empty() const;

  // Forward through connectors
  Block* non_connector() {
    Block* s = this;
    while (s->is_connector()) {
      s = s->_succs[0];
    }
    return s;
  }

  // Return true if b is a successor of this block
  bool has_successor(Block* b) const {
    for (uint i = 0; i < _num_succs; i++ ) {
      if (non_connector_successor(i) == b) {
        return true;
      }
    }
    return false;
  }

  // Successor block, after forwarding through connectors
  Block* non_connector_successor(int i) const {
    return _succs[i]->non_connector();
  }
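  // (Usage sketch, hypothetical caller: connectors are transparent to these
  //  control-flow queries, so normalize the target block before asking.
  //
  //    bool branches_to(Block* b, Block* target) {
  //      return b->has_successor(target->non_connector());
  //    }
  // )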

  // Examine the block's code shape to predict if it is not commonly executed.
  bool has_uncommon_code() const;

  // Use frequency calculations and code shape to predict if the block
  // is uncommon.
  bool is_uncommon(PhaseCFG* cfg) const;

#ifndef PRODUCT
  // Debugging print of basic block
  void dump_bidx(const Block* orig, outputStream* st = tty) const;
  void dump_pred(const PhaseCFG* cfg, Block* orig, outputStream* st = tty) const;
  void dump_head(const PhaseCFG* cfg, outputStream* st = tty) const;
  void dump() const;
  void dump(const PhaseCFG* cfg) const;
#endif
};


//------------------------------PhaseCFG---------------------------------------
// Build an array of Basic Block pointers, one per Node.
class PhaseCFG : public Phase {
  friend class VMStructs;
 private:
  // Arena for the blocks to be stored in
  Arena* _block_arena;

  // Map nodes to owning basic block
  Block_Array _node_to_block_mapping;

  // Build a proper-looking CFG. Return count of basic blocks
  uint build_cfg();

  // Perform DFS search.
  // Setup 'vertex' as DFS-to-vertex mapping.
  // Setup 'semi' as vertex-to-DFS mapping.
  // Set 'parent' to DFS parent.
  uint DFS( Tarjan *tarjan );

  // Helper function to insert a node into a block
  void schedule_node_into_block( Node *n, Block *b );

  void replace_block_proj_ctrl( Node *n );

  // Set the basic block for pinned Nodes
  void schedule_pinned_nodes( VectorSet &visited );

  // I'll need a few machine-specific GotoNodes. Clone from this one.
  MachNode *_goto;

  Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
  void verify_anti_dependences(Block* LCA, Node* load) {
    assert(LCA == get_block_for_node(load), "should already be scheduled");
    insert_anti_dependences(LCA, load, true);
  }

 public:
  PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher);

  uint _num_blocks;             // Count of basic blocks
  Block_List _blocks;           // List of basic blocks
  RootNode *_root;              // Root of whole program
  Block *_broot;                // Basic block of root
  uint _rpo_ctr;
  CFGLoop* _root_loop;
  float _outer_loop_freq;       // Outermost loop frequency

  // Set which block this node should reside in
  void map_node_to_block(const Node* node, Block* block) {
    _node_to_block_mapping.map(node->_idx, block);
  }

  // Remove the mapping from a node to a block
  void unmap_node_from_block(const Node* node) {
    _node_to_block_mapping.map(node->_idx, NULL);
  }

  // Get the block in which this node resides
  Block* get_block_for_node(const Node* node) const {
    return _node_to_block_mapping[node->_idx];
  }

  // Return true if the node resides in some block
  bool has_block(const Node* node) const {
    return (_node_to_block_mapping.lookup(node->_idx) != NULL);
  }
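  // (Usage sketch, hypothetical pass code: when moving node n into block b at
  //  position idx, keep the mapping in sync with the block's node list.
  //
  //    if (cfg->has_block(n)) {
  //      cfg->get_block_for_node(n)->find_remove(n);  // unlink from old block
  //    }
  //    b->_nodes.insert(idx, n);
  //    cfg->map_node_to_block(n, b);
  // )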

  // Per node latency estimation, valid only during GCM
  GrowableArray<uint> *_node_latency;

#ifndef PRODUCT
  bool _trace_opto_pipelining;  // tracing flag
#endif

#ifdef ASSERT
  Unique_Node_List _raw_oops;
#endif

  // Build dominators
  void Dominators();

  // Estimate block frequencies based on IfNode probabilities
  void Estimate_Block_Frequency();
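  // (Sketch of the propagation rule: a block's frequency flows to its
  //  successors weighted by branch probability. For an If taken with
  //  probability p:
  //    freq[taken_succ]     += freq[b] * p;
  //    freq[not_taken_succ] += freq[b] * (1.0f - p);
  //  These are estimates derived from the IfNode probabilities, not
  //  measured counts.)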

  // Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific
  // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
  void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list );

  // Compute the (backwards) latency of a node from the uses
  void latency_from_uses(Node *n);

  // Compute the (backwards) latency of a node from a single use
  int latency_from_use(Node *n, const Node *def, Node *use);

  // Compute the (backwards) latency of a node from the uses of this instruction
  void partial_latency_of_defs(Node *n);
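  // (The recurrence these methods compute, in miniature: latency runs
  //  backwards from uses, so a def must be ready early enough for its
  //  latest-starting use:
  //    latency(n) = max over each use u of (latency(u) + delay(n -> u))
  //  with latency 0 for nodes without uses, e.g. a Return.)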

  // Schedule Nodes early in their basic blocks.
  bool schedule_early(VectorSet &visited, Node_List &roots);

  // For each node, find the latest block it can be scheduled into
  // and then select the cheapest block between the latest and earliest
  // block to place the node.
  void schedule_late(VectorSet &visited, Node_List &stack);
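  // (Sketch of the "cheapest block" choice, assuming per-block frequencies
  //  and an immediate-dominator walk; the names are hypothetical:
  //
  //    Block* best = late;
  //    for (Block* b = late; ; b = idom(b)) {   // walk late -> early
  //      if (freq(b) < freq(best)) best = b;
  //      if (b == early) break;
  //    }
  //  i.e. pick the least-frequently executed block on the dominator path.)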

  // Pick a block between early and late that is a cheaper alternative
  // ... (elided) ...

  // Remove empty basic blocks
  void remove_empty();
  void fixup_flow();
  bool move_to_next(Block* bx, uint b_index);
  void move_to_end(Block* bx, uint b_index);
  void insert_goto_at(uint block_no, uint succ_no);

  // Check for NeverBranch at block end. This needs to become a GOTO to the
  // true target. NeverBranch is treated as a conditional branch that always
  // goes the same direction for most of the optimizer, and is used to give a
  // fake exit path to infinite loops. At this late stage it needs to turn
  // into a Goto so that when you enter the infinite loop you indeed hang.
  void convert_NeverBranch_to_Goto(Block *b);

  CFGLoop* create_loop_tree();

  // Insert a node into a block and update the node-to-block mapping
  void insert( Block *b, uint idx, Node *n ) {
    b->_nodes.insert( idx, n );
    map_node_to_block(n, b);
  }

#ifndef PRODUCT
  bool trace_opto_pipelining() const { return _trace_opto_pipelining; }

  // Debugging print of CFG
  void dump( ) const;           // CFG only
  void _dump_cfg( const Node *end, VectorSet &visited ) const;
  void verify() const;
  void dump_headers();
#else
  bool trace_opto_pipelining() const { return false; }
#endif
};


//------------------------------UnionFind--------------------------------------
// Map Block indices to a block-index for a cfg-cover.
// Array lookup in the optimized case.
class UnionFind : public ResourceObj {
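  // (Illustrative sketch, standalone; the elided body here differs: classic
  //  union-find over dense indices with path compression, which is what makes
  //  the common lookup a plain array access.
  //
  //    unsigned find(unsigned i) {
  //      while (_parent[i] != i) {
  //        _parent[i] = _parent[_parent[i]];  // compress path toward the root
  //        i = _parent[i];
  //      }
  //      return i;
  //    }
  //    void unite(unsigned a, unsigned b) { _parent[find(a)] = find(b); }
  // )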
  // ... (elided; excerpt resumes in class CFGLoop) ...

  int _id;
  int _depth;
  CFGLoop *_parent;      // root of loop tree is the method-level "pseudo" loop; its parent is null
  CFGLoop *_sibling;     // null-terminated list
  CFGLoop *_child;       // first child; use child's sibling to visit all immediately nested loops
  GrowableArray<CFGElement*> _members; // list of members of loop
  GrowableArray<BlockProbPair> _exits; // list of successor blocks and their probabilities
  float _exit_prob;      // probability any loop exit is taken on a single loop iteration
  void update_succ_freq(Block* b, float freq);

 public:
  CFGLoop(int id) :
    CFGElement(),
    _id(id),
    _depth(0),
    _parent(NULL),
    _sibling(NULL),
    _child(NULL),
    _exit_prob(1.0f) {}
  CFGLoop* parent() { return _parent; }
  void push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg);
  void add_member(CFGElement *s) { _members.push(s); }
  void add_nested_loop(CFGLoop* cl);
  Block* head() {
    assert(_members.at(0)->is_block(), "head must be a block");
    Block* hd = _members.at(0)->as_Block();
    assert(hd->_loop == this, "just checking");
    assert(hd->head()->is_Loop(), "must begin with loop head node");
    return hd;
  }
  Block* backedge_block(); // Return the block on the backedge of the loop (else NULL)
  void compute_loop_depth(int depth);
  void compute_freq(); // compute frequency within the loop, assuming head freq 1.0f
  void scale_freq();   // scale frequency by loop trip count (including outer loops)
  float outer_loop_freq() const; // frequency of outer loop
  bool in_loop_nest(Block* b);
  // Expected iterations per entry is the reciprocal of the per-iteration exit
  // probability, e.g. _exit_prob == 0.1f gives a trip count of ~10.
  float trip_count() const { return 1.0f / _exit_prob; }
  virtual bool is_loop() { return true; }
  int id() { return _id; }

#ifndef PRODUCT