src/share/vm/opto/postaloc.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File hotspot Sdiff src/share/vm/opto

src/share/vm/opto/postaloc.cpp

Print this page




 246     blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd);
 247     if (n->in(k) != copy) {
 248       break; // Failed for some cutout?
 249     }
 250     x = copy;                   // Progress, try again
 251   }
 252 
 253   // Phis and 2-address instructions cannot change registers so easily - their
 254   // outputs must match their input.
 255   if( !can_change_regs )
 256     return blk_adjust;          // Only check stupid copies!
 257 
 258   // Loop backedges won't have a value-mapping yet
 259   if( &value == NULL ) return blk_adjust;
 260 
 261   // Skip through all copies to the _value_ being used.  Do not change from
 262   // int to pointer.  This attempts to jump through a chain of copies, where
 263   // intermediate copies might be illegal, i.e., value is stored down to stack
 264   // then reloaded BUT survives in a register the whole way.
 265   Node *val = skip_copies(n->in(k));
 266 
 267   if (val == x && nk_idx != 0 &&
 268       regnd[nk_reg] != NULL && regnd[nk_reg] != x &&
 269       _lrg_map.live_range_id(x) == _lrg_map.live_range_id(regnd[nk_reg])) {
 270     // When rematerialzing nodes and stretching lifetimes, the
 271     // allocator will reuse the original def for multidef LRG instead
 272     // of the current reaching def because it can't know it's safe to
 273     // do so.  After allocation completes if they are in the same LRG
 274     // then it should use the current reaching def instead.
 275     n->set_req(k, regnd[nk_reg]);
 276     blk_adjust += yank_if_dead(val, current_block, &value, &regnd);
 277     val = skip_copies(n->in(k));
 278   }
 279 
 280   if (val == x) return blk_adjust; // No progress?
 281 
 282   int n_regs = RegMask::num_registers(val->ideal_reg());
 283   uint val_idx = _lrg_map.live_range_id(val);
 284   OptoReg::Name val_reg = lrgs(val_idx).reg();
 285 
 286   // See if it happens to already be in the correct register!
 287   // (either Phi's direct register, or the common case of the name
 288   // never-clobbered original-def register)
 289   if (register_contains_value(val, val_reg, n_regs, value)) {
 290     blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
 291     if( n->in(k) == regnd[val_reg] ) // Success!  Quit trying
 292       return blk_adjust;
 293   }
 294 
 295   // See if we can skip the copy by changing registers.  Don't change from
 296   // using a register to using the stack unless we know we can remove a
 297   // copy-load.  Otherwise we might end up making a pile of Intel cisc-spill
 298   // ops reading from memory instead of just loading once and using the
 299   // register.


 365     //
 366     // n will be replaced with the old value but n might have
 367     // kills projections associated with it so remove them now so that
 368     // yank_if_dead will be able to eliminate the copy once the uses
 369     // have been transferred to the old[value].
 370     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 371       Node* use = n->fast_out(i);
 372       if (use->is_Proj() && use->outcnt() == 0) {
 373         // Kill projections have no users and one input
 374         use->set_req(0, C->top());
 375         yank_if_dead(use, current_block, &value, &regnd);
 376         --i; --imax;
 377       }
 378     }
 379     _post_alloc++;
 380     return true;
 381   }
 382   return false;
 383 }
 384 



























































































 385 
 386 //------------------------------post_allocate_copy_removal---------------------
 387 // Post-Allocation peephole copy removal.  We do this in 1 pass over the
 388 // basic blocks.  We maintain a mapping of registers to Nodes (an  array of
 389 // Nodes indexed by machine register or stack slot number).  NULL means that a
 390 // register is not mapped to any Node.  We can (want to have!) have several
 391 // registers map to the same Node.  We walk forward over the instructions
 392 // updating the mapping as we go.  At merge points we force a NULL if we have
 393 // to merge 2 different Nodes into the same register.  Phi functions will give
 394 // us a new Node if there is a proper value merging.  Since the blocks are
 395 // arranged in some RPO, we will visit all parent blocks before visiting any
 396 // successor blocks (except at loops).
 397 //
 398 // If we find a Copy we look to see if the Copy's source register is a stack
 399 // slot and that value has already been loaded into some machine register; if
 400 // so we use machine register directly.  This turns a Load into a reg-reg
 401 // Move.  We also look for reloads of identical constants.
 402 //
 403 // When we see a use from a reg-reg Copy, we will attempt to use the copy's
 404 // source directly and make the copy go dead.




 246     blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd);
 247     if (n->in(k) != copy) {
 248       break; // Failed for some cutout?
 249     }
 250     x = copy;                   // Progress, try again
 251   }
 252 
 253   // Phis and 2-address instructions cannot change registers so easily - their
 254   // outputs must match their input.
 255   if( !can_change_regs )
 256     return blk_adjust;          // Only check stupid copies!
 257 
 258   // Loop backedges won't have a value-mapping yet
 259   if( &value == NULL ) return blk_adjust;
 260 
 261   // Skip through all copies to the _value_ being used.  Do not change from
 262   // int to pointer.  This attempts to jump through a chain of copies, where
 263   // intermediate copies might be illegal, i.e., value is stored down to stack
 264   // then reloaded BUT survives in a register the whole way.
 265   Node *val = skip_copies(n->in(k));














 266   if (val == x) return blk_adjust; // No progress?
 267 
 268   int n_regs = RegMask::num_registers(val->ideal_reg());
 269   uint val_idx = _lrg_map.live_range_id(val);
 270   OptoReg::Name val_reg = lrgs(val_idx).reg();
 271 
 272   // See if it happens to already be in the correct register!
 273   // (either Phi's direct register, or the common case of the name
 274   // never-clobbered original-def register)
 275   if (register_contains_value(val, val_reg, n_regs, value)) {
 276     blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
 277     if( n->in(k) == regnd[val_reg] ) // Success!  Quit trying
 278       return blk_adjust;
 279   }
 280 
 281   // See if we can skip the copy by changing registers.  Don't change from
 282   // using a register to using the stack unless we know we can remove a
 283   // copy-load.  Otherwise we might end up making a pile of Intel cisc-spill
 284   // ops reading from memory instead of just loading once and using the
 285   // register.


 351     //
 352     // n will be replaced with the old value but n might have
 353     // kills projections associated with it so remove them now so that
 354     // yank_if_dead will be able to eliminate the copy once the uses
 355     // have been transferred to the old[value].
 356     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 357       Node* use = n->fast_out(i);
 358       if (use->is_Proj() && use->outcnt() == 0) {
 359         // Kill projections have no users and one input
 360         use->set_req(0, C->top());
 361         yank_if_dead(use, current_block, &value, &regnd);
 362         --i; --imax;
 363       }
 364     }
 365     _post_alloc++;
 366     return true;
 367   }
 368   return false;
 369 }
 370 
 371 // The algorithms works as follows:
 372 // We traverse the block top to bottom. possibly_merge_multidef() is invoked for every input edge k
 373 // of the instruction n. We check to see if the input is a multidef lrg. If it is, we record the fact that we've
 374 // seen a definition (coming as an input) and add that fact to the reg2defuse array. The array maps registers to their
 375 // current reaching definitions (we track only multidefs though). With each definition we also associate the first
 376 // instruction we saw use it. If we encounter the situation when we observe an def (an input) that is a part of the
 377 // same lrg but is different from the previous seen def we merge the two with a MachMerge node and substitute
 378 // all the uses that we've seen so far to use the merge. After that we keep replacing the new defs in the same lrg
 379 // as they get encountered with the merge node and keep adding these defs to the merge inputs.
 380 void PhaseChaitin::merge_multidefs() {
 381   Compile::TracePhase tp("mergeMultidefs", &timers[_t_mergeMultidefs]);
 382   ResourceMark rm;
 383   // Keep track of the defs seen in registers and collect their uses in the block.
 384   RegToDefUseMap reg2defuse(_max_reg, _max_reg, RegDefUse());
 385   for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
 386     Block* block = _cfg.get_block(i);
 387     for (uint j = 1; j < block->number_of_nodes(); j++) {
 388       Node* n = block->get_node(j);
 389       if (n->is_Phi()) continue;
 390       for (uint k = 1; k < n->req(); k++) {
 391         j += possibly_merge_multidef(n, k, block, reg2defuse);
 392       }
 393     }
 394     // Clear reg->def->use tracking for the next block
 395     for (int j = 0; j < reg2defuse.length(); j++) {
 396       reg2defuse.at(j).clear();
 397     }
 398   }
 399 }
 400 
 401 int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) {
 402   int blk_adjust = 0;
 403 
 404   uint lrg = _lrg_map.live_range_id(n->in(k));
 405   if (lrg > 0 && lrgs(lrg).is_multidef()) {
 406     OptoReg::Name reg = lrgs(lrg).reg();
 407 
 408     Node* def = reg2defuse.at(reg).def();
 409     if (def != NULL && lrg == _lrg_map.live_range_id(def) && def != n->in(k)) {
 410       // Same lrg but different node, we have to merge.
 411       MachMergeNode* merge;
 412       if (def->is_MachMerge()) { // is it already a merge?
 413         merge = def->as_MachMerge();
 414       } else {
 415         merge = new MachMergeNode(def);
 416 
 417         // Insert the merge node into the block before the first use.
 418         uint use_index = block->find_node(reg2defuse.at(reg).first_use());
 419         block->insert_node(merge, use_index++);
 420 
 421         // Let the allocator know about the new node, use the same lrg
 422         _lrg_map.extend(merge->_idx, lrg);
 423         blk_adjust++;
 424 
 425         // Fixup all the uses (there is at least one) that happened between the first
 426         // use and before the current one.
 427         for (; use_index < block->number_of_nodes(); use_index++) {
 428           Node* use = block->get_node(use_index);
 429           if (use == n) {
 430             break;
 431           }
 432           use->replace_edge(def, merge);
 433         }
 434       }
 435       if (merge->find_edge(n->in(k)) == -1) {
 436         merge->add_req(n->in(k));
 437       }
 438       n->set_req(k, merge);
 439     }
 440 
 441     // update the uses
 442     reg2defuse.at(reg).update(n->in(k), n);
 443   }
 444 
 445   if (k == n->req() - 1) {
 446     // We just updated the last edge, now null out the value produced by the instruction itself,
 447     // since we're only interested in defs implicitly defined by the uses. We are actually interested
 448     // in tracking only redefinitions of the multidef lrgs in the same register. For that matter it's enough
 449     // to track changes in the base register only and ignore other effects of multi-register lrgs
 450     // and fat projections. It is also ok to ignore defs coming from singledefs. After an implicit
 451     // overwrite by one of those our register is guaranteed to be used by another lrg and we won't
 452     // attempt to merge it.
 453     lrg = _lrg_map.live_range_id(n);
 454     if (lrg > 0 && lrgs(lrg).is_multidef()) {
 455       OptoReg::Name reg = lrgs(lrg).reg();
 456       reg2defuse.at(reg).clear();
 457     }
 458   }
 459   return blk_adjust;
 460 }
 461 
 462 
 463 //------------------------------post_allocate_copy_removal---------------------
 464 // Post-Allocation peephole copy removal.  We do this in 1 pass over the
 465 // basic blocks.  We maintain a mapping of registers to Nodes (an  array of
 466 // Nodes indexed by machine register or stack slot number).  NULL means that a
 467 // register is not mapped to any Node.  We can (want to have!) have several
 468 // registers map to the same Node.  We walk forward over the instructions
 469 // updating the mapping as we go.  At merge points we force a NULL if we have
 470 // to merge 2 different Nodes into the same register.  Phi functions will give
 471 // us a new Node if there is a proper value merging.  Since the blocks are
 472 // arranged in some RPO, we will visit all parent blocks before visiting any
 473 // successor blocks (except at loops).
 474 //
 475 // If we find a Copy we look to see if the Copy's source register is a stack
 476 // slot and that value has already been loaded into some machine register; if
 477 // so we use machine register directly.  This turns a Load into a reg-reg
 478 // Move.  We also look for reloads of identical constants.
 479 //
 480 // When we see a use from a reg-reg Copy, we will attempt to use the copy's
 481 // source directly and make the copy go dead.


src/share/vm/opto/postaloc.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File