src/share/vm/opto/postaloc.cpp

Old version:

 246     blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd);
 247     if (n->in(k) != copy) {
 248       break; // Failed for some cutout?
 249     }
 250     x = copy;                   // Progress, try again
 251   }
 252 
 253   // Phis and 2-address instructions cannot change registers so easily - their
 254   // outputs must match their input.
 255   if( !can_change_regs )
 256     return blk_adjust;          // Only check stupid copies!
 257 
 258   // Loop backedges won't have a value-mapping yet
 259   if( &value == NULL ) return blk_adjust;
 260 
 261   // Skip through all copies to the _value_ being used.  Do not change from
 262   // int to pointer.  This attempts to jump through a chain of copies, where
 263   // intermediate copies might be illegal, i.e., value is stored down to stack
 264   // then reloaded BUT survives in a register the whole way.
 265   Node *val = skip_copies(n->in(k));
 266 
 267   if (val == x && nk_idx != 0 &&
 268       regnd[nk_reg] != NULL && regnd[nk_reg] != x &&
 269       _lrg_map.live_range_id(x) == _lrg_map.live_range_id(regnd[nk_reg])) {
 270     // When rematerializing nodes and stretching lifetimes, the
 271     // allocator will reuse the original def for multidef LRG instead
 272     // of the current reaching def because it can't know it's safe to
 273     // do so.  After allocation completes if they are in the same LRG
 274     // then it should use the current reaching def instead.
 275     n->set_req(k, regnd[nk_reg]);
 276     blk_adjust += yank_if_dead(val, current_block, &value, &regnd);
 277     val = skip_copies(n->in(k));
 278   }
 279 
 280   if (val == x) return blk_adjust; // No progress?
 281 
 282   int n_regs = RegMask::num_registers(val->ideal_reg());
 283   uint val_idx = _lrg_map.live_range_id(val);
 284   OptoReg::Name val_reg = lrgs(val_idx).reg();
 285 
 286   // See if it happens to already be in the correct register!
 287   // (either Phi's direct register, or the common case of the name
 288   // never-clobbered original-def register)
 289   if (register_contains_value(val, val_reg, n_regs, value)) {
 290     blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
 291     if( n->in(k) == regnd[val_reg] ) // Success!  Quit trying
 292       return blk_adjust;
 293   }
 294 
 295   // See if we can skip the copy by changing registers.  Don't change from
 296   // using a register to using the stack unless we know we can remove a
 297   // copy-load.  Otherwise we might end up making a pile of Intel cisc-spill
 298   // ops reading from memory instead of just loading once and using the
 299   // register.


 365     //
 366     // n will be replaced with the old value but n might have
 367     // kills projections associated with it so remove them now so that
 368     // yank_if_dead will be able to eliminate the copy once the uses
 369     // have been transferred to the old[value].
 370     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 371       Node* use = n->fast_out(i);
 372       if (use->is_Proj() && use->outcnt() == 0) {
 373         // Kill projections have no users and one input
 374         use->set_req(0, C->top());
 375         yank_if_dead(use, current_block, &value, &regnd);
 376         --i; --imax;
 377       }
 378     }
 379     _post_alloc++;
 380     return true;
 381   }
 382   return false;
 383 }
 384 
 385 
 386 //------------------------------post_allocate_copy_removal---------------------
 387 // Post-Allocation peephole copy removal.  We do this in 1 pass over the
 388 // basic blocks.  We maintain a mapping of registers to Nodes (an  array of
 389 // Nodes indexed by machine register or stack slot number).  NULL means that a
 390 // register is not mapped to any Node.  We can (want to have!) have several
 391 // registers map to the same Node.  We walk forward over the instructions
 392 // updating the mapping as we go.  At merge points we force a NULL if we have
 393 // to merge 2 different Nodes into the same register.  Phi functions will give
 394 // us a new Node if there is a proper value merging.  Since the blocks are
 395 // arranged in some RPO, we will visit all parent blocks before visiting any
 396 // successor blocks (except at loops).
 397 //
 398 // If we find a Copy we look to see if the Copy's source register is a stack
 399 // slot and that value has already been loaded into some machine register; if
 400 // so we use machine register directly.  This turns a Load into a reg-reg
 401 // Move.  We also look for reloads of identical constants.
 402 //
 403 // When we see a use from a reg-reg Copy, we will attempt to use the copy's
 404 // source directly and make the copy go dead.

New version:

 246     blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd);
 247     if (n->in(k) != copy) {
 248       break; // Failed for some cutout?
 249     }
 250     x = copy;                   // Progress, try again
 251   }
 252 
 253   // Phis and 2-address instructions cannot change registers so easily - their
 254   // outputs must match their input.
 255   if( !can_change_regs )
 256     return blk_adjust;          // Only check stupid copies!
 257 
 258   // Loop backedges won't have a value-mapping yet
 259   if( &value == NULL ) return blk_adjust;
 260 
 261   // Skip through all copies to the _value_ being used.  Do not change from
 262   // int to pointer.  This attempts to jump through a chain of copies, where
 263   // intermediate copies might be illegal, i.e., value is stored down to stack
 264   // then reloaded BUT survives in a register the whole way.
 265   Node *val = skip_copies(n->in(k));
 266   if (val == x) return blk_adjust; // No progress?
 267 
 268   int n_regs = RegMask::num_registers(val->ideal_reg());
 269   uint val_idx = _lrg_map.live_range_id(val);
 270   OptoReg::Name val_reg = lrgs(val_idx).reg();
 271 
 272   // See if it happens to already be in the correct register!
 273   // (either Phi's direct register, or the common case of the name
 274   // never-clobbered original-def register)
 275   if (register_contains_value(val, val_reg, n_regs, value)) {
 276     blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
 277     if( n->in(k) == regnd[val_reg] ) // Success!  Quit trying
 278       return blk_adjust;
 279   }
 280 
 281   // See if we can skip the copy by changing registers.  Don't change from
 282   // using a register to using the stack unless we know we can remove a
 283   // copy-load.  Otherwise we might end up making a pile of Intel cisc-spill
 284   // ops reading from memory instead of just loading once and using the
 285   // register.


 351     //
 352     // n will be replaced with the old value but n might have
 353     // kills projections associated with it so remove them now so that
 354     // yank_if_dead will be able to eliminate the copy once the uses
 355     // have been transferred to the old[value].
 356     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 357       Node* use = n->fast_out(i);
 358       if (use->is_Proj() && use->outcnt() == 0) {
 359         // Kill projections have no users and one input
 360         use->set_req(0, C->top());
 361         yank_if_dead(use, current_block, &value, &regnd);
 362         --i; --imax;
 363       }
 364     }
 365     _post_alloc++;
 366     return true;
 367   }
 368   return false;
 369 }
 370 
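The comments above describe jumping through a chain of copies to reach the underlying value (skip_copies) and reusing a prior register when that value is already there. As an illustration only (this is not part of the patch and not HotSpot's API), here is a minimal sketch of the copy-chasing step; the CNode type and chase_copies name are invented, and the real skip_copies() additionally refuses to change between int and pointer values.

// Illustrative sketch only: invented stand-in for a machine node whose
// 'copy_of' field is non-null when the node is a plain copy of another value.
struct CNode {
  CNode* copy_of;
};

// Follow a chain of copies back to the originally defined value.  A small
// iteration bound guards against malformed (cyclic) chains; the real code
// relies on the post-allocation graph being well formed here.
static CNode* chase_copies(CNode* n) {
  for (int guard = 0; n != nullptr && n->copy_of != nullptr && guard < 1000; guard++) {
    n = n->copy_of;
  }
  return n;
}
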
 371 // The algorithm works as follows:
 372 // We traverse the block top to bottom. possibly_merge_multidef() is invoked for every input edge k
 373 // of the instruction n. We check whether the input is a multidef lrg. If it is, we record the fact that we've
 374 // seen a definition (coming in as an input) in the reg2defuse array. The array maps registers to their
 375 // current reaching definitions (we track only multidefs though). With each definition we also associate the first
 376 // instruction we saw use it. If we later observe a def (an input) that is part of the
 377 // same lrg but differs from the previously seen def, we merge the two with a MachMerge node and redirect
 378 // all the uses we've seen so far to the merge. After that, as further defs of the same lrg
 379 // are encountered, we keep replacing them with the merge node and adding them to the merge's inputs.
 380 void PhaseChaitin::merge_multidefs() {
 381   Compile::TracePhase tp("mergeMultidefs", &timers[_t_mergeMultidefs]);
 382   ResourceMark rm;
 383   // Keep track of the defs seen in registers and collect their uses in the block.
 384   RegToDefUseMap reg2defuse(_max_reg, _max_reg, RegDefUse());
 385   for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
 386     Block* block = _cfg.get_block(i);
 387     for (uint j = 1; j < block->number_of_nodes(); j++) {
 388       Node* n = block->get_node(j);
 389       if (n->is_Phi()) continue;
 390       for (uint k = 1; k < n->req(); k++) {
 391         j += possibly_merge_multidef(n, k, block, reg2defuse);
 392       }
 393       // Null out the value produced by the instruction itself, since we're only interested in defs
 394       // implicitly defined by the uses. We are actually interested in tracking only redefinitions
 395       // of the multidef lrgs in the same register. For that matter it's enough to track changes in
 396       // the base register only and ignore other effects of multi-register lrgs and fat projections.
 397       // It is also ok to ignore defs coming from singledefs. After an implicit overwrite by one of
 398       // those our register is guaranteed to be used by another lrg and we won't attempt to merge it.
 399       uint lrg = _lrg_map.live_range_id(n);
 400       if (lrg > 0 && lrgs(lrg).is_multidef()) {
 401         OptoReg::Name reg = lrgs(lrg).reg();
 402         reg2defuse.at(reg).clear();
 403       }
 404     }
 405     // Clear reg->def->use tracking for the next block
 406     for (int j = 0; j < reg2defuse.length(); j++) {
 407       reg2defuse.at(j).clear();
 408     }
 409   }
 410 }
 411 
 412 int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) {
 413   int blk_adjust = 0;
 414 
 415   uint lrg = _lrg_map.live_range_id(n->in(k));
 416   if (lrg > 0 && lrgs(lrg).is_multidef()) {
 417     OptoReg::Name reg = lrgs(lrg).reg();
 418 
 419     Node* def = reg2defuse.at(reg).def();
 420     if (def != NULL && lrg == _lrg_map.live_range_id(def) && def != n->in(k)) {
 421       // Same lrg but different node, we have to merge.
 422       MachMergeNode* merge;
 423       if (def->is_MachMerge()) { // is it already a merge?
 424         merge = def->as_MachMerge();
 425       } else {
 426         merge = new MachMergeNode(def);
 427 
 428         // Insert the merge node into the block before the first use.
 429         uint use_index = block->find_node(reg2defuse.at(reg).first_use());
 430         block->insert_node(merge, use_index++);
 431 
 432         // Let the allocator know about the new node, use the same lrg
 433         _lrg_map.extend(merge->_idx, lrg);
 434         blk_adjust++;
 435 
 436         // Fix up all the uses (there is at least one) that happened between the first
 437         // use and the current one.
 438         for (; use_index < block->number_of_nodes(); use_index++) {
 439           Node* use = block->get_node(use_index);
 440           if (use == n) {
 441             break;
 442           }
 443           use->replace_edge(def, merge);
 444         }
 445       }
 446       if (merge->find_edge(n->in(k)) == -1) {
 447         merge->add_req(n->in(k));
 448       }
 449       n->set_req(k, merge);
 450     }
 451 
 452     // update the uses
 453     reg2defuse.at(reg).update(n->in(k), n);
 454   }
 455 
 456   return blk_adjust;
 457 }
 458 
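To make the reg2defuse bookkeeping above concrete, the following much-simplified sketch models the merge decision in plain C++. It is illustration only, not part of the patch: the names Val, RegEntry and observe_use are invented, and the sketch captures only when a merge value is introduced, not the block surgery the real code performs (inserting the MachMergeNode before the first use and rewriting the intermediate edges).

#include <vector>

// Illustrative sketch only: invented stand-in for a defining node.  'lrg' is
// the live range id assigned by the allocator; 'merge_inputs' is non-empty
// when the value plays the role of a MachMergeNode combining several defs.
struct Val {
  int lrg;
  std::vector<Val*> merge_inputs;
};

// Per-register tracking in the spirit of RegDefUse: the current reaching def
// seen in this register and the first instruction observed using it.
struct RegEntry {
  Val* def = nullptr;
  Val* first_use = nullptr;
  void clear() { def = nullptr; first_use = nullptr; }
};

// Called for every use of 'in' (a def of a multidef live range placed in
// register 'reg') by instruction 'user'.  Returns the value the user should
// reference afterwards: either 'in' itself or a merge of the defs seen so far.
Val* observe_use(std::vector<RegEntry>& reg2defuse, int reg, Val* in, Val* user) {
  RegEntry& e = reg2defuse[reg];
  if (e.def != nullptr && e.def->lrg == in->lrg && e.def != in) {
    // Same live range, same register, different reaching def: fold the new
    // def into a merge value.  (The real pass inserts the merge right before
    // e.first_use and redirects the uses seen in between.)
    if (e.def->merge_inputs.empty()) {
      Val* merge = new Val{e.def->lrg, {e.def}};
      e.def = merge;
    }
    e.def->merge_inputs.push_back(in);
    return e.def;
  }
  if (e.def != in) {
    // A new reaching def for this register: remember it and who used it first.
    e.def = in;
    e.first_use = user;
  }
  return in;
}

Calling observe_use twice for the same register with two different defs of one live range yields a single merge value carrying both defs, mirroring how possibly_merge_multidef() grows the MachMergeNode's input list.
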
 459 
 460 //------------------------------post_allocate_copy_removal---------------------
 461 // Post-Allocation peephole copy removal.  We do this in 1 pass over the
 462 // basic blocks.  We maintain a mapping of registers to Nodes (an  array of
 463 // Nodes indexed by machine register or stack slot number).  NULL means that a
 464 // register is not mapped to any Node.  We can (want to have!) have several
 465 // registers map to the same Node.  We walk forward over the instructions
 466 // updating the mapping as we go.  At merge points we force a NULL if we have
 467 // to merge 2 different Nodes into the same register.  Phi functions will give
 468 // us a new Node if there is a proper value merging.  Since the blocks are
 469 // arranged in some RPO, we will visit all parent blocks before visiting any
 470 // successor blocks (except at loops).
 471 //
 472 // If we find a Copy we look to see if the Copy's source register is a stack
 473 // slot and that value has already been loaded into some machine register; if
 474 // so we use machine register directly.  This turns a Load into a reg-reg
 475 // Move.  We also look for reloads of identical constants.
 476 //
 477 // When we see a use from a reg-reg Copy, we will attempt to use the copy's
 478 // source directly and make the copy go dead.
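
As a rough illustration of the forward walk described above (and only that; this is not the HotSpot code, and the Insn type and find_redundant_copies name are invented), the sketch below maintains a map from register or stack-slot index to the instruction whose value currently lives there, and flags copies whose destination already holds the source's reaching value.

#include <vector>

// Illustrative sketch only: invented stand-in for a scheduled machine
// instruction that defines one register (or stack slot) index and, if it is
// a copy, reads one.
struct Insn {
  int id;
  int dst_reg;        // register or stack slot index this instruction defines
  int src_reg;        // for copies: the index it reads; -1 otherwise
  bool is_copy;
};

// Walk one block forward keeping a map from register index to the instruction
// whose value currently lives there (nullptr = unknown).  A copy is flagged as
// redundant when its destination already holds the same reaching value as its
// source.  All indices are assumed to be < num_regs.
std::vector<int> find_redundant_copies(const std::vector<Insn>& block, int num_regs) {
  std::vector<const Insn*> regnd(num_regs, nullptr);
  std::vector<int> redundant;
  for (const Insn& n : block) {
    if (n.is_copy && n.src_reg >= 0) {
      const Insn* src_val = regnd[n.src_reg];
      if (src_val != nullptr && regnd[n.dst_reg] == src_val) {
        redundant.push_back(n.id);  // destination already holds this value
        continue;                   // leave the mapping unchanged
      }
      // Otherwise the copy propagates whatever value its source holds.
      regnd[n.dst_reg] = (src_val != nullptr) ? src_val : &n;
    } else {
      regnd[n.dst_reg] = &n;        // a fresh value is defined here
    }
  }
  return redundant;
}

At block boundaries the real pass additionally merges the per-predecessor mappings, forcing an entry to NULL when two predecessors disagree, which is why the comment above stresses that blocks are visited in RPO.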

