246 blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd); 247 if (n->in(k) != copy) { 248 break; // Failed for some cutout? 249 } 250 x = copy; // Progress, try again 251 } 252 253 // Phis and 2-address instructions cannot change registers so easily - their 254 // outputs must match their input. 255 if( !can_change_regs ) 256 return blk_adjust; // Only check stupid copies! 257 258 // Loop backedges won't have a value-mapping yet 259 if( &value == NULL ) return blk_adjust; 260 261 // Skip through all copies to the _value_ being used. Do not change from 262 // int to pointer. This attempts to jump through a chain of copies, where 263 // intermediate copies might be illegal, i.e., value is stored down to stack 264 // then reloaded BUT survives in a register the whole way. 265 Node *val = skip_copies(n->in(k)); 266 267 if (val == x && nk_idx != 0 && 268 regnd[nk_reg] != NULL && regnd[nk_reg] != x && 269 _lrg_map.live_range_id(x) == _lrg_map.live_range_id(regnd[nk_reg])) { 270 // When rematerialzing nodes and stretching lifetimes, the 271 // allocator will reuse the original def for multidef LRG instead 272 // of the current reaching def because it can't know it's safe to 273 // do so. After allocation completes if they are in the same LRG 274 // then it should use the current reaching def instead. 275 n->set_req(k, regnd[nk_reg]); 276 blk_adjust += yank_if_dead(val, current_block, &value, ®nd); 277 val = skip_copies(n->in(k)); 278 } 279 280 if (val == x) return blk_adjust; // No progress? 281 282 int n_regs = RegMask::num_registers(val->ideal_reg()); 283 uint val_idx = _lrg_map.live_range_id(val); 284 OptoReg::Name val_reg = lrgs(val_idx).reg(); 285 286 // See if it happens to already be in the correct register! 
287 // (either Phi's direct register, or the common case of the name 288 // never-clobbered original-def register) 289 if (register_contains_value(val, val_reg, n_regs, value)) { 290 blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd); 291 if( n->in(k) == regnd[val_reg] ) // Success! Quit trying 292 return blk_adjust; 293 } 294 295 // See if we can skip the copy by changing registers. Don't change from 296 // using a register to using the stack unless we know we can remove a 297 // copy-load. Otherwise we might end up making a pile of Intel cisc-spill 298 // ops reading from memory instead of just loading once and using the 299 // register. 365 // 366 // n will be replaced with the old value but n might have 367 // kills projections associated with it so remove them now so that 368 // yank_if_dead will be able to eliminate the copy once the uses 369 // have been transferred to the old[value]. 370 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { 371 Node* use = n->fast_out(i); 372 if (use->is_Proj() && use->outcnt() == 0) { 373 // Kill projections have no users and one input 374 use->set_req(0, C->top()); 375 yank_if_dead(use, current_block, &value, ®nd); 376 --i; --imax; 377 } 378 } 379 _post_alloc++; 380 return true; 381 } 382 return false; 383 } 384 385 386 //------------------------------post_allocate_copy_removal--------------------- 387 // Post-Allocation peephole copy removal. We do this in 1 pass over the 388 // basic blocks. We maintain a mapping of registers to Nodes (an array of 389 // Nodes indexed by machine register or stack slot number). NULL means that a 390 // register is not mapped to any Node. We can (want to have!) have several 391 // registers map to the same Node. We walk forward over the instructions 392 // updating the mapping as we go. At merge points we force a NULL if we have 393 // to merge 2 different Nodes into the same register. 
Phi functions will give 394 // us a new Node if there is a proper value merging. Since the blocks are 395 // arranged in some RPO, we will visit all parent blocks before visiting any 396 // successor blocks (except at loops). 397 // 398 // If we find a Copy we look to see if the Copy's source register is a stack 399 // slot and that value has already been loaded into some machine register; if 400 // so we use machine register directly. This turns a Load into a reg-reg 401 // Move. We also look for reloads of identical constants. 402 // 403 // When we see a use from a reg-reg Copy, we will attempt to use the copy's 404 // source directly and make the copy go dead. | 246 blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd); 247 if (n->in(k) != copy) { 248 break; // Failed for some cutout? 249 } 250 x = copy; // Progress, try again 251 } 252 253 // Phis and 2-address instructions cannot change registers so easily - their 254 // outputs must match their input. 255 if( !can_change_regs ) 256 return blk_adjust; // Only check stupid copies! 257 258 // Loop backedges won't have a value-mapping yet 259 if( &value == NULL ) return blk_adjust; 260 261 // Skip through all copies to the _value_ being used. Do not change from 262 // int to pointer. This attempts to jump through a chain of copies, where 263 // intermediate copies might be illegal, i.e., value is stored down to stack 264 // then reloaded BUT survives in a register the whole way. 265 Node *val = skip_copies(n->in(k)); 266 if (val == x) return blk_adjust; // No progress? 267 268 int n_regs = RegMask::num_registers(val->ideal_reg()); 269 uint val_idx = _lrg_map.live_range_id(val); 270 OptoReg::Name val_reg = lrgs(val_idx).reg(); 271 272 // See if it happens to already be in the correct register! 
273 // (either Phi's direct register, or the common case of the name 274 // never-clobbered original-def register) 275 if (register_contains_value(val, val_reg, n_regs, value)) { 276 blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd); 277 if( n->in(k) == regnd[val_reg] ) // Success! Quit trying 278 return blk_adjust; 279 } 280 281 // See if we can skip the copy by changing registers. Don't change from 282 // using a register to using the stack unless we know we can remove a 283 // copy-load. Otherwise we might end up making a pile of Intel cisc-spill 284 // ops reading from memory instead of just loading once and using the 285 // register. 351 // 352 // n will be replaced with the old value but n might have 353 // kills projections associated with it so remove them now so that 354 // yank_if_dead will be able to eliminate the copy once the uses 355 // have been transferred to the old[value]. 356 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { 357 Node* use = n->fast_out(i); 358 if (use->is_Proj() && use->outcnt() == 0) { 359 // Kill projections have no users and one input 360 use->set_req(0, C->top()); 361 yank_if_dead(use, current_block, &value, ®nd); 362 --i; --imax; 363 } 364 } 365 _post_alloc++; 366 return true; 367 } 368 return false; 369 } 370 371 // The algorithms works as follows: 372 // We traverse the block top to bottom. possibly_merge_multidef() is invoked for every input edge k 373 // of the instruction n. We check to see if the input is a multidef lrg. If it is, we record the fact that we've 374 // seen a definition (coming as an input) and add that fact to the reg2defuse array. The array maps registers to their 375 // current reaching definitions (we track only multidefs though). With each definition we also associate the first 376 // instruction we saw use it. 
If we encounter the situation when we observe an def (an input) that is a part of the 377 // same lrg but is different from the previous seen def we merge the two with a MachMerge node and substitute 378 // all the uses that we've seen so far to use the merge. After that we keep replacing the new defs in the same lrg 379 // as they get encountered with the merge node and keep adding these defs to the merge inputs. 380 void PhaseChaitin::merge_multidefs() { 381 Compile::TracePhase tp("mergeMultidefs", &timers[_t_mergeMultidefs]); 382 ResourceMark rm; 383 // Keep track of the defs seen in registers and collect their uses in the block. 384 RegToDefUseMap reg2defuse(_max_reg, _max_reg, RegDefUse()); 385 for (uint i = 0; i < _cfg.number_of_blocks(); i++) { 386 Block* block = _cfg.get_block(i); 387 for (uint j = 1; j < block->number_of_nodes(); j++) { 388 Node* n = block->get_node(j); 389 if (n->is_Phi()) continue; 390 for (uint k = 1; k < n->req(); k++) { 391 j += possibly_merge_multidef(n, k, block, reg2defuse); 392 } 393 } 394 // Clear reg->def->use tracking for the next block 395 for (int j = 0; j < reg2defuse.length(); j++) { 396 reg2defuse.at(j).clear(); 397 } 398 } 399 } 400 401 int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) { 402 int blk_adjust = 0; 403 404 uint lrg = _lrg_map.live_range_id(n->in(k)); 405 if (lrg > 0 && lrgs(lrg).is_multidef()) { 406 OptoReg::Name reg = lrgs(lrg).reg(); 407 408 Node* def = reg2defuse.at(reg).def(); 409 if (def != NULL && lrg == _lrg_map.live_range_id(def) && def != n->in(k)) { 410 // Same lrg but different node, we have to merge. 411 MachMergeNode* merge; 412 if (def->is_MachMerge()) { // is it already a merge? 413 merge = def->as_MachMerge(); 414 } else { 415 merge = new MachMergeNode(def); 416 417 // Insert the merge node into the block before the first use. 
418 uint use_index = block->find_node(reg2defuse.at(reg).first_use()); 419 block->insert_node(merge, use_index++); 420 421 // Let the allocator know about the new node, use the same lrg 422 _lrg_map.extend(merge->_idx, lrg); 423 blk_adjust++; 424 425 // Fixup all the uses (there is at least one) that happened between the first 426 // use and before the current one. 427 for (; use_index < block->number_of_nodes(); use_index++) { 428 Node* use = block->get_node(use_index); 429 if (use == n) { 430 break; 431 } 432 use->replace_edge(def, merge); 433 } 434 } 435 if (merge->find_edge(n->in(k)) == -1) { 436 merge->add_req(n->in(k)); 437 } 438 n->set_req(k, merge); 439 } 440 441 // update the uses 442 reg2defuse.at(reg).update(n->in(k), n); 443 } 444 445 if (k == n->req() - 1) { 446 // We just updated the last edge, now null out the value produced by the instruction itself, 447 // since we're only interested in defs implicitly defined by the uses. We are actually interested 448 // in tracking only redefinitions of the multidef lrgs in the same register. For that matter it's enough 449 // to track changes in the base register only and ignore other effects of multi-register lrgs 450 // and fat projections. It is also ok to ignore defs coming from singledefs. After an implicit 451 // overwrite by one of those our register is guaranteed to be used by another lrg and we won't 452 // attempt to merge it. 453 lrg = _lrg_map.live_range_id(n); 454 if (lrg > 0 && lrgs(lrg).is_multidef()) { 455 OptoReg::Name reg = lrgs(lrg).reg(); 456 reg2defuse.at(reg).clear(); 457 } 458 } 459 return blk_adjust; 460 } 461 462 463 //------------------------------post_allocate_copy_removal--------------------- 464 // Post-Allocation peephole copy removal. We do this in 1 pass over the 465 // basic blocks. We maintain a mapping of registers to Nodes (an array of 466 // Nodes indexed by machine register or stack slot number). NULL means that a 467 // register is not mapped to any Node. 
// We can (want to have!) have several
// registers map to the same Node. We walk forward over the instructions
// updating the mapping as we go. At merge points we force a NULL if we have
// to merge 2 different Nodes into the same register. Phi functions will give
// us a new Node if there is a proper value merging. Since the blocks are
// arranged in some RPO, we will visit all parent blocks before visiting any
// successor blocks (except at loops).
//
// If we find a Copy we look to see if the Copy's source register is a stack
// slot and that value has already been loaded into some machine register; if
// so we use the machine register directly. This turns a Load into a reg-reg
// Move. We also look for reloads of identical constants.
//
// When we see a use from a reg-reg Copy, we will attempt to use the copy's
// source directly and make the copy go dead.