/*
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/connode.hpp"
#include "opto/idealGraphPrinter.hpp"
#include "opto/matcher.hpp"
#include "opto/memnode.hpp"
#include "opto/opcodes.hpp"
#include "opto/regmask.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
#include "runtime/atomic.hpp"
#include "runtime/hpi.hpp"
#include "runtime/os.hpp"
#ifdef TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#endif

OptoReg::Name OptoReg::c_frame_pointer;

const int Matcher::base2reg[Type::lastype] = {
  Node::NotAMachineReg, 0, 0, Op_RegI, Op_RegL, 0, Op_RegN,
  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
  0, 0/*abio*/,
  Op_RegP /* Return address */, 0, /* the memories */
  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
  0 /*bottom*/
};

const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
RegMask Matcher::mreg2regmask[_last_Mach_Reg];
RegMask Matcher::STACK_ONLY_mask;
RegMask Matcher::c_frame_ptr_mask;
const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
const uint Matcher::_end_rematerialize   = _END_REMATERIALIZE;

//---------------------------Matcher-------------------------------------------
Matcher::Matcher( Node_List &proj_list ) :
  PhaseTransform( Phase::Ins_Select ),
#ifdef ASSERT
  _old2new_map(C->comp_arena()),
  _new2old_map(C->comp_arena()),
#endif
  _shared_nodes(C->comp_arena()),
  _reduceOp(reduceOp), _leftOp(leftOp), _rightOp(rightOp),
  _swallowed(swallowed),
  _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
  _end_inst_chain_rule(_END_INST_CHAIN_RULE),
  _must_clone(must_clone), _proj_list(proj_list),
  _register_save_policy(register_save_policy),
  _c_reg_save_policy(c_reg_save_policy),
  _register_save_type(register_save_type),
  _ruleName(ruleName),
  _allocation_started(false),
  _states_arena(Chunk::medium_size),
  _visited(&_states_arena),
  _shared(&_states_arena),
  _dontcare(&_states_arena) {
  C->set_matcher(this);

  idealreg2spillmask  [Op_RegI] = NULL;
  idealreg2spillmask  [Op_RegN] = NULL;
  idealreg2spillmask  [Op_RegL] = NULL;
  idealreg2spillmask  [Op_RegF] = NULL;
  idealreg2spillmask  [Op_RegD] = NULL;
  idealreg2spillmask  [Op_RegP] = NULL;

  idealreg2debugmask  [Op_RegI] = NULL;
  idealreg2debugmask  [Op_RegN] = NULL;
  idealreg2debugmask  [Op_RegL] = NULL;
  idealreg2debugmask  [Op_RegF] = NULL;
  idealreg2debugmask  [Op_RegD] = NULL;
  idealreg2debugmask  [Op_RegP] = NULL;

  idealreg2mhdebugmask[Op_RegI] = NULL;
  idealreg2mhdebugmask[Op_RegN] = NULL;
  idealreg2mhdebugmask[Op_RegL] = NULL;
  idealreg2mhdebugmask[Op_RegF] = NULL;
  idealreg2mhdebugmask[Op_RegD] = NULL;
  idealreg2mhdebugmask[Op_RegP] = NULL;

  debug_only(_mem_node = NULL;)   // Ideal memory node consumed by mach node
}

//------------------------------warp_incoming_stk_arg------------------------
// This warps a VMReg into an OptoReg::Name
OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) {
  OptoReg::Name warped;
  if( reg->is_stack() ) {       // Stack slot argument?
    warped = OptoReg::add(_old_SP, reg->reg2stack() );
    warped = OptoReg::add(warped, C->out_preserve_stack_slots());
    if( warped >= _in_arg_limit )
      _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
    if (!RegMask::can_represent(warped)) {
      // the compiler cannot represent this method's calling sequence
      C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
      return OptoReg::Bad;
    }
    return warped;
  }
  return OptoReg::as_OptoReg(reg);
}

//---------------------------compute_old_SP------------------------------------
OptoReg::Name Compile::compute_old_SP() {
  int fixed    = fixed_slots();
  int preserve = in_preserve_stack_slots();
  return OptoReg::stack2reg(round_to(fixed + preserve, Matcher::stack_alignment_in_slots()));
}

#ifdef ASSERT
void Matcher::verify_new_nodes_only(Node* xroot) {
  // Make sure that the new graph only references new nodes
  ResourceMark rm;
  Unique_Node_List worklist;
  VectorSet visited(Thread::current()->resource_area());
  worklist.push(xroot);
  while (worklist.size() > 0) {
    Node* n = worklist.pop();
    visited <<= n->_idx;
    assert(C->node_arena()->contains(n), "dead node");
    for (uint j = 0; j < n->req(); j++) {
      Node* in = n->in(j);
      if (in != NULL) {
        assert(C->node_arena()->contains(in), "dead node");
        if (!visited.test(in->_idx)) {
          worklist.push(in);
        }
      }
    }
  }
}
#endif

//---------------------------match---------------------------------------------
void Matcher::match( ) {
  if( MaxLabelRootDepth < 100 ) { // Too small?
    assert(false, "invalid MaxLabelRootDepth, increase it to 100 minimum");
    MaxLabelRootDepth = 100;
  }
  // One-time initialization of some register masks.
  init_spill_mask( C->root()->in(1) );
  _return_addr_mask = return_addr();
#ifdef _LP64
  // Pointers take 2 slots in 64-bit land
  _return_addr_mask.Insert(OptoReg::add(return_addr(),1));
#endif

  // Map a Java-signature return type into return register-value
  // machine registers for 0, 1 and 2 returned values.
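  // For instance, a method returning a Java long has a range whose first
  // return field maps through base2reg[] to Op_RegL, and return_value()
  // below then supplies the machine register (pair) for that ideal class.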
  const TypeTuple *range = C->tf()->range();
  if( range->cnt() > TypeFunc::Parms ) { // If not a void function
    // Get ideal-register return type
    int ireg = base2reg[range->field_at(TypeFunc::Parms)->base()];
    // Get machine return register
    uint sop = C->start()->Opcode();
    OptoRegPair regs = return_value(ireg, false);

    // And mask for same
    _return_value_mask = RegMask(regs.first());
    if( OptoReg::is_valid(regs.second()) )
      _return_value_mask.Insert(regs.second());
  }

  // ---------------
  // Frame Layout

  // Need the method signature to determine the incoming argument types,
  // because the types determine which registers the incoming arguments are
  // in, and this affects the matched code.
  const TypeTuple *domain = C->tf()->domain();
  uint argcnt = domain->cnt() - TypeFunc::Parms;
  BasicType *sig_bt        = NEW_RESOURCE_ARRAY( BasicType, argcnt );
  VMRegPair *vm_parm_regs  = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
  _parm_regs               = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
  _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
  uint i;
  for( i = 0; i<argcnt; i++ ) {
    sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
  }

  // Pass array of ideal registers and length to USER code (from the AD file)
  // that will convert this to an array of register numbers.
  const StartNode *start = C->start();
  start->calling_convention( sig_bt, vm_parm_regs, argcnt );
#ifdef ASSERT
  // Sanity check users' calling convention.  Real handy while trying to
  // get the initial port correct.
  { for (uint i = 0; i<argcnt; i++) {
      if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
        assert(domain->field_at(i+TypeFunc::Parms)==Type::HALF, "only allowed on halves" );
        _parm_regs[i].set_bad();
        continue;
      }
      VMReg parm_reg = vm_parm_regs[i].first();
      assert(parm_reg->is_valid(), "invalid arg?");
      if (parm_reg->is_reg()) {
        OptoReg::Name opto_parm_reg = OptoReg::as_OptoReg(parm_reg);
        assert(can_be_java_arg(opto_parm_reg) ||
               C->stub_function() == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C) ||
               opto_parm_reg == inline_cache_reg(),
               "parameters in register must be preserved by runtime stubs");
      }
      for (uint j = 0; j < i; j++) {
        assert(parm_reg != vm_parm_regs[j].first(),
               "calling conv. must produce distinct regs");
      }
    }
  }
#endif

  // Do some initial frame layout.

  // Compute the old incoming SP (may be called FP) as
  //   OptoReg::stack0() + locks + in_preserve_stack_slots + pad2.
  _old_SP = C->compute_old_SP();
  assert( is_even(_old_SP), "must be even" );

  // Compute highest incoming stack argument as
  //   _old_SP + out_preserve_stack_slots + incoming argument size.
  _in_arg_limit = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
  assert( is_even(_in_arg_limit), "out_preserve must be even" );
  for( i = 0; i < argcnt; i++ ) {
    // Permit args to have no register
    _calling_convention_mask[i].Clear();
    if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
      continue;
    }
    // calling_convention returns stack arguments as a count of
    // slots beyond OptoReg::stack0()/VMRegImpl::stack0.  We need to convert
    // this to the allocator's point of view, taking into account all the
    // preserve area, locks & pad2.
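    // Concretely: warp_incoming_stk_arg() above biases a stack slot k
    // reported by the calling convention to _old_SP +
    // out_preserve_stack_slots() + k, which is the slot number the
    // register allocator actually works with.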
    OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first());
    if( OptoReg::is_valid(reg1))
      _calling_convention_mask[i].Insert(reg1);

    OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second());
    if( OptoReg::is_valid(reg2))
      _calling_convention_mask[i].Insert(reg2);

    // Saved biased stack-slot register number
    _parm_regs[i].set_pair(reg2, reg1);
  }

  // Finally, make sure the incoming arguments take up an even number of
  // words, in case the arguments or locals need to contain doubleword stack
  // slots.  The rest of the system assumes that stack slot pairs (in
  // particular, in the spill area) which look aligned will in fact be
  // aligned relative to the stack pointer in the target machine.  Double
  // stack slots will always be allocated aligned.
  _new_SP = OptoReg::Name(round_to(_in_arg_limit, RegMask::SlotsPerLong));

  // Compute highest outgoing stack argument as
  //   _new_SP + out_preserve_stack_slots + max(outgoing argument size).
  _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
  assert( is_even(_out_arg_limit), "out_preserve must be even" );

  if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) {
    // the compiler cannot represent this method's calling sequence
    C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
  }

  if (C->failing())  return;  // bailed out on incoming arg failure

  // ---------------
  // Collect roots of matcher trees.  Every node for which
  // _shared[_idx] is cleared is guaranteed to not be shared, and thus
  // can be a valid interior of some tree.
  find_shared( C->root() );
  find_shared( C->top() );

  C->print_method("Before Matching");

  // Create new ideal node ConP #NULL even if it does exist in old space
  // to avoid false sharing if the corresponding mach node is not used.
  // The corresponding mach node is only used in rare cases for derived
  // pointers.
  Node* new_ideal_null = ConNode::make(C, TypePtr::NULL_PTR);

  // Swap out to old-space; emptying new-space
  Arena *old = C->node_arena()->move_contents(C->old_arena());

  // Save debug and profile information for nodes in old space:
  _old_node_note_array = C->node_note_array();
  if (_old_node_note_array != NULL) {
    C->set_node_note_array(new(C->comp_arena()) GrowableArray<Node_Notes*>
                           (C->comp_arena(), _old_node_note_array->length(),
                            0, NULL));
  }

  // Pre-size the new_node table to avoid the need for range checks.
  grow_new_node_array(C->unique());

  // Reset node counter so MachNodes start with _idx at 0
  int nodes = C->unique(); // save value
  C->set_unique(0);

  // Recursively match trees from old space into new space.
  // Correct leaves of new-space Nodes; they point to old-space.
  _visited.Clear();             // Clear visit bits for xform call
  C->set_cached_top_node(xform( C->top(), nodes ));
  if (!C->failing()) {
    Node* xroot = xform( C->root(), 1 );
    if (xroot == NULL) {
      Matcher::soft_match_failure();  // recursive matching process failed
      C->record_method_not_compilable("instruction match failed");
    } else {
      // During matching shared constants were attached to C->root()
      // because xroot wasn't available yet, so transfer the uses to
      // the xroot.
      for( DUIterator_Fast jmax, j = C->root()->fast_outs(jmax); j < jmax; j++ ) {
        Node* n = C->root()->fast_out(j);
        if (C->node_arena()->contains(n)) {
          assert(n->in(0) == C->root(), "should be control user");
          n->set_req(0, xroot);
          --j;
          --jmax;
        }
      }

      // Generate new mach node for ConP #NULL
      assert(new_ideal_null != NULL, "sanity");
      _mach_null = match_tree(new_ideal_null);
      // Don't set control, it will confuse GCM since there are no uses.
      // The control will be set when this node is used first time
      // in find_base_for_derived().
      assert(_mach_null != NULL, "");

      C->set_root(xroot->is_Root() ? xroot->as_Root() : NULL);

#ifdef ASSERT
      verify_new_nodes_only(xroot);
#endif
    }
  }
  if (C->top() == NULL || C->root() == NULL) {
    C->record_method_not_compilable("graph lost"); // %%% cannot happen?
  }
  if (C->failing()) {
    // delete old;
    old->destruct_contents();
    return;
  }
  assert( C->top(), "" );
  assert( C->root(), "" );
  validate_null_checks();

  // Now smoke old-space
  NOT_DEBUG( old->destruct_contents() );

  // ------------------------
  // Set up save-on-entry registers
  Fixup_Save_On_Entry( );
}

//------------------------------Fixup_Save_On_Entry----------------------------
// The stated purpose of this routine is to take care of save-on-entry
// registers.  However, the overall goal of the Match phase is to convert into
// machine-specific instructions which have RegMasks to guide allocation.
// So what this procedure really does is put a valid RegMask on each input
// to the machine-specific variations of all Return, TailCall and Halt
// instructions.  It also adds edges to define the save-on-entry values (and of
// course gives them a mask).

static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
  RegMask *rms = NEW_RESOURCE_ARRAY( RegMask, size );
  // Do all the pre-defined register masks
  rms[TypeFunc::Control  ] = RegMask::Empty;
  rms[TypeFunc::I_O      ] = RegMask::Empty;
  rms[TypeFunc::Memory   ] = RegMask::Empty;
  rms[TypeFunc::ReturnAdr] = ret_adr;
  rms[TypeFunc::FramePtr ] = fp;
  return rms;
}

//---------------------------init_first_stack_mask-----------------------------
// Create the initial stack mask used by values spilling to the stack.
// Disallow any debug info in outgoing argument areas by setting the
// initial mask accordingly.
void Matcher::init_first_stack_mask() {

  // Allocate storage for spill masks as masks for the appropriate load type.
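  // The single block below holds 3 tables of 6 masks each: rms[0..5] back
  // idealreg2spillmask, rms[6..11] back idealreg2debugmask, and rms[12..17]
  // back idealreg2mhdebugmask -- hence sizeof(RegMask) * 3*6.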
  RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * 3*6);

  idealreg2spillmask  [Op_RegN] = &rms[0];
  idealreg2spillmask  [Op_RegI] = &rms[1];
  idealreg2spillmask  [Op_RegL] = &rms[2];
  idealreg2spillmask  [Op_RegF] = &rms[3];
  idealreg2spillmask  [Op_RegD] = &rms[4];
  idealreg2spillmask  [Op_RegP] = &rms[5];

  idealreg2debugmask  [Op_RegN] = &rms[6];
  idealreg2debugmask  [Op_RegI] = &rms[7];
  idealreg2debugmask  [Op_RegL] = &rms[8];
  idealreg2debugmask  [Op_RegF] = &rms[9];
  idealreg2debugmask  [Op_RegD] = &rms[10];
  idealreg2debugmask  [Op_RegP] = &rms[11];

  idealreg2mhdebugmask[Op_RegN] = &rms[12];
  idealreg2mhdebugmask[Op_RegI] = &rms[13];
  idealreg2mhdebugmask[Op_RegL] = &rms[14];
  idealreg2mhdebugmask[Op_RegF] = &rms[15];
  idealreg2mhdebugmask[Op_RegD] = &rms[16];
  idealreg2mhdebugmask[Op_RegP] = &rms[17];

  OptoReg::Name i;

  // At first, start with the empty mask
  C->FIRST_STACK_mask().Clear();

  // Add in the incoming argument area
  OptoReg::Name init = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
  for (i = init; i < _in_arg_limit; i = OptoReg::add(i,1))
    C->FIRST_STACK_mask().Insert(i);

  // Add in all bits past the outgoing argument area
  guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)),
            "must be able to represent all call arguments in reg mask");
  init = _out_arg_limit;
  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
    C->FIRST_STACK_mask().Insert(i);

  // Finally, set the "infinite stack" bit.
  C->FIRST_STACK_mask().set_AllStack();

  // Make spill masks.  Registers for their class, plus FIRST_STACK_mask.
#ifdef _LP64
  *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
   idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
#endif
  *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
   idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
   idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
   idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
   idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
   idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());

  if (UseFPUForSpilling) {
    // This mask logic assumes that the spill operations are
    // symmetric and that the registers involved are the same size.
    // On sparc for instance we may have to use 64-bit moves, which will
    // kill 2 registers when used with F0-F31.
    idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
    idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
#ifdef _LP64
    idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
    idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
    idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
    idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
#else
    idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
#endif
  }

  // Make up debug masks.  Any spill slot plus callee-save registers.
  // Caller-save registers are assumed to be trashable by the various
  // inline-cache fixup routines.
  *idealreg2debugmask  [Op_RegN]= *idealreg2spillmask[Op_RegN];
  *idealreg2debugmask  [Op_RegI]= *idealreg2spillmask[Op_RegI];
  *idealreg2debugmask  [Op_RegL]= *idealreg2spillmask[Op_RegL];
  *idealreg2debugmask  [Op_RegF]= *idealreg2spillmask[Op_RegF];
  *idealreg2debugmask  [Op_RegD]= *idealreg2spillmask[Op_RegD];
  *idealreg2debugmask  [Op_RegP]= *idealreg2spillmask[Op_RegP];

  *idealreg2mhdebugmask[Op_RegN]= *idealreg2spillmask[Op_RegN];
  *idealreg2mhdebugmask[Op_RegI]= *idealreg2spillmask[Op_RegI];
  *idealreg2mhdebugmask[Op_RegL]= *idealreg2spillmask[Op_RegL];
  *idealreg2mhdebugmask[Op_RegF]= *idealreg2spillmask[Op_RegF];
  *idealreg2mhdebugmask[Op_RegD]= *idealreg2spillmask[Op_RegD];
  *idealreg2mhdebugmask[Op_RegP]= *idealreg2spillmask[Op_RegP];

  // Prevent stub compilations from attempting to reference
  // callee-saved registers from debug info
  bool exclude_soe = !Compile::current()->is_method_compilation();

  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
    // registers the caller has to save do not work
    if( _register_save_policy[i] == 'C' ||
        _register_save_policy[i] == 'A' ||
        (_register_save_policy[i] == 'E' && exclude_soe) ) {
      idealreg2debugmask  [Op_RegN]->Remove(i);
      idealreg2debugmask  [Op_RegI]->Remove(i); // Exclude save-on-call
      idealreg2debugmask  [Op_RegL]->Remove(i); // registers from debug
      idealreg2debugmask  [Op_RegF]->Remove(i); // masks
      idealreg2debugmask  [Op_RegD]->Remove(i);
      idealreg2debugmask  [Op_RegP]->Remove(i);

      idealreg2mhdebugmask[Op_RegN]->Remove(i);
      idealreg2mhdebugmask[Op_RegI]->Remove(i);
      idealreg2mhdebugmask[Op_RegL]->Remove(i);
      idealreg2mhdebugmask[Op_RegF]->Remove(i);
      idealreg2mhdebugmask[Op_RegD]->Remove(i);
      idealreg2mhdebugmask[Op_RegP]->Remove(i);
    }
  }

  // Subtract the register we use to save the SP for MethodHandle
  // invokes from the debug mask.
  const RegMask save_mask = method_handle_invoke_SP_save_mask();
  idealreg2mhdebugmask[Op_RegN]->SUBTRACT(save_mask);
  idealreg2mhdebugmask[Op_RegI]->SUBTRACT(save_mask);
  idealreg2mhdebugmask[Op_RegL]->SUBTRACT(save_mask);
  idealreg2mhdebugmask[Op_RegF]->SUBTRACT(save_mask);
  idealreg2mhdebugmask[Op_RegD]->SUBTRACT(save_mask);
  idealreg2mhdebugmask[Op_RegP]->SUBTRACT(save_mask);
}

//---------------------------is_save_on_entry----------------------------------
bool Matcher::is_save_on_entry( int reg ) {
  return
    _register_save_policy[reg] == 'E' ||
    _register_save_policy[reg] == 'A' || // Save-on-entry register?
    // Also save argument registers in the trampolining stubs
    (C->save_argument_registers() && is_spillable_arg(reg));
}

//---------------------------Fixup_Save_On_Entry-------------------------------
void Matcher::Fixup_Save_On_Entry( ) {
  init_first_stack_mask();

  Node *root = C->root();       // Short name for root
  // Count number of save-on-entry registers.
  uint soe_cnt = number_of_saved_registers();
  uint i;

  // Find the procedure Start Node
  StartNode *start = C->start();
  assert( start, "Expect a start node" );

  // Save argument registers in the trampolining stubs
  if( C->save_argument_registers() )
    for( i = 0; i < _last_Mach_Reg; i++ )
      if( is_spillable_arg(i) )
        soe_cnt++;

  // Input RegMask array shared by all Returns.
  // The type for doubles and longs has a count of 2, but
  // there is only 1 returned value
  uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
  RegMask *ret_rms  = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
  // Returns have 0 or 1 returned values depending on call signature.
  // Return register is specified by return_value in the AD file.
  if (ret_edge_cnt > TypeFunc::Parms)
    ret_rms[TypeFunc::Parms+0] = _return_value_mask;

  // Input RegMask array shared by all Rethrows.
  uint reth_edge_cnt = TypeFunc::Parms+1;
  RegMask *reth_rms  = init_input_masks( reth_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
  // Rethrow takes exception oop only, but in the argument 0 slot.
  reth_rms[TypeFunc::Parms] = mreg2regmask[find_receiver(false)];
#ifdef _LP64
  // Need two slots for ptrs in 64-bit land
  reth_rms[TypeFunc::Parms].Insert(OptoReg::add(OptoReg::Name(find_receiver(false)),1));
#endif

  // Input RegMask array shared by all TailCalls
  uint tail_call_edge_cnt = TypeFunc::Parms+2;
  RegMask *tail_call_rms = init_input_masks( tail_call_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // Input RegMask array shared by all TailJumps
  uint tail_jump_edge_cnt = TypeFunc::Parms+2;
  RegMask *tail_jump_rms = init_input_masks( tail_jump_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // TailCalls have 2 returned values (target & moop), whose masks come
  // from the usual MachNode/MachOper mechanism.  Find a sample
  // TailCall to extract these masks and put the correct masks into
  // the tail_call_rms array.
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *m = root->in(i)->as_MachReturn();
    if( m->ideal_Opcode() == Op_TailCall ) {
      tail_call_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
      tail_call_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
      break;
    }
  }

  // TailJumps have 2 returned values (target & ex_oop), whose masks come
  // from the usual MachNode/MachOper mechanism.  Find a sample
  // TailJump to extract these masks and put the correct masks into
  // the tail_jump_rms array.
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *m = root->in(i)->as_MachReturn();
    if( m->ideal_Opcode() == Op_TailJump ) {
      tail_jump_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
      tail_jump_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
      break;
    }
  }

  // Input RegMask array shared by all Halts
  uint halt_edge_cnt = TypeFunc::Parms;
  RegMask *halt_rms = init_input_masks( halt_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // Capture the return input masks into each exit flavor
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *exit = root->in(i)->as_MachReturn();
    switch( exit->ideal_Opcode() ) {
      case Op_Return   : exit->_in_rms = ret_rms;  break;
      case Op_Rethrow  : exit->_in_rms = reth_rms; break;
      case Op_TailCall : exit->_in_rms = tail_call_rms; break;
      case Op_TailJump : exit->_in_rms = tail_jump_rms; break;
      case Op_Halt     : exit->_in_rms = halt_rms; break;
      default          : ShouldNotReachHere();
    }
  }

  // Next unused projection number from Start.
  int proj_cnt = C->tf()->domain()->cnt();

  // Do all the save-on-entry registers.
  // Make projections from Start for them, and give them a use at the exit
  // points.  To the allocator, they look like incoming register arguments.
  for( i = 0; i < _last_Mach_Reg; i++ ) {
    if( is_save_on_entry(i) ) {

      // Add the save-on-entry to the mask array
      ret_rms      [      ret_edge_cnt] = mreg2regmask[i];
      reth_rms     [     reth_edge_cnt] = mreg2regmask[i];
      tail_call_rms[tail_call_edge_cnt] = mreg2regmask[i];
      tail_jump_rms[tail_jump_edge_cnt] = mreg2regmask[i];
      // Halts need the SOE registers, but only in the stack as debug info.
      // A just-prior uncommon-trap or deoptimization will use the SOE regs.
      halt_rms     [     halt_edge_cnt] = *idealreg2spillmask[_register_save_type[i]];

      Node *mproj;

      // Is this a RegF low half of a RegD?  Double up 2 adjacent RegF's
      // into a single RegD.
      if( (i&1) == 0 &&
          _register_save_type[i  ] == Op_RegF &&
          _register_save_type[i+1] == Op_RegF &&
          is_save_on_entry(i+1) ) {
        // Add other bit for double
        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
        mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegD );
        proj_cnt += 2;          // Skip 2 for doubles
      }
      else if( (i&1) == 1 &&    // Else check for high half of double
               _register_save_type[i-1] == Op_RegF &&
               _register_save_type[i  ] == Op_RegF &&
               is_save_on_entry(i-1) ) {
        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
        mproj = C->top();
      }
      // Is this a RegI low half of a RegL?  Double up 2 adjacent RegI's
      // into a single RegL.
      else if( (i&1) == 0 &&
               _register_save_type[i  ] == Op_RegI &&
               _register_save_type[i+1] == Op_RegI &&
               is_save_on_entry(i+1) ) {
        // Add other bit for long
        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
        mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegL );
        proj_cnt += 2;          // Skip 2 for longs
      }
      else if( (i&1) == 1 &&    // Else check for high half of long
               _register_save_type[i-1] == Op_RegI &&
               _register_save_type[i  ] == Op_RegI &&
               is_save_on_entry(i-1) ) {
        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
        mproj = C->top();
      } else {
        // Make a projection for it off the Start
        mproj = new (C, 1) MachProjNode( start, proj_cnt++, ret_rms[ret_edge_cnt], _register_save_type[i] );
      }

      ret_edge_cnt ++;
      reth_edge_cnt ++;
      tail_call_edge_cnt ++;
      tail_jump_edge_cnt ++;
      halt_edge_cnt ++;

      // Add a use of the SOE register to all exit paths
      for( uint j=1; j < root->req(); j++ )
        root->in(j)->add_req(mproj);
    } // End of if a save-on-entry register
  } // End of for all machine registers
}

//------------------------------init_spill_mask--------------------------------
void Matcher::init_spill_mask( Node *ret ) {
  if( idealreg2regmask[Op_RegI] ) return; // One time only init

  OptoReg::c_frame_pointer = c_frame_pointer();
  c_frame_ptr_mask = c_frame_pointer();
#ifdef _LP64
  // pointers are twice as big
  c_frame_ptr_mask.Insert(OptoReg::add(c_frame_pointer(),1));
#endif

  // Start at OptoReg::stack0()
  STACK_ONLY_mask.Clear();
  OptoReg::Name init = OptoReg::stack2reg(0);
  // STACK_ONLY_mask is all stack bits
  OptoReg::Name i;
  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
    STACK_ONLY_mask.Insert(i);
  // Also set the "infinite stack" bit.
  STACK_ONLY_mask.set_AllStack();

  // Copy the register names over into the shared world
  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
    // SharedInfo::regName[i] = regName[i];
    // Handy RegMasks per machine register
    mreg2regmask[i].Insert(i);
  }

  // Grab the Frame Pointer
  Node *fp  = ret->in(TypeFunc::FramePtr);
  Node *mem = ret->in(TypeFunc::Memory);
  const TypePtr* atp = TypePtr::BOTTOM;
  // Share frame pointer while making spill ops
  set_shared(fp);

  // Compute generic short-offset Loads
#ifdef _LP64
  MachNode *spillCP = match_tree(new (C, 3) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
#endif
  MachNode *spillI  = match_tree(new (C, 3) LoadINode(NULL,mem,fp,atp));
  MachNode *spillL  = match_tree(new (C, 3) LoadLNode(NULL,mem,fp,atp));
  MachNode *spillF  = match_tree(new (C, 3) LoadFNode(NULL,mem,fp,atp));
  MachNode *spillD  = match_tree(new (C, 3) LoadDNode(NULL,mem,fp,atp));
  MachNode *spillP  = match_tree(new (C, 3) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
  assert(spillI != NULL && spillL != NULL && spillF != NULL &&
         spillD != NULL && spillP != NULL, "");

  // Get the ADLC notion of the right regmask, for each basic type.
#ifdef _LP64
  idealreg2regmask[Op_RegN] = &spillCP->out_RegMask();
#endif
  idealreg2regmask[Op_RegI] = &spillI->out_RegMask();
  idealreg2regmask[Op_RegL] = &spillL->out_RegMask();
  idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
  idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
  idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
}

#ifdef ASSERT
static void match_alias_type(Compile* C, Node* n, Node* m) {
  if (!VerifyAliases)  return;  // do not go looking for trouble by default
  const TypePtr* nat = n->adr_type();
  const TypePtr* mat = m->adr_type();
  int nidx = C->get_alias_index(nat);
  int midx = C->get_alias_index(mat);
  // Detune the assert for cases like (AndI 0xFF (LoadB p)).
  if (nidx == Compile::AliasIdxTop && midx >= Compile::AliasIdxRaw) {
    for (uint i = 1; i < n->req(); i++) {
      Node* n1 = n->in(i);
      const TypePtr* n1at = n1->adr_type();
      if (n1at != NULL) {
        nat = n1at;
        nidx = C->get_alias_index(n1at);
      }
    }
  }
  // %%% Kludgery.  Instead, fix ideal adr_type methods for all these cases:
  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxRaw) {
    switch (n->Opcode()) {
    case Op_PrefetchRead:
    case Op_PrefetchWrite:
      nidx = Compile::AliasIdxRaw;
      nat = TypeRawPtr::BOTTOM;
      break;
    }
  }
  if (nidx == Compile::AliasIdxRaw && midx == Compile::AliasIdxTop) {
    switch (n->Opcode()) {
    case Op_ClearArray:
      midx = Compile::AliasIdxRaw;
      mat = TypeRawPtr::BOTTOM;
      break;
    }
  }
  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxBot) {
    switch (n->Opcode()) {
    case Op_Return:
    case Op_Rethrow:
    case Op_Halt:
    case Op_TailCall:
    case Op_TailJump:
      nidx = Compile::AliasIdxBot;
      nat = TypePtr::BOTTOM;
      break;
    }
  }
  if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
    switch (n->Opcode()) {
    case Op_StrComp:
    case Op_StrEquals:
    case Op_StrIndexOf:
    case Op_AryEq:
    case Op_MemBarVolatile:
    case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
      nidx = Compile::AliasIdxTop;
      nat = NULL;
      break;
    }
  }
  if (nidx != midx) {
    if (PrintOpto || (PrintMiscellaneous && (WizardMode || Verbose))) {
      tty->print_cr("==== Matcher alias shift %d => %d", nidx, midx);
      n->dump();
      m->dump();
    }
    assert(C->subsume_loads() && C->must_alias(nat, midx),
           "must not lose alias info when matching");
  }
}
#endif

//------------------------------MStack-----------------------------------------
// State and MStack class used in xform() and find_shared() iterative methods.
enum Node_State { Pre_Visit,  // node has to be pre-visited
                  Visit,  // visit node
                  Post_Visit,  // post-visit node
                  Alt_Post_Visit   // alternative post-visit path
                };

class MStack: public Node_Stack {
  public:
    MStack(int size) : Node_Stack(size) { }

    void push(Node *n, Node_State ns) {
      Node_Stack::push(n, (uint)ns);
    }
    void push(Node *n, Node_State ns, Node *parent, int indx) {
      ++_inode_top;
      if ((_inode_top + 1) >= _inode_max) grow();
      _inode_top->node = parent;
      _inode_top->indx = (uint)indx;
      ++_inode_top;
      _inode_top->node = n;
      _inode_top->indx = (uint)ns;
    }
    Node *parent() {
      pop();
      return node();
    }
    Node_State state() const {
      return (Node_State)index();
    }
    void set_state(Node_State ns) {
      set_index((uint)ns);
    }
};

//------------------------------xform------------------------------------------
// Given a Node in old-space, Match him (Label/Reduce) to produce a machine
// Node in new-space.  Given a new-space Node, recursively walk his children.
Node *Matcher::transform( Node *n ) { ShouldNotCallThis(); return n; }
Node *Matcher::xform( Node *n, int max_stack ) {
  // Use one stack to keep both: child's node/state and parent's node/index
  MStack mstack(max_stack * 2 * 2); // C->unique() * 2 * 2
  mstack.push(n, Visit, NULL, -1);  // set NULL as parent to indicate root

  while (mstack.is_nonempty()) {
    n = mstack.node();          // Leave node on stack
    Node_State nstate = mstack.state();
    if (nstate == Visit) {
      mstack.set_state(Post_Visit);
      Node *oldn = n;
      // Old-space or new-space check
      if (!C->node_arena()->contains(n)) {
        // Old space!
        Node* m;
        if (has_new_node(n)) {  // Not yet Label/Reduced
          m = new_node(n);
        } else {
          if (!is_dontcare(n)) { // Matcher can match this guy
            // Calls match special.  They match alone with no children.
            // Their children, the incoming arguments, match normally.
            m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n);
            if (C->failing())  return NULL;
            if (m == NULL) { Matcher::soft_match_failure(); return NULL; }
          } else {                  // Nothing the matcher cares about
            if( n->is_Proj() && n->in(0)->is_Multi()) {       // Projections?
              // Convert to machine-dependent projection
              m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
#ifdef ASSERT
              _new2old_map.map(m->_idx, n);
#endif
              if (m->in(0) != NULL) // m might be top
                collect_null_checks(m, n);
            } else {                // Else just a regular 'ol guy
              m = n->clone();       // So just clone into new-space
#ifdef ASSERT
              _new2old_map.map(m->_idx, n);
#endif
              // Def-Use edges will be added incrementally as Uses
              // of this node are matched.
              assert(m->outcnt() == 0, "no Uses of this clone yet");
            }
          }

          set_new_node(n, m);       // Map old to new
          if (_old_node_note_array != NULL) {
            Node_Notes* nn = C->locate_node_notes(_old_node_note_array,
                                                  n->_idx);
            C->set_node_notes_at(m->_idx, nn);
          }
          debug_only(match_alias_type(C, n, m));
        }
        n = m;    // n is now a new-space node
        mstack.set_node(n);
      }

      // New space!
      if (_visited.test_set(n->_idx)) continue; // while(mstack.is_nonempty())

      int i;
      // Put precedence edges on stack first (match them last).
      for (i = oldn->req(); (uint)i < oldn->len(); i++) {
        Node *m = oldn->in(i);
        if (m == NULL) break;
        // set -1 to call add_prec() instead of set_req() during Step1
        mstack.push(m, Visit, n, -1);
      }

      // For constant debug info, I'd rather have unmatched constants.
      int cnt = n->req();
      JVMState* jvms = n->jvms();
      int debug_cnt = jvms ? jvms->debug_start() : cnt;

      // Now do only debug info.  Clone constants rather than matching.
      // Constants are represented directly in the debug info without
      // the need for executable machine instructions.
      // Monitor boxes are also represented directly.
      for (i = cnt - 1; i >= debug_cnt; --i) { // For all debug inputs do
        Node *m = n->in(i);          // Get input
        int op = m->Opcode();
        assert((op == Op_BoxLock) == jvms->is_monitor_use(i), "boxes only at monitor sites");
        if( op == Op_ConI || op == Op_ConP || op == Op_ConN ||
            op == Op_ConF || op == Op_ConD || op == Op_ConL
            // || op == Op_BoxLock  // %%%% enable this and remove (+++) in chaitin.cpp
            ) {
          m = m->clone();
#ifdef ASSERT
          _new2old_map.map(m->_idx, n);
#endif
          mstack.push(m, Post_Visit, n, i); // Don't need to visit
          mstack.push(m->in(0), Visit, m, 0);
        } else {
          mstack.push(m, Visit, n, i);
        }
      }

      // And now walk his children, and convert his inputs to new-space.
      for( ; i >= 0; --i ) { // For all normal inputs do
        Node *m = n->in(i);  // Get input
        if(m != NULL)
          mstack.push(m, Visit, n, i);
      }

    }
    else if (nstate == Post_Visit) {
      // Set xformed input
      Node *p = mstack.parent();
      if (p != NULL) { // root doesn't have parent
        int i = (int)mstack.index();
        if (i >= 0)
          p->set_req(i, n); // required input
        else if (i == -1)
          p->add_prec(n);   // precedence input
        else
          ShouldNotReachHere();
      }
      mstack.pop(); // remove processed node from stack
    }
    else {
      ShouldNotReachHere();
    }
  } // while (mstack.is_nonempty())
  return n; // Return new-space Node
}

//------------------------------warp_outgoing_stk_arg------------------------
OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call ) {
  // Convert outgoing argument location to a pre-biased stack offset
  if (reg->is_stack()) {
    OptoReg::Name warped = reg->reg2stack();
    // Adjust the stack slot offset to be the register number used
    // by the allocator.
    warped = OptoReg::add(begin_out_arg_area, warped);
    // Keep track of the largest numbered stack slot used for an arg.
    // Largest used slot per call-site indicates the amount of stack
    // that is killed by the call.
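    // E.g., an outgoing arg warped to slot k bumps out_arg_limit_per_call
    // to k+1; match_sfpt() later folds the per-call-site maximum into the
    // method-wide _out_arg_limit.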
    if( warped >= out_arg_limit_per_call )
      out_arg_limit_per_call = OptoReg::add(warped,1);
    if (!RegMask::can_represent(warped)) {
      C->record_method_not_compilable_all_tiers("unsupported calling sequence");
      return OptoReg::Bad;
    }
    return warped;
  }
  return OptoReg::as_OptoReg(reg);
}

//------------------------------match_sfpt-------------------------------------
// Helper function to match call instructions.  Calls match special.
// They match alone with no children.  Their children, the incoming
// arguments, match normally.
MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
  MachSafePointNode *msfpt = NULL;
  MachCallNode      *mcall = NULL;
  uint               cnt;
  // Split out case for SafePoint vs Call
  CallNode *call;
  const TypeTuple *domain;
  ciMethod*        method = NULL;
  bool             is_method_handle_invoke = false;  // for special kill effects
  if( sfpt->is_Call() ) {
    call = sfpt->as_Call();
    domain = call->tf()->domain();
    cnt = domain->cnt();

    // Match just the call, nothing else
    MachNode *m = match_tree(call);
    if (C->failing())  return NULL;
    if( m == NULL ) { Matcher::soft_match_failure(); return NULL; }

    // Copy data from the Ideal SafePoint to the machine version
    mcall = m->as_MachCall();

    mcall->set_tf(         call->tf());
    mcall->set_entry_point(call->entry_point());
    mcall->set_cnt(        call->cnt());

    if( mcall->is_MachCallJava() ) {
      MachCallJavaNode *mcall_java  = mcall->as_MachCallJava();
      const CallJavaNode *call_java =  call->as_CallJava();
      method = call_java->method();
      mcall_java->_method = method;
      mcall_java->_bci = call_java->_bci;
      mcall_java->_optimized_virtual = call_java->is_optimized_virtual();
      is_method_handle_invoke = call_java->is_method_handle_invoke();
      mcall_java->_method_handle_invoke = is_method_handle_invoke;
      if( mcall_java->is_MachCallStaticJava() )
        mcall_java->as_MachCallStaticJava()->_name =
         call_java->as_CallStaticJava()->_name;
      if( mcall_java->is_MachCallDynamicJava() )
        mcall_java->as_MachCallDynamicJava()->_vtable_index =
         call_java->as_CallDynamicJava()->_vtable_index;
    }
    else if( mcall->is_MachCallRuntime() ) {
      mcall->as_MachCallRuntime()->_name = call->as_CallRuntime()->_name;
    }
    msfpt = mcall;
  }
  // This is a non-call safepoint
  else {
    call = NULL;
    domain = NULL;
    MachNode *mn = match_tree(sfpt);
    if (C->failing())  return NULL;
    msfpt = mn->as_MachSafePoint();
    cnt = TypeFunc::Parms;
  }

  // Advertise the correct memory effects (for anti-dependence computation).
  msfpt->set_adr_type(sfpt->adr_type());

  // Allocate a private array of RegMasks.  These RegMasks are not shared.
  msfpt->_in_rms = NEW_RESOURCE_ARRAY( RegMask, cnt );
  // Empty them all.
  memset( msfpt->_in_rms, 0, sizeof(RegMask)*cnt );

  // Do all the pre-defined non-Empty register masks
  msfpt->_in_rms[TypeFunc::ReturnAdr] = _return_addr_mask;
  msfpt->_in_rms[TypeFunc::FramePtr ] = c_frame_ptr_mask;

  // Place where the first outgoing argument can possibly be put.
  OptoReg::Name begin_out_arg_area = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
  assert( is_even(begin_out_arg_area), "" );
  // Compute max outgoing register number per call site.
  OptoReg::Name out_arg_limit_per_call = begin_out_arg_area;
  // Calls to C may hammer extra stack slots above and beyond any arguments.
  // These are usually backing store for register arguments for varargs.
  if( call != NULL && call->is_CallRuntime() )
    out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());

  // Do the normal argument list (parameters) register masks
  int argcnt = cnt - TypeFunc::Parms;
  if( argcnt > 0 ) {          // Skip it all if we have no args
    BasicType *sig_bt    = NEW_RESOURCE_ARRAY( BasicType, argcnt );
    VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
    int i;
    for( i = 0; i < argcnt; i++ ) {
      sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
    }
    // V-call to pick proper calling convention
    call->calling_convention( sig_bt, parm_regs, argcnt );

#ifdef ASSERT
    // Sanity check users' calling convention.  Really handy during
    // the initial porting effort.  Fairly expensive otherwise.
    { for (int i = 0; i<argcnt; i++) {
        if( !parm_regs[i].first()->is_valid() &&
            !parm_regs[i].second()->is_valid() ) continue;
        VMReg reg1 = parm_regs[i].first();
        VMReg reg2 = parm_regs[i].second();
        for (int j = 0; j < i; j++) {
          if( !parm_regs[j].first()->is_valid() &&
              !parm_regs[j].second()->is_valid() ) continue;
          VMReg reg3 = parm_regs[j].first();
          VMReg reg4 = parm_regs[j].second();
          if( !reg1->is_valid() ) {
            assert( !reg2->is_valid(), "valid halvsies" );
          } else if( !reg3->is_valid() ) {
            assert( !reg4->is_valid(), "valid halvsies" );
          } else {
            assert( reg1 != reg2, "calling conv. must produce distinct regs");
            assert( reg1 != reg3, "calling conv. must produce distinct regs");
            assert( reg1 != reg4, "calling conv. must produce distinct regs");
            assert( reg2 != reg3, "calling conv. must produce distinct regs");
            assert( reg2 != reg4 || !reg2->is_valid(), "calling conv. must produce distinct regs");
            assert( reg3 != reg4, "calling conv. must produce distinct regs");
          }
        }
      }
    }
#endif

    // Visit each argument.  Compute its outgoing register mask.
    // Results can now take 2 register-mask bits (for doubleword values).
    // Compute max over all outgoing arguments both per call-site
    // and over the entire method.
    for( i = 0; i < argcnt; i++ ) {
      // Address of incoming argument mask to fill in
      RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms];
      if( !parm_regs[i].first()->is_valid() &&
          !parm_regs[i].second()->is_valid() ) {
        continue;               // Avoid Halves
      }
      // Grab first register, adjust stack slots and insert in mask.
      OptoReg::Name reg1 = warp_outgoing_stk_arg(parm_regs[i].first(), begin_out_arg_area, out_arg_limit_per_call );
      if (OptoReg::is_valid(reg1))
        rm->Insert( reg1 );
      // Grab second register (if any), adjust stack slots and insert in mask.
      OptoReg::Name reg2 = warp_outgoing_stk_arg(parm_regs[i].second(), begin_out_arg_area, out_arg_limit_per_call );
      if (OptoReg::is_valid(reg2))
        rm->Insert( reg2 );
    } // End of for all arguments

    // Compute number of stack slots needed to restore stack in case of
    // Pascal-style argument popping.
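    // Both bounds below are biased OptoReg slot numbers, so their difference
    // is exactly the size of this call site's outgoing-argument area.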
    mcall->_argsize = out_arg_limit_per_call - begin_out_arg_area;
  }

  if (is_method_handle_invoke) {
    // Kill some extra stack space in case method handles want to do
    // a little in-place argument insertion.
    int regs_per_word = NOT_LP64(1) LP64_ONLY(2); // %%% make a global const!
    out_arg_limit_per_call += MethodHandlePushLimit * regs_per_word;
    // Do not update mcall->_argsize because (a) the extra space is not
    // pushed as arguments and (b) _argsize is dead (not used anywhere).
  }

  // Compute the max stack slot killed by any call.  These will not be
  // available for debug info, and will be used to adjust FIRST_STACK_mask
  // after all call sites have been visited.
  if( _out_arg_limit < out_arg_limit_per_call)
    _out_arg_limit = out_arg_limit_per_call;

  if (mcall) {
    // Kill the outgoing argument area, including any non-argument holes and
    // any legacy C-killed slots.  Use Fat-Projections to do the killing.
    // Since the max-per-method covers the max-per-call-site and debug info
    // is excluded on the max-per-method basis, debug info cannot land in
    // this killed area.
    uint r_cnt = mcall->tf()->range()->cnt();
    MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
    if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) {
      C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
    } else {
      for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
        proj->_rout.Insert(OptoReg::Name(i));
    }
    if( proj->_rout.is_NotEmpty() )
      _proj_list.push(proj);
  }
  // Transfer the safepoint information from the call to the mcall
  // Move the JVMState list
  msfpt->set_jvms(sfpt->jvms());
  for (JVMState* jvms = msfpt->jvms(); jvms; jvms = jvms->caller()) {
    jvms->set_map(sfpt);
  }

  // Debug inputs begin just after the last incoming parameter
  assert( (mcall == NULL) || (mcall->jvms() == NULL) ||
          (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "" );

  // Move the OopMap
  msfpt->_oop_map = sfpt->_oop_map;

  // Registers killed by the call are set in the local scheduling pass
  // of Global Code Motion.
  return msfpt;
}

//---------------------------match_tree----------------------------------------
// Match an Ideal Node DAG - turn it into a tree; Label & Reduce.  Used as part
// of the whole-sale conversion from Ideal to Mach Nodes.  Also used for
// making GotoNodes while building the CFG and in init_spill_mask() to identify
// a Load's result RegMask for memoization in idealreg2regmask[]
MachNode *Matcher::match_tree( const Node *n ) {
  assert( n->Opcode() != Op_Phi, "cannot match" );
  assert( !n->is_block_start(), "cannot match" );
  // Set the mark for all locally allocated State objects.
  // When this call returns, the _states_arena arena will be reset
  // freeing all State objects.
  ResourceMark rm( &_states_arena );

  LabelRootDepth = 0;

  // StoreNodes require their Memory input to match any LoadNodes
  Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ;
#ifdef ASSERT
  Node* save_mem_node = _mem_node;
  _mem_node = n->is_Store() ?
              (Node*)n : NULL;
#endif
  // State object for root node of match tree
  // Allocate it on _states_arena - stack allocation can cause stack overflow.
  State *s = new (&_states_arena) State;
  s->_kids[0] = NULL;
  s->_kids[1] = NULL;
  s->_leaf = (Node*)n;
  // Label the input tree, allocating labels from top-level arena
  Label_Root( n, s, n->in(0), mem );
  if (C->failing())  return NULL;

  // The minimum cost match for the whole tree is found at the root State
  uint mincost = max_juint;
  uint cost = max_juint;
  uint i;
  for( i = 0; i < NUM_OPERANDS; i++ ) {
    if( s->valid(i) &&                // valid entry and
        s->_cost[i] < cost &&         // low cost and
        s->_rule[i] >= NUM_OPERANDS ) // not an operand
      cost = s->_cost[mincost=i];
  }
  if (mincost == max_juint) {
#ifndef PRODUCT
    tty->print("No matching rule for:");
    s->dump();
#endif
    Matcher::soft_match_failure();
    return NULL;
  }
  // Reduce input tree based upon the state labels to machine Nodes
  MachNode *m = ReduceInst( s, s->_rule[mincost], mem );
#ifdef ASSERT
  _old2new_map.map(n->_idx, m);
  _new2old_map.map(m->_idx, (Node*)n);
#endif

  // Add any Matcher-ignored edges
  uint cnt = n->req();
  uint start = 1;
  if( mem != (Node*)1 ) start = MemNode::Memory+1;
  if( n->is_AddP() ) {
    assert( mem == (Node*)1, "" );
    start = AddPNode::Base+1;
  }
  for( i = start; i < cnt; i++ ) {
    if( !n->match_edge(i) ) {
      if( i < m->req() )
        m->ins_req( i, n->in(i) );
      else
        m->add_req( n->in(i) );
    }
  }

  debug_only( _mem_node = save_mem_node; )
  return m;
}

//------------------------------match_into_reg---------------------------------
// Choose to either match this Node in a register or part of the current
// match tree.  Return true for requiring a register and false for matching
// as part of the current match tree.
static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) {

  const Type *t = m->bottom_type();

  if( t->singleton() ) {
    // Never force constants into registers.  Allow them to match as
    // constants or registers.  Copies of the same value will share
    // the same register.  See find_shared_node.
    return false;
  } else {                      // Not a constant
    // Stop recursion if they have different Controls.
    // Slot 0 of constants is not really a Control.
    if( control && m->in(0) && control != m->in(0) ) {

      // Actually, we can live with the most conservative control we
      // find, if it post-dominates the others.  This allows us to
      // pick up load/op/store trees where the load can float a little
      // above the store.
      Node *x = control;
      const uint max_scan = 6;  // Arbitrary scan cutoff
      uint j;
      for( j=0; j<max_scan; j++ ) {
        if( x->is_Region() )    // Bail out at merge points
          return true;
        x = x->in(0);
        if( x == m->in(0) )     // Does 'control' post-dominate
          break;                // m->in(0)?  If so, we can use it
      }
      if( j == max_scan )       // No post-domination before scan end?
        return true;            // Then break the match tree up
    }
    if (m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) {
      // These are commonly used in address expressions and can
      // efficiently fold into them on X64 in some cases.
      return false;
    }
  }

  // Not forceable cloning.
  // If shared, put it into a register.
  return shared;
}

//------------------------------Instruction Selection--------------------------
// Label method walks a "tree" of nodes, using the ADLC generated DFA to match
// ideal nodes to machine instructions.  Trees are delimited by shared Nodes,
// things the Matcher does not match (e.g., Memory), and things with different
// Controls (hence forced into different blocks).  We pass in the Control
// selected for this entire State tree.

// The Matcher works on Trees, but an Intel add-to-memory requires a DAG: the
// Store and the Load must have identical Memories (as well as identical
// pointers).  Since the Matcher does not have anything for Memory (and
// does not handle DAGs), I have to match the Memory input myself.  If the
// Tree root is a Store, I require all Loads to have the identical memory.
Node *Matcher::Label_Root( const Node *n, State *svec, Node *control, const Node *mem){
  // Since Label_Root is a recursive function, it's possible that we might run
  // out of stack space.  See bugs 6272980 & 6227033 for more info.
  LabelRootDepth++;
  if (LabelRootDepth > MaxLabelRootDepth) {
    C->record_method_not_compilable_all_tiers("Out of stack space, increase MaxLabelRootDepth");
    return NULL;
  }
  uint care = 0;                // Edges matcher cares about
  uint cnt = n->req();
  uint i = 0;

  // Examine children for memory state
  // Can only subsume a child into your match-tree if that child's memory state
  // is not modified along the path to another input.
  // It is unsafe even if the other inputs are separate roots.
  Node *input_mem = NULL;
  for( i = 1; i < cnt; i++ ) {
    if( !n->match_edge(i) ) continue;
    Node *m = n->in(i);         // Get ith input
    assert( m, "expect non-null children" );
    if( m->is_Load() ) {
      if( input_mem == NULL ) {
        input_mem = m->in(MemNode::Memory);
      } else if( input_mem != m->in(MemNode::Memory) ) {
        input_mem = NodeSentinel;
      }
    }
  }

  for( i = 1; i < cnt; i++ ){   // For my children
    if( !n->match_edge(i) ) continue;
    Node *m = n->in(i);         // Get ith input
    // Allocate states out of a private arena
    State *s = new (&_states_arena) State;
    svec->_kids[care++] = s;
    assert( care <= 2, "binary only for now" );

    // Recursively label the State tree.
    s->_kids[0] = NULL;
    s->_kids[1] = NULL;
    s->_leaf = m;

    // Check for leaves of the State Tree; things that cannot be a part of
    // the current tree.  If it finds any, that value is matched as a
    // register operand.  If not, then the normal matching is used.
    if( match_into_reg(n, m, control, i, is_shared(m)) ||
        //
        // Stop recursion if this is a LoadNode and the root of this tree is a
        // StoreNode and the load & store have different memories.
        ((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ||
        // Can NOT include the match of a subtree when its memory state
        // is used by any of the other subtrees
        (input_mem == NodeSentinel) ) {
#ifndef PRODUCT
      // Print when we exclude matching due to different memory states at input-loads
      if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
          && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) {
        tty->print_cr("invalid input_mem");
      }
#endif
      // Switch to a register-only opcode; this value must be in a register
      // and cannot be subsumed as part of a larger instruction.
      s->DFA( m->ideal_reg(), m );

    } else {
      // If the match tree has no control and we do, adopt it for the entire tree
      if( control == NULL && m->in(0) != NULL && m->req() > 1 )
        control = m->in(0);     // Pick up control
      // Else match as a normal part of the match tree.
      control = Label_Root(m,s,control,mem);
      if (C->failing()) return NULL;
    }
  }


  // Call DFA to match this node, and return
  svec->DFA( n->Opcode(), n );

#ifdef ASSERT
  uint x;
  for( x = 0; x < _LAST_MACH_OPER; x++ )
    if( svec->valid(x) )
      break;

  if (x >= _LAST_MACH_OPER) {
    n->dump();
    svec->dump();
    assert( false, "bad AD file" );
  }
#endif
  return control;
}


// Con nodes reduced using the same rule can share their MachNode,
// which reduces the number of copies of a constant in the final
// program. The register allocator is free to split uses later to
// split live ranges.
MachNode* Matcher::find_shared_node(Node* leaf, uint rule) {
  if (!leaf->is_Con() && !leaf->is_DecodeN()) return NULL;

  // See if this Con has already been reduced using this rule.
  if (_shared_nodes.Size() <= leaf->_idx) return NULL;
  MachNode* last = (MachNode*)_shared_nodes.at(leaf->_idx);
  if (last != NULL && rule == last->rule()) {
    // Don't expect control change for DecodeN
    if (leaf->is_DecodeN())
      return last;
    // Get the new space root.
    Node* xroot = new_node(C->root());
    if (xroot == NULL) {
      // This shouldn't happen, given the order of matching.
      return NULL;
    }

    // Shared constants need to have their control be root so they
    // can be scheduled properly.
    Node* control = last->in(0);
    if (control != xroot) {
      if (control == NULL || control == C->root()) {
        last->set_req(0, xroot);
      } else {
        assert(false, "unexpected control");
        return NULL;
      }
    }
    return last;
  }
  return NULL;
}


//------------------------------ReduceInst-------------------------------------
// Reduce a State tree (with given Control) into a tree of MachNodes.
// This routine (and its cohort, ReduceOper) converts Ideal Nodes into
// complicated machine Nodes. Each MachNode covers some tree of Ideal Nodes.
// Each MachNode has a number of complicated MachOper operands; each
// MachOper also covers a further tree of Ideal Nodes.

// The root of the Ideal match tree is always an instruction, so we enter
// the recursion here. After building the MachNode, we need to recurse
// the tree checking for these cases:
// (1) Child is an instruction -
//     Build the instruction (recursively), add it as an edge.
//     Build a simple operand (register) to hold the result of the instruction.
// (2) Child is an interior part of an instruction -
//     Skip over it (do nothing).
// (3) Child is the start of an operand -
//     Build the operand, place it inside the instruction,
//     then call ReduceOper.
MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
  assert( rule >= NUM_OPERANDS, "called with operand rule" );

  MachNode* shared_node = find_shared_node(s->_leaf, rule);
  if (shared_node != NULL) {
    return shared_node;
  }

  // Build the object to represent this state & prepare for recursive calls
  MachNode *mach = s->MachNodeGenerator( rule, C );
  mach->_opnds[0] = s->MachOperGenerator( _reduceOp[rule], C );
  assert( mach->_opnds[0] != NULL, "Missing result operand" );
  Node *leaf = s->_leaf;
  // Check for instruction or instruction chain rule
  if( rule >= _END_INST_CHAIN_RULE || rule < _BEGIN_INST_CHAIN_RULE ) {
    assert(C->node_arena()->contains(s->_leaf) || !has_new_node(s->_leaf),
           "duplicating node that's already been matched");
    // Instruction
    mach->add_req( leaf->in(0) ); // Set initial control
    // Reduce interior of complex instruction
    ReduceInst_Interior( s, rule, mem, mach, 1 );
  } else {
    // Instruction chain rules are data-dependent on their inputs
    mach->add_req(0);             // Set initial control to none
    ReduceInst_Chain_Rule( s, rule, mem, mach );
  }

  // If a Memory was used, insert a Memory edge
  if( mem != (Node*)1 ) {
    mach->ins_req(MemNode::Memory,mem);
#ifdef ASSERT
    // Verify adr type after matching memory operation
    const MachOper* oper = mach->memory_operand();
    if (oper != NULL && oper != (MachOper*)-1) {
      // It has a unique memory operand. Find the corresponding ideal mem node.
      Node* m = NULL;
      if (leaf->is_Mem()) {
        m = leaf;
      } else {
        m = _mem_node;
        assert(m != NULL && m->is_Mem(), "expecting memory node");
      }
      const Type* mach_at = mach->adr_type();
      // A DecodeN node consumed by an address may have a different type
      // than its input. Don't compare types in that case.
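      // A sketch of the address-input shapes tolerated by the check below
      // (the chain hanging off the memory node's Address input):
      //   DecodeN
      //   AddP( base, DecodeN, offset )
      //   AddP( base, AddP( base', DecodeN, offset' ), offset )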
      if (m->adr_type() != mach_at &&
          (m->in(MemNode::Address)->is_DecodeN() ||
           m->in(MemNode::Address)->is_AddP() &&
           m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN() ||
           m->in(MemNode::Address)->is_AddP() &&
           m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
           m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeN())) {
        mach_at = m->adr_type();
      }
      if (m->adr_type() != mach_at) {
        m->dump();
        tty->print_cr("mach:");
        mach->dump(1);
      }
      assert(m->adr_type() == mach_at, "matcher should not change adr type");
    }
#endif
  }

  // If the _leaf is an AddP, insert the base edge
  if( leaf->is_AddP() )
    mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));

  uint num_proj = _proj_list.size();

  // Perform any 1-to-many expansions required
  MachNode *ex = mach->Expand(s,_proj_list, mem);
  if( ex != mach ) {
    assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
    if( ex->in(1)->is_Con() )
      ex->in(1)->set_req(0, C->root());
    // Remove old node from the graph
    for( uint i=0; i<mach->req(); i++ ) {
      mach->set_req(i,NULL);
    }
#ifdef ASSERT
    _new2old_map.map(ex->_idx, s->_leaf);
#endif
  }

  // PhaseChaitin::fixup_spills will sometimes generate spill code
  // via the matcher. By that time, nodes have been wired into the CFG,
  // and any further nodes generated by expand rules will be left hanging
  // in space, and will not get emitted as output code. Catch this.
  // Also, catch any new register allocation constraints ("projections")
  // generated belatedly during spill code generation.
  if (_allocation_started) {
    guarantee(ex == mach, "no expand rules during spill generation");
    guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
  }

  if (leaf->is_Con() || leaf->is_DecodeN()) {
    // Record the con for sharing
    _shared_nodes.map(leaf->_idx, ex);
  }

  return ex;
}

void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) {
  // 'op' is what I am expecting to receive
  int op = _leftOp[rule];
  // Operand type to catch the child's result
  // This is what my child will give me.
  int opnd_class_instance = s->_rule[op];
  // Choose between operand class or not.
  // This is what I will receive.
  int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
  // New rule for child. Chase operand classes to get the actual rule.
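  // (E.g., when _leftOp[rule] names an operand class rather than a concrete
  // operand, s->_rule[op] is the concrete operand instance the child matched
  // as; looking that instance up again below yields the actual reduce rule.)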
  int newrule = s->_rule[catch_op];

  if( newrule < NUM_OPERANDS ) {
    // Chain from operand or operand class, may be output of shared node
    assert( 0 <= opnd_class_instance && opnd_class_instance < NUM_OPERANDS,
            "Bad AD file: Instruction chain rule must chain from operand");
    // Insert operand into array of operands for this instruction
    mach->_opnds[1] = s->MachOperGenerator( opnd_class_instance, C );

    ReduceOper( s, newrule, mem, mach );
  } else {
    // Chain from the result of an instruction
    assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
    mach->_opnds[1] = s->MachOperGenerator( _reduceOp[catch_op], C );
    Node *mem1 = (Node*)1;
    debug_only(Node *save_mem_node = _mem_node;)
    mach->add_req( ReduceInst(s, newrule, mem1) );
    debug_only(_mem_node = save_mem_node;)
  }
  return;
}


uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) {
  if( s->_leaf->is_Load() ) {
    Node *mem2 = s->_leaf->in(MemNode::Memory);
    assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
    debug_only( if( mem == (Node*)1 ) _mem_node = s->_leaf;)
    mem = mem2;
  }
  if( s->_leaf->in(0) != NULL && s->_leaf->req() > 1) {
    if( mach->in(0) == NULL )
      mach->set_req(0, s->_leaf->in(0));
  }

  // Now recursively walk the state tree & add operand list.
  for( uint i=0; i<2; i++ ) {   // binary tree
    State *newstate = s->_kids[i];
    if( newstate == NULL ) break; // Might only have 1 child
    // 'op' is what I am expecting to receive
    int op;
    if( i == 0 ) {
      op = _leftOp[rule];
    } else {
      op = _rightOp[rule];
    }
    // Operand type to catch the child's result
    // This is what my child will give me.
    int opnd_class_instance = newstate->_rule[op];
    // Choose between operand class or not.
    // This is what I will receive.
    int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op;
    // New rule for child. Chase operand classes to get the actual rule.
    int newrule = newstate->_rule[catch_op];

    if( newrule < NUM_OPERANDS ) { // Operand/operandClass or internalOp/instruction?
      // Operand/operandClass
      // Insert operand into array of operands for this instruction
      mach->_opnds[num_opnds++] = newstate->MachOperGenerator( opnd_class_instance, C );
      ReduceOper( newstate, newrule, mem, mach );

    } else {                    // Child is internal operand or new instruction
      if( newrule < _LAST_MACH_OPER ) { // internal operand or instruction?
        // internal operand --> call ReduceInst_Interior
        // Interior of complex instruction. Do nothing but recurse.
        num_opnds = ReduceInst_Interior( newstate, newrule, mem, mach, num_opnds );
      } else {
        // instruction --> call build operand( ) to catch result
        //             --> ReduceInst( newrule )
        mach->_opnds[num_opnds++] = s->MachOperGenerator( _reduceOp[catch_op], C );
        Node *mem1 = (Node*)1;
        debug_only(Node *save_mem_node = _mem_node;)
        mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
        debug_only(_mem_node = save_mem_node;)
      }
    }
    assert( mach->_opnds[num_opnds-1], "" );
  }
  return num_opnds;
}

// This routine walks the interior of possible complex operands.
// At each point we check our children in the match tree:
// (1) No children -
//     We are a leaf; add the _leaf field as an input to the MachNode.
// (2) Child is an internal operand -
//     Skip over it (do nothing).
// (3) Child is an instruction -
//     Call ReduceInst recursively and add the resulting instruction
//     as an input to the MachNode.
void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
  assert( rule < _LAST_MACH_OPER, "called with operand rule" );
  State *kid = s->_kids[0];
  assert( kid == NULL || s->_leaf->in(0) == NULL, "internal operands have no control" );

  // Leaf? And not subsumed?
  if( kid == NULL && !_swallowed[rule] ) {
    mach->add_req( s->_leaf );  // Add leaf pointer
    return;                     // Bail out
  }

  if( s->_leaf->is_Load() ) {
    assert( mem == (Node*)1, "multiple Memories being matched at once?" );
    mem = s->_leaf->in(MemNode::Memory);
    debug_only(_mem_node = s->_leaf;)
  }
  if( s->_leaf->in(0) && s->_leaf->req() > 1) {
    if( !mach->in(0) )
      mach->set_req(0,s->_leaf->in(0));
    else {
      assert( s->_leaf->in(0) == mach->in(0), "same instruction, differing controls?" );
    }
  }

  for( uint i=0; kid != NULL && i<2; kid = s->_kids[1], i++ ) { // binary tree
    int newrule;
    if( i == 0 )
      newrule = kid->_rule[_leftOp[rule]];
    else
      newrule = kid->_rule[_rightOp[rule]];

    if( newrule < _LAST_MACH_OPER ) { // Operand or instruction?
      // Internal operand; recurse but do nothing else
      ReduceOper( kid, newrule, mem, mach );

    } else {                    // Child is a new instruction
      // Reduce the instruction, and add a direct pointer from this
      // machine instruction to the newly reduced one.
      Node *mem1 = (Node*)1;
      debug_only(Node *save_mem_node = _mem_node;)
      mach->add_req( ReduceInst( kid, newrule, mem1 ) );
      debug_only(_mem_node = save_mem_node;)
    }
  }
}


// -------------------------------------------------------------------------
// Java-Java calling convention
// (what you use when Java calls Java)

//------------------------------find_receiver----------------------------------
// For a given signature, return the OptoReg for parameter 0.
OptoReg::Name Matcher::find_receiver( bool is_outgoing ) {
  VMRegPair regs;
  BasicType sig_bt = T_OBJECT;
  calling_convention(&sig_bt, &regs, 1, is_outgoing);
  // Return the argument 0 register. In the LP64 build pointers
  // take 2 registers, but the VM wants only the 'main' name.
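  // (regs.first() is that 'main' half; on LP64, regs.second() names the
  // adjacent slot of the pair.)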
  return OptoReg::as_OptoReg(regs.first());
}

// A method-klass-holder may be passed in the inline_cache_reg
// and then expanded into the inline_cache_reg and a method_oop register
// defined in ad_<arch>.cpp


//------------------------------find_shared------------------------------------
// Set bits if Node is shared or otherwise a root
void Matcher::find_shared( Node *n ) {
  // Allocate stack of size C->unique() * 2 to avoid frequent realloc
  MStack mstack(C->unique() * 2);
  // Mark nodes as address_visited if they are inputs to an address expression
  VectorSet address_visited(Thread::current()->resource_area());
  mstack.push(n, Visit);        // Don't need to pre-visit root node
  while (mstack.is_nonempty()) {
    n = mstack.node();          // Leave node on stack
    Node_State nstate = mstack.state();
    uint nop = n->Opcode();
    if (nstate == Pre_Visit) {
      if (address_visited.test(n->_idx)) { // Visited in address already?
        // Flag as visited and shared now.
        set_visited(n);
      }
      if (is_visited(n)) {      // Visited already?
        // Node is shared and has no reason to clone. Flag it as shared.
        // This causes it to match into a register for the sharing.
        set_shared(n);          // Flag as shared and
        mstack.pop();           // remove node from stack
        continue;
      }
      nstate = Visit;           // Not already visited; so visit now
    }
    if (nstate == Visit) {
      mstack.set_state(Post_Visit);
      set_visited(n);           // Flag as visited now
      bool mem_op = false;

      switch( nop ) {           // Handle some opcodes specially
      case Op_Phi:              // Treat Phis as shared roots
      case Op_Parm:
      case Op_Proj:             // All handled specially during matching
      case Op_SafePointScalarObject:
        set_shared(n);
        set_dontcare(n);
        break;
      case Op_If:
      case Op_CountedLoopEnd:
        mstack.set_state(Alt_Post_Visit); // Alternative way
        // Convert (If (Bool (CmpX A B))) into (If (Bool) (CmpX A B)). This
        // helps with matching cmp/branch in 1 instruction. The Matcher needs
        // the Bool and CmpX side-by-side, because it can only get at constants
        // that are at the leaves of Match trees, and the Bool's condition acts
        // as a constant here.
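        // (A sketch: for a test like "if (x < y)", cloning the Bool lets
        // the matcher see the Bool and the CmpI side-by-side under the If
        // and cover all three with one compare-and-branch instruction.)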
        mstack.push(n->in(1), Visit);     // Clone the Bool
        mstack.push(n->in(0), Pre_Visit); // Visit control input
        continue; // while (mstack.is_nonempty())
      case Op_ConvI2D:          // These forms efficiently match with a prior
      case Op_ConvI2F:          // Load but not a following Store
        if( n->in(1)->is_Load() &&        // Prior load
            n->outcnt() == 1 &&           // Not already shared
            n->unique_out()->is_Store() ) // Following store
          set_shared(n);        // Force it to be a root
        break;
      case Op_ReverseBytesI:
      case Op_ReverseBytesL:
        if( n->in(1)->is_Load() &&        // Prior load
            n->outcnt() == 1 )            // Not already shared
          set_shared(n);        // Force it to be a root
        break;
      case Op_BoxLock:          // Can't match until we get stack-regs in ADLC
      case Op_IfFalse:
      case Op_IfTrue:
      case Op_MachProj:
      case Op_MergeMem:
      case Op_Catch:
      case Op_CatchProj:
      case Op_CProj:
      case Op_JumpProj:
      case Op_JProj:
      case Op_NeverBranch:
        set_dontcare(n);
        break;
      case Op_Jump:
        mstack.push(n->in(1), Visit);     // Switch Value
        mstack.push(n->in(0), Pre_Visit); // Visit Control input
        continue; // while (mstack.is_nonempty())
      case Op_StrComp:
      case Op_StrEquals:
      case Op_StrIndexOf:
      case Op_AryEq:
        set_shared(n);          // Force result into register (it will be anyway)
        break;
      case Op_ConP: {           // Convert pointers above the centerline to NULL
        TypeNode *tn = n->as_Type(); // Constants derive from type nodes
        const TypePtr* tp = tn->type()->is_ptr();
        if (tp->_ptr == TypePtr::AnyNull) {
          tn->set_type(TypePtr::NULL_PTR);
        }
        break;
      }
      case Op_ConN: {           // Convert narrow pointers above the centerline to NULL
        TypeNode *tn = n->as_Type(); // Constants derive from type nodes
        const TypePtr* tp = tn->type()->make_ptr();
        if (tp && tp->_ptr == TypePtr::AnyNull) {
          tn->set_type(TypeNarrowOop::NULL_PTR);
        }
        break;
      }
      case Op_Binary:           // These are introduced in the Post_Visit state.
        ShouldNotReachHere();
        break;
      case Op_ClearArray:
      case Op_SafePoint:
        mem_op = true;
        break;
      default:
        if( n->is_Store() ) {
          // Do match stores, despite no ideal reg
          mem_op = true;
          break;
        }
        if( n->is_Mem() ) {     // Loads and LoadStores
          mem_op = true;
          // Loads must be root of match tree due to prior load conflict
          if( C->subsume_loads() == false )
            set_shared(n);
        }
        // Fall into default case
        if( !n->ideal_reg() )
          set_dontcare(n);      // Unmatchable Nodes
      } // end_switch

      for(int i = n->req() - 1; i >= 0; --i) { // For my children
        Node *m = n->in(i);     // Get ith input
        if (m == NULL) continue; // Ignore NULLs
        uint mop = m->Opcode();

        // Must clone all producers of flags, or we will not match correctly.
        // Suppose a compare setting int-flags is shared (e.g., by a switch-tree);
        // then it will match into an ideal Op_RegFlags. Alas, the fp-flags
        // are also there, so we may match a float-branch to int-flags and
        // expect the allocator to haul the flags from the int-side to the
        // fp-side. No can do.
        if( _must_clone[mop] ) {
          mstack.push(m, Visit);
          continue; // for(int i = ...)
        }

        if( mop == Op_AddP && m->in(AddPNode::Base)->Opcode() == Op_DecodeN ) {
          // Bases used in addresses must be shared, but since
          // they are shared through a DecodeN they may appear
          // to have a single use, so force sharing here.
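          // (I.e., for an AddP whose Base is DecodeN(narrow_val), mark
          // narrow_val itself as shared so it stays a match-tree root.)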
          set_shared(m->in(AddPNode::Base)->in(1));
        }

        // Clone addressing expressions as they are "free" in memory access instructions
        if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
          // Some inputs for address expressions are not put on the stack
          // to avoid marking them as shared and forcing them into a register
          // if they are used only in address expressions.
          // But they should be marked as shared if there are other uses
          // besides address expressions.

          Node *off = m->in(AddPNode::Offset);
          if( off->is_Con() &&
              // When there are other uses besides address expressions
              // put it on the stack and mark as shared.
              !is_visited(m) ) {
            address_visited.test_set(m->_idx); // Flag as address_visited
            Node *adr = m->in(AddPNode::Address);

            // Intel, ARM and friends can handle 2 adds in addressing mode
            if( clone_shift_expressions && adr->is_AddP() &&
                // AtomicAdd is not an addressing expression.
                // Cheap to find it by looking for a screwy base.
                !adr->in(AddPNode::Base)->is_top() &&
                // Are there other uses besides address expressions?
                !is_visited(adr) ) {
              address_visited.set(adr->_idx); // Flag as address_visited
              Node *shift = adr->in(AddPNode::Offset);
              // Check for shift by small constant as well
              if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
                  shift->in(2)->get_int() <= 3 &&
                  // Are there other uses besides address expressions?
                  !is_visited(shift) ) {
                address_visited.set(shift->_idx); // Flag as address_visited
                mstack.push(shift->in(2), Visit);
                Node *conv = shift->in(1);
#ifdef _LP64
                // Allow the Matcher to match the rule which bypasses
                // the ConvI2L operation for an array index on LP64
                // if the index value is positive.
                if( conv->Opcode() == Op_ConvI2L &&
                    conv->as_Type()->type()->is_long()->_lo >= 0 &&
                    // Are there other uses besides address expressions?
                    !is_visited(conv) ) {
                  address_visited.set(conv->_idx); // Flag as address_visited
                  mstack.push(conv->in(1), Pre_Visit);
                } else
#endif
                mstack.push(conv, Pre_Visit);
              } else {
                mstack.push(shift, Pre_Visit);
              }
              mstack.push(adr->in(AddPNode::Address), Pre_Visit);
              mstack.push(adr->in(AddPNode::Base), Pre_Visit);
            } else { // Sparc, Alpha, PPC and friends
              mstack.push(adr, Pre_Visit);
            }

            // Clone X+offset as it also folds into most addressing expressions
            mstack.push(off, Visit);
            mstack.push(m->in(AddPNode::Base), Pre_Visit);
            continue; // for(int i = ...)
          } // if( off->is_Con() )
        }   // if( mem_op &&
        mstack.push(m, Pre_Visit);
      }     // for(int i = ...)
    }
    else if (nstate == Alt_Post_Visit) {
      mstack.pop();             // Remove node from stack
      // We cannot remove the Cmp input from the Bool here, as the Bool may be
      // shared and all users of the Bool need to move the Cmp in parallel.
      // This leaves both the Bool and the If pointing at the Cmp. To
      // prevent the Matcher from trying to Match the Cmp along both paths,
      // BoolNode::match_edge always returns zero.

      // We reorder the Op_If in a pre-order manner, so we can visit without
      // accidentally sharing the Cmp (the Bool and the If make 2 users).
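      // (After the add_req below the If's inputs are, in order: control,
      // Bool, Cmp - the (If (Bool) (CmpX A B)) shape described earlier.)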
      n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool
    }
    else if (nstate == Post_Visit) {
      mstack.pop();             // Remove node from stack

      // Now hack a few special opcodes
      switch( n->Opcode() ) {   // Handle some opcodes specially
      case Op_StorePConditional:
      case Op_StoreIConditional:
      case Op_StoreLConditional:
      case Op_CompareAndSwapI:
      case Op_CompareAndSwapL:
      case Op_CompareAndSwapP:
      case Op_CompareAndSwapN: { // Convert trinary to binary-tree
        Node *newval = n->in(MemNode::ValueIn );
        Node *oldval = n->in(LoadStoreNode::ExpectedIn);
        Node *pair = new (C, 3) BinaryNode( oldval, newval );
        n->set_req(MemNode::ValueIn,pair);
        n->del_req(LoadStoreNode::ExpectedIn);
        break;
      }
      case Op_CMoveD:           // Convert trinary to binary-tree
      case Op_CMoveF:
      case Op_CMoveI:
      case Op_CMoveL:
      case Op_CMoveN:
      case Op_CMoveP: {
        // Restructure into a binary tree for Matching. It's possible that
        // we could move this code up next to the graph reshaping for IfNodes
        // or vice-versa, but I do not want to debug this for Ladybird.
        // 10/2/2000 CNC.
        Node *pair1 = new (C, 3) BinaryNode(n->in(1),n->in(1)->in(1));
        n->set_req(1,pair1);
        Node *pair2 = new (C, 3) BinaryNode(n->in(2),n->in(3));
        n->set_req(2,pair2);
        n->del_req(3);
        break;
      }
      case Op_StrEquals: {
        Node *pair1 = new (C, 3) BinaryNode(n->in(2),n->in(3));
        n->set_req(2,pair1);
        n->set_req(3,n->in(4));
        n->del_req(4);
        break;
      }
      case Op_StrComp:
      case Op_StrIndexOf: {
        Node *pair1 = new (C, 3) BinaryNode(n->in(2),n->in(3));
        n->set_req(2,pair1);
        Node *pair2 = new (C, 3) BinaryNode(n->in(4),n->in(5));
        n->set_req(3,pair2);
        n->del_req(5);
        n->del_req(4);
        break;
      }
      default:
        break;
      }
    }
    else {
      ShouldNotReachHere();
    }
  } // end of while (mstack.is_nonempty())
}

#ifdef ASSERT
// machine-independent root to machine-dependent root
void Matcher::dump_old2new_map() {
  _old2new_map.dump();
}
#endif

//---------------------------collect_null_checks-------------------------------
// Find null checks in the ideal graph; write a machine-specific node for
// each one. Used by later implicit-null-check handling. Actually collects
// either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
// value being tested.
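// (For example, a guard like "if (p != NULL) use(p)" appears here as
// (If (Bool ne (CmpP p NULL))); the IfTrue projection is the not-null
// path, so that projection and the tested value 'p' are recorded as a
// pair in _null_check_tests.)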
void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
  Node *iff = proj->in(0);
  if( iff->Opcode() == Op_If ) {
    // During matching If's have Bool & Cmp side-by-side
    BoolNode *b = iff->in(1)->as_Bool();
    Node *cmp = iff->in(2);
    int opc = cmp->Opcode();
    if (opc != Op_CmpP && opc != Op_CmpN) return;

    const Type* ct = cmp->in(2)->bottom_type();
    if (ct == TypePtr::NULL_PTR ||
        (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {

      bool push_it = false;
      if( proj->Opcode() == Op_IfTrue ) {
        extern int all_null_checks_found;
        all_null_checks_found++;
        if( b->_test._test == BoolTest::ne ) {
          push_it = true;
        }
      } else {
        assert( proj->Opcode() == Op_IfFalse, "" );
        if( b->_test._test == BoolTest::eq ) {
          push_it = true;
        }
      }
      if( push_it ) {
        _null_check_tests.push(proj);
        Node* val = cmp->in(1);
#ifdef _LP64
        if (val->bottom_type()->isa_narrowoop() &&
            !Matcher::narrow_oop_use_complex_address()) {
          //
          // Look for a DecodeN node which should be pinned to orig_proj.
          // On platforms (Sparc) which cannot handle 2 adds
          // in addressing mode we have to keep a DecodeN node and
          // use it to do the implicit NULL check in the address.
          //
          // The DecodeN node was pinned to the non-null path (orig_proj)
          // during the CastPP transformation in final_graph_reshaping_impl().
          //
          uint cnt = orig_proj->outcnt();
          for (uint i = 0; i < orig_proj->outcnt(); i++) {
            Node* d = orig_proj->raw_out(i);
            if (d->is_DecodeN() && d->in(1) == val) {
              val = d;
              val->set_req(0, NULL); // Unpin now.
              // Mark this as a special case to distinguish it from
              // a regular case: CmpP(DecodeN, NULL).
              val = (Node*)(((intptr_t)val) | 1);
              break;
            }
          }
        }
#endif
        _null_check_tests.push(val);
      }
    }
  }
}

//---------------------------validate_null_checks------------------------------
// It's possible that the value being NULL checked is not the root of a match
// tree. If so, I cannot use the value in an implicit null check.
void Matcher::validate_null_checks( ) {
  uint cnt = _null_check_tests.size();
  for( uint i=0; i < cnt; i+=2 ) {
    Node *test = _null_check_tests[i];
    Node *val = _null_check_tests[i+1];
    bool is_decoden = ((intptr_t)val) & 1;
    val = (Node*)(((intptr_t)val) & ~1);
    if (has_new_node(val)) {
      Node* new_val = new_node(val);
      if (is_decoden) {
        assert(val->is_DecodeN() && val->in(0) == NULL, "sanity");
        // Note: new_val may have a control edge if
        // the original ideal node DecodeN was matched before
        // it was unpinned in Matcher::collect_null_checks().
        // Unpin the mach node and mark it.
        new_val->set_req(0, NULL);
        new_val = (Node*)(((intptr_t)new_val) | 1);
      }
      // Is a match-tree root, so replace with the matched value
      _null_check_tests.map(i+1, new_val);
    } else {
      // Yank from candidate list
      _null_check_tests.map(i+1,_null_check_tests[--cnt]);
      _null_check_tests.map(i,_null_check_tests[--cnt]);
      _null_check_tests.pop();
      _null_check_tests.pop();
      i-=2;
    }
  }
}


// Used by the DFA in dfa_sparc.cpp. Check for a prior FastLock
// acting as an Acquire, in which case we don't need an Acquire here.
// We retain the Node to act as a compiler ordering barrier.
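// (The shape sought, roughly: acq->in(0) is a Region whose first input is
// a Projection of a Call to complete_monitor_locking_Java, i.e. the slow
// path of a FastLock that has just completed.)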
bool Matcher::prior_fast_lock( const Node *acq ) {
  Node *r = acq->in(0);
  if( !r->is_Region() || r->req() <= 1 ) return false;
  Node *proj = r->in(1);
  if( !proj->is_Proj() ) return false;
  Node *call = proj->in(0);
  if( !call->is_Call() || call->as_Call()->entry_point() != OptoRuntime::complete_monitor_locking_Java() )
    return false;

  return true;
}

// Used by the DFA in dfa_sparc.cpp. Check for a following FastUnLock
// acting as a Release, in which case we don't need a Release here. We
// retain the Node to act as a compiler ordering barrier.
bool Matcher::post_fast_unlock( const Node *rel ) {
  Compile *C = Compile::current();
  assert( rel->Opcode() == Op_MemBarRelease, "" );
  const MemBarReleaseNode *mem = (const MemBarReleaseNode*)rel;
  DUIterator_Fast imax, i = mem->fast_outs(imax);
  Node *ctrl = NULL;
  while( true ) {
    ctrl = mem->fast_out(i);    // Throw out-of-bounds if proj not found
    assert( ctrl->is_Proj(), "only projections here" );
    ProjNode *proj = (ProjNode*)ctrl;
    if( proj->_con == TypeFunc::Control &&
        !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
      break;
    i++;
  }
  Node *iff = NULL;
  for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
    Node *x = ctrl->fast_out(j);
    if( x->is_If() && x->req() > 1 &&
        !C->node_arena()->contains(x) ) { // Unmatched old-space only
      iff = x;
      break;
    }
  }
  if( !iff ) return false;
  Node *bol = iff->in(1);
  // The iff might be some random subclass of If or bol might be Con-Top
  if (!bol->is_Bool()) return false;
  assert( bol->req() > 1, "" );
  return (bol->in(1)->Opcode() == Op_FastUnlock);
}

// Used by the DFA in dfa_xxx.cpp. Check for a following barrier or
// atomic instruction acting as a StoreLoad barrier without any
// intervening volatile load, in which case we don't need a barrier here.
// We retain the Node to act as a compiler ordering barrier.
bool Matcher::post_store_load_barrier(const Node *vmb) {
  Compile *C = Compile::current();
  assert( vmb->is_MemBar(), "" );
  assert( vmb->Opcode() != Op_MemBarAcquire, "" );
  const MemBarNode *mem = (const MemBarNode*)vmb;

  // Get the Proj node, ctrl, that can be used to iterate forward
  Node *ctrl = NULL;
  DUIterator_Fast imax, i = mem->fast_outs(imax);
  while( true ) {
    ctrl = mem->fast_out(i);    // Throw out-of-bounds if proj not found
    assert( ctrl->is_Proj(), "only projections here" );
    ProjNode *proj = (ProjNode*)ctrl;
    if( proj->_con == TypeFunc::Control &&
        !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
      break;
    i++;
  }

  for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
    Node *x = ctrl->fast_out(j);
    int xop = x->Opcode();

    // We don't need the current barrier if we see another one, or a lock,
    // before seeing a volatile load.
    //
    // Op_FastUnlock previously appeared in the Op_* list below.
    // With the advent of 1-0 lock operations we're no longer guaranteed
    // that a monitor exit operation contains a serializing instruction.
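    // (So only the explicit barrier/CAS opcodes tested below are trusted
    // to serialize; a monitor exit by itself is not.)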

    if (xop == Op_MemBarVolatile ||
        xop == Op_FastLock ||
        xop == Op_CompareAndSwapL ||
        xop == Op_CompareAndSwapP ||
        xop == Op_CompareAndSwapN ||
        xop == Op_CompareAndSwapI)
      return true;

    if (x->is_MemBar()) {
      // We must retain this membar if there is an upcoming volatile
      // load, which will be preceded by an acquire membar.
      if (xop == Op_MemBarAcquire)
        return false;
      // For other kinds of barriers, check by pretending we
      // are them, and seeing if we can be removed.
      else
        return post_store_load_barrier((const MemBarNode*)x);
    }

    // Delicate code to detect the case of an upcoming fastlock block
    if( x->is_If() && x->req() > 1 &&
        !C->node_arena()->contains(x) ) { // Unmatched old-space only
      Node *iff = x;
      Node *bol = iff->in(1);
      // The iff might be some random subclass of If or bol might be Con-Top
      if (!bol->is_Bool()) return false;
      assert( bol->req() > 1, "" );
      return (bol->in(1)->Opcode() == Op_FastUnlock);
    }
    // Probably not necessary to check for these
    if (x->is_Call() || x->is_SafePoint() || x->is_block_proj())
      return false;
  }
  return false;
}

//=============================================================================
//---------------------------State---------------------------------------------
State::State(void) {
#ifdef ASSERT
  _id = 0;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  //memset(_cost, -1, sizeof(_cost));
  //memset(_rule, -1, sizeof(_rule));
#endif
  memset(_valid, 0, sizeof(_valid));
}

#ifdef ASSERT
State::~State() {
  _id = 99;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  memset(_cost, -3, sizeof(_cost));
  memset(_rule, -3, sizeof(_rule));
}
#endif

#ifndef PRODUCT
//---------------------------dump----------------------------------------------
void State::dump() {
  tty->print("\n");
  dump(0);
}

void State::dump(int depth) {
  for( int j = 0; j < depth; j++ )
    tty->print("   ");
  tty->print("--N: ");
  _leaf->dump();
  uint i;
  for( i = 0; i < _LAST_MACH_OPER; i++ )
    // Check for valid entry
    if( valid(i) ) {
      for( int j = 0; j < depth; j++ )
        tty->print("   ");
      assert(_cost[i] != max_juint, "cost must be a valid value");
      assert(_rule[i] < _last_Mach_Node, "rule[i] must be valid rule");
      tty->print_cr("%s  %d  %s",
                    ruleName[i], _cost[i], ruleName[_rule[i]] );
    }
  tty->print_cr("");

  for( i=0; i<2; i++ )
    if( _kids[i] )
      _kids[i]->dump(depth+1);
}
#endif