/*
 * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "compiler/compileLog.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shenandoah/brooksPointer.hpp"
#include "gc/shenandoah/shenandoahConnectionMatrix.hpp"
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "memory/resourceArea.hpp"
#include "opto/addnode.hpp"
#include "opto/castnode.hpp"
#include "opto/convertnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/intrinsicnode.hpp"
#include "opto/locknode.hpp"
#include "opto/machnode.hpp"
#include "opto/opaquenode.hpp"
#include "opto/parse.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/shenandoahSupport.hpp"
#include "runtime/deoptimization.hpp"
#include "runtime/sharedRuntime.hpp"

//----------------------------GraphKit-----------------------------------------
// Main utility constructor.
GraphKit::GraphKit(JVMState* jvms)
  : Phase(Phase::Parser),
    _env(C->env()),
    _gvn(*C->initial_gvn())
{
  _exceptions = jvms->map()->next_exception();
  if (_exceptions != NULL)  jvms->map()->set_next_exception(NULL);
  set_jvms(jvms);
}

// Private constructor for parser.
GraphKit::GraphKit()
  : Phase(Phase::Parser),
    _env(C->env()),
    _gvn(*C->initial_gvn())
{
  _exceptions = NULL;
  set_map(NULL);
  debug_only(_sp = -99);
  debug_only(set_bci(-99));
}



//---------------------------clean_stack---------------------------------------
// Clear away rubbish from the stack area of the JVM state.
// This destroys any arguments that may be waiting on the stack.
void GraphKit::clean_stack(int from_sp) {
  SafePointNode* map      = this->map();
  JVMState*      jvms     = this->jvms();
  int            stk_size = jvms->stk_size();
  int            stkoff   = jvms->stkoff();
  Node*          top      = this->top();
  for (int i = from_sp; i < stk_size; i++) {
    if (map->in(stkoff + i) != top) {
      map->set_req(stkoff + i, top);
    }
  }
}


//--------------------------------sync_jvms-----------------------------------
// Make sure our current jvms agrees with our parse state.
JVMState* GraphKit::sync_jvms() const {
  JVMState* jvms = this->jvms();
  jvms->set_bci(bci());       // Record the new bci in the JVMState
  jvms->set_sp(sp());         // Record the new sp in the JVMState
  assert(jvms_in_sync(), "jvms is now in sync");
  return jvms;
}

//--------------------------------sync_jvms_for_reexecute---------------------
// Make sure our current jvms agrees with our parse state.  This version
// uses the reexecute_sp for reexecuting bytecodes.
JVMState* GraphKit::sync_jvms_for_reexecute() {
  JVMState* jvms = this->jvms();
  jvms->set_bci(bci());          // Record the new bci in the JVMState
  jvms->set_sp(reexecute_sp());  // Record the new sp in the JVMState
  return jvms;
}

#ifdef ASSERT
bool GraphKit::jvms_in_sync() const {
  Parse* parse = is_Parse();
  if (parse == NULL) {
    if (bci() != jvms()->bci())      return false;
    if (sp()  != (int)jvms()->sp())  return false;
    return true;
  }
  if (jvms()->method() != parse->method())  return false;
  if (jvms()->bci()    != parse->bci())     return false;
  int jvms_sp = jvms()->sp();
  if (jvms_sp != parse->sp())               return false;
  int jvms_depth = jvms()->depth();
  if (jvms_depth != parse->depth())         return false;
  return true;
}

// Local helper checks for special internal merge points
// used to accumulate and merge exception states.
// They are marked by the region's in(0) edge being the map itself.
// Such merge points must never "escape" into the parser at large,
// until they have been handed to gvn.transform.
static bool is_hidden_merge(Node* reg) {
  if (reg == NULL)  return false;
  if (reg->is_Phi()) {
    reg = reg->in(0);
    if (reg == NULL)  return false;
  }
  return reg->is_Region() && reg->in(0) != NULL && reg->in(0)->is_Root();
}

void GraphKit::verify_map() const {
  if (map() == NULL)  return;  // null map is OK
  assert(map()->req() <= jvms()->endoff(), "no extra garbage on map");
  assert(!map()->has_exceptions(), "call add_exception_states_from 1st");
  assert(!is_hidden_merge(control()), "call use_exception_state, not set_map");
}

void GraphKit::verify_exception_state(SafePointNode* ex_map) {
  assert(ex_map->next_exception() == NULL, "not already part of a chain");
  assert(has_saved_ex_oop(ex_map), "every exception state has an ex_oop");
}
#endif

//---------------------------stop_and_kill_map---------------------------------
// Set _map to NULL, signalling a stop to further bytecode execution.
// First smash the current map's control to a constant, to mark it dead.
void GraphKit::stop_and_kill_map() {
  SafePointNode* dead_map = stop();
  if (dead_map != NULL) {
    dead_map->disconnect_inputs(NULL, C); // Mark the map as killed.
    assert(dead_map->is_killed(), "must be so marked");
  }
}


//--------------------------------stopped--------------------------------------
// Tell if _map is NULL, or control is top.
bool GraphKit::stopped() {
  if (map() == NULL)            return true;
  else if (control() == top())  return true;
  else                          return false;
}


//-----------------------------has_ex_handler----------------------------------
// Tell if this method or any caller method has exception handlers.
bool GraphKit::has_ex_handler() {
  for (JVMState* jvmsp = jvms(); jvmsp != NULL; jvmsp = jvmsp->caller()) {
    if (jvmsp->has_method() && jvmsp->method()->has_exception_handlers()) {
      return true;
    }
  }
  return false;
}

//------------------------------save_ex_oop------------------------------------
// Save an exception without blowing stack contents or other JVM state.
void GraphKit::set_saved_ex_oop(SafePointNode* ex_map, Node* ex_oop) {
  assert(!has_saved_ex_oop(ex_map), "clear ex-oop before setting again");
  ex_map->add_req(ex_oop);
  debug_only(verify_exception_state(ex_map));
}

inline static Node* common_saved_ex_oop(SafePointNode* ex_map, bool clear_it) {
  assert(GraphKit::has_saved_ex_oop(ex_map), "ex_oop must be there");
  Node* ex_oop = ex_map->in(ex_map->req()-1);
  if (clear_it)  ex_map->del_req(ex_map->req()-1);
  return ex_oop;
}

//-----------------------------saved_ex_oop------------------------------------
// Recover a saved exception from its map.
Node* GraphKit::saved_ex_oop(SafePointNode* ex_map) {
  return common_saved_ex_oop(ex_map, false);
}

//--------------------------clear_saved_ex_oop---------------------------------
// Erase a previously saved exception from its map.
Node* GraphKit::clear_saved_ex_oop(SafePointNode* ex_map) {
  return common_saved_ex_oop(ex_map, true);
}

#ifdef ASSERT
//---------------------------has_saved_ex_oop----------------------------------
// Tell whether an exception oop has been saved on this map.
bool GraphKit::has_saved_ex_oop(SafePointNode* ex_map) {
  return ex_map->req() == ex_map->jvms()->endoff()+1;
}
#endif

//-------------------------make_exception_state--------------------------------
// Turn the current JVM state into an exception state, appending the ex_oop.
SafePointNode* GraphKit::make_exception_state(Node* ex_oop) {
  sync_jvms();
  SafePointNode* ex_map = stop();  // do not manipulate this map any more
  set_saved_ex_oop(ex_map, ex_oop);
  return ex_map;
}


//--------------------------add_exception_state--------------------------------
// Add an exception to my list of exceptions.
void GraphKit::add_exception_state(SafePointNode* ex_map) {
  if (ex_map == NULL || ex_map->control() == top()) {
    return;
  }
#ifdef ASSERT
  verify_exception_state(ex_map);
  if (has_exceptions()) {
    assert(ex_map->jvms()->same_calls_as(_exceptions->jvms()), "all collected exceptions must come from the same place");
  }
#endif

  // If there is already an exception of exactly this type, merge with it.
  // In particular, null-checks and other low-level exceptions common up here.
  Node*       ex_oop  = saved_ex_oop(ex_map);
  const Type* ex_type = _gvn.type(ex_oop);
  if (ex_oop == top()) {
    // No action needed.
    return;
  }
  assert(ex_type->isa_instptr(), "exception must be an instance");
  for (SafePointNode* e2 = _exceptions; e2 != NULL; e2 = e2->next_exception()) {
    const Type* ex_type2 = _gvn.type(saved_ex_oop(e2));
    // We check sp also because call bytecodes can generate exceptions
    // both before and after arguments are popped!
    if (ex_type2 == ex_type
        && e2->_jvms->sp() == ex_map->_jvms->sp()) {
      combine_exception_states(ex_map, e2);
      return;
    }
  }

  // No pre-existing exception of the same type.  Chain it on the list.
  push_exception_state(ex_map);
}

//-----------------------add_exception_states_from-----------------------------
void GraphKit::add_exception_states_from(JVMState* jvms) {
  SafePointNode* ex_map = jvms->map()->next_exception();
  if (ex_map != NULL) {
    jvms->map()->set_next_exception(NULL);
    for (SafePointNode* next_map; ex_map != NULL; ex_map = next_map) {
      next_map = ex_map->next_exception();
      ex_map->set_next_exception(NULL);
      add_exception_state(ex_map);
    }
  }
}

//-----------------------transfer_exceptions_into_jvms-------------------------
JVMState* GraphKit::transfer_exceptions_into_jvms() {
  if (map() == NULL) {
    // We need a JVMS to carry the exceptions, but the map has gone away.
    // Create a scratch JVMS, cloned from any of the exception states...
    if (has_exceptions()) {
      _map = _exceptions;
      _map = clone_map();
      _map->set_next_exception(NULL);
      clear_saved_ex_oop(_map);
      debug_only(verify_map());
    } else {
      // ...or created from scratch
      JVMState* jvms = new (C) JVMState(_method, NULL);
      jvms->set_bci(_bci);
      jvms->set_sp(_sp);
      jvms->set_map(new SafePointNode(TypeFunc::Parms, jvms));
      set_jvms(jvms);
      for (uint i = 0; i < map()->req(); i++)  map()->init_req(i, top());
      set_all_memory(top());
      while (map()->req() < jvms->endoff())  map()->add_req(top());
    }
    // (This is a kludge, in case you didn't notice.)
    set_control(top());
  }
  JVMState* jvms = sync_jvms();
  assert(!jvms->map()->has_exceptions(), "no exceptions on this map yet");
  jvms->map()->set_next_exception(_exceptions);
  _exceptions = NULL;   // done with this set of exceptions
  return jvms;
}

static inline void add_n_reqs(Node* dstphi, Node* srcphi) {
  assert(is_hidden_merge(dstphi), "must be a special merge node");
  assert(is_hidden_merge(srcphi), "must be a special merge node");
  uint limit = srcphi->req();
  for (uint i = PhiNode::Input; i < limit; i++) {
    dstphi->add_req(srcphi->in(i));
  }
}
static inline void add_one_req(Node* dstphi, Node* src) {
  assert(is_hidden_merge(dstphi), "must be a special merge node");
  assert(!is_hidden_merge(src), "must not be a special merge node");
  dstphi->add_req(src);
}

//-----------------------combine_exception_states------------------------------
// This helper function combines exception states by building phis on a
// specially marked state-merging region.  These regions and phis are
// untransformed, and can build up gradually.  The region is marked by
// having a control input of its exception map, rather than NULL.  Such
// regions do not appear except in this function, and in use_exception_state.
void GraphKit::combine_exception_states(SafePointNode* ex_map, SafePointNode* phi_map) {
  if (failing())  return;  // dying anyway...
  JVMState* ex_jvms = ex_map->_jvms;
  assert(ex_jvms->same_calls_as(phi_map->_jvms), "consistent call chains");
  assert(ex_jvms->stkoff() == phi_map->_jvms->stkoff(), "matching locals");
  assert(ex_jvms->sp() == phi_map->_jvms->sp(), "matching stack sizes");
  assert(ex_jvms->monoff() == phi_map->_jvms->monoff(), "matching JVMS");
  assert(ex_jvms->scloff() == phi_map->_jvms->scloff(), "matching scalar replaced objects");
  assert(ex_map->req() == phi_map->req(), "matching maps");
  uint tos = ex_jvms->stkoff() + ex_jvms->sp();
  Node*         hidden_merge_mark = root();
  Node*         region  = phi_map->control();
  MergeMemNode* phi_mem = phi_map->merged_memory();
  MergeMemNode* ex_mem  = ex_map->merged_memory();
  if (region->in(0) != hidden_merge_mark) {
    // The control input is not (yet) a specially-marked region in phi_map.
    // Make it so, and build some phis.
    region = new RegionNode(2);
    _gvn.set_type(region, Type::CONTROL);
    region->set_req(0, hidden_merge_mark);  // marks an internal ex-state
    region->init_req(1, phi_map->control());
    phi_map->set_control(region);
    Node* io_phi = PhiNode::make(region, phi_map->i_o(), Type::ABIO);
    record_for_igvn(io_phi);
    _gvn.set_type(io_phi, Type::ABIO);
    phi_map->set_i_o(io_phi);
    for (MergeMemStream mms(phi_mem); mms.next_non_empty(); ) {
      Node* m = mms.memory();
      Node* m_phi = PhiNode::make(region, m, Type::MEMORY, mms.adr_type(C));
      record_for_igvn(m_phi);
      _gvn.set_type(m_phi, Type::MEMORY);
      mms.set_memory(m_phi);
    }
  }

  // Either or both of phi_map and ex_map might already be converted into phis.
  Node* ex_control = ex_map->control();
  // if there is special marking on ex_map also, we add multiple edges from src
  bool add_multiple = (ex_control->in(0) == hidden_merge_mark);
  // how wide was the destination phi_map, originally?
  uint orig_width = region->req();

  if (add_multiple) {
    add_n_reqs(region, ex_control);
    add_n_reqs(phi_map->i_o(), ex_map->i_o());
  } else {
    // ex_map has no merges, so we just add single edges everywhere
    add_one_req(region, ex_control);
    add_one_req(phi_map->i_o(), ex_map->i_o());
  }
  for (MergeMemStream mms(phi_mem, ex_mem); mms.next_non_empty2(); ) {
    if (mms.is_empty()) {
      // get a copy of the base memory, and patch some inputs into it
      const TypePtr* adr_type = mms.adr_type(C);
      Node* phi = mms.force_memory()->as_Phi()->slice_memory(adr_type);
      assert(phi->as_Phi()->region() == mms.base_memory()->in(0), "");
      mms.set_memory(phi);
      // Prepare to append interesting stuff onto the newly sliced phi:
      while (phi->req() > orig_width)  phi->del_req(phi->req()-1);
    }
    // Append stuff from ex_map:
    if (add_multiple) {
      add_n_reqs(mms.memory(), mms.memory2());
    } else {
      add_one_req(mms.memory(), mms.memory2());
    }
  }
  uint limit = ex_map->req();
  for (uint i = TypeFunc::Parms; i < limit; i++) {
    // Skip everything in the JVMS after tos.  (The ex_oop follows.)
    if (i == tos)  i = ex_jvms->monoff();
    Node* src = ex_map->in(i);
    Node* dst = phi_map->in(i);
    if (src != dst) {
      PhiNode* phi;
      if (dst->in(0) != region) {
        dst = phi = PhiNode::make(region, dst, _gvn.type(dst));
        record_for_igvn(phi);
        _gvn.set_type(phi, phi->type());
        phi_map->set_req(i, dst);
        // Prepare to append interesting stuff onto the new phi:
        while (dst->req() > orig_width)  dst->del_req(dst->req()-1);
      } else {
        assert(dst->is_Phi(), "nobody else uses a hidden region");
        phi = dst->as_Phi();
      }
      if (add_multiple && src->in(0) == ex_control) {
        // Both are phis.
        add_n_reqs(dst, src);
      } else {
        while (dst->req() < region->req())  add_one_req(dst, src);
      }
      const Type* srctype = _gvn.type(src);
      if (phi->type() != srctype) {
        const Type* dsttype = phi->type()->meet_speculative(srctype);
        if (phi->type() != dsttype) {
          phi->set_type(dsttype);
          _gvn.set_type(phi, dsttype);
        }
      }
    }
  }
  phi_map->merge_replaced_nodes_with(ex_map);
}

//--------------------------use_exception_state--------------------------------
Node* GraphKit::use_exception_state(SafePointNode* phi_map) {
  if (failing()) { stop(); return top(); }
  Node* region = phi_map->control();
  Node* hidden_merge_mark = root();
  assert(phi_map->jvms()->map() == phi_map, "sanity: 1-1 relation");
  Node* ex_oop = clear_saved_ex_oop(phi_map);
  if (region->in(0) == hidden_merge_mark) {
    // Special marking for internal ex-states.  Process the phis now.
    region->set_req(0, region);  // now it's an ordinary region
    set_jvms(phi_map->jvms());   // ...so now we can use it as a map
    // Note: Setting the jvms also sets the bci and sp.
    set_control(_gvn.transform(region));
    uint tos = jvms()->stkoff() + sp();
    for (uint i = 1; i < tos; i++) {
      Node* x = phi_map->in(i);
      if (x->in(0) == region) {
        assert(x->is_Phi(), "expected a special phi");
        phi_map->set_req(i, _gvn.transform(x));
      }
    }
    for (MergeMemStream mms(merged_memory()); mms.next_non_empty(); ) {
      Node* x = mms.memory();
      if (x->in(0) == region) {
        assert(x->is_Phi(), "nobody else uses a hidden region");
        mms.set_memory(_gvn.transform(x));
      }
    }
    if (ex_oop->in(0) == region) {
      assert(ex_oop->is_Phi(), "expected a special phi");
      ex_oop = _gvn.transform(ex_oop);
    }
  } else {
    set_jvms(phi_map->jvms());
  }

  assert(!is_hidden_merge(phi_map->control()), "hidden ex. states cleared");
  assert(!is_hidden_merge(phi_map->i_o()), "hidden ex. states cleared");
  return ex_oop;
}

//---------------------------------java_bc-------------------------------------
Bytecodes::Code GraphKit::java_bc() const {
  ciMethod* method = this->method();
  int       bci    = this->bci();
  if (method != NULL && bci != InvocationEntryBci)
    return method->java_code_at_bci(bci);
  else
    return Bytecodes::_illegal;
}

void GraphKit::uncommon_trap_if_should_post_on_exceptions(Deoptimization::DeoptReason reason,
                                                          bool must_throw) {
  // if the exception capability is set, then we will generate code
  // to check the JavaThread.should_post_on_exceptions flag to see
  // if we actually need to report exception events (for this
  // thread).  If we don't need to report exception events, we will
  // take the normal fast path provided by add_exception_events.  If
  // exception event reporting is enabled for this thread, we will
  // take the uncommon_trap in the BuildCutout below.

  // first must access the should_post_on_exceptions_flag in this thread's JavaThread
  Node* jthread = _gvn.transform(new ThreadLocalNode());
  Node* adr = basic_plus_adr(top(), jthread, in_bytes(JavaThread::should_post_on_exceptions_flag_offset()));
  Node* should_post_flag = make_load(control(), adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw, MemNode::unordered);

  // Test the should_post_on_exceptions_flag vs. 0
  Node* chk = _gvn.transform( new CmpINode(should_post_flag, intcon(0)) );
  Node* tst = _gvn.transform( new BoolNode(chk, BoolTest::eq) );

  // Branch to slow_path if should_post_on_exceptions_flag was true
  { BuildCutout unless(this, tst, PROB_MAX);
    // Do not try anything fancy if we're notifying the VM on every throw.
    // Cf. case Bytecodes::_athrow in parse2.cpp.
    uncommon_trap(reason, Deoptimization::Action_none,
                  (ciKlass*)NULL, (char*)NULL, must_throw);
  }

}

//------------------------------builtin_throw----------------------------------
void GraphKit::builtin_throw(Deoptimization::DeoptReason reason, Node* arg) {
  bool must_throw = true;

  if (env()->jvmti_can_post_on_exceptions()) {
    // check if we must post exception events, take uncommon trap if so
    uncommon_trap_if_should_post_on_exceptions(reason, must_throw);
    // here if should_post_on_exceptions is false
    // continue on with the normal codegen
  }

  // If this particular condition has not yet happened at this
  // bytecode, then use the uncommon trap mechanism, and allow for
  // a future recompilation if several traps occur here.
  // If the throw is hot, try to use a more complicated inline mechanism
  // which keeps execution inside the compiled code.
  bool treat_throw_as_hot = false;
  ciMethodData* md = method()->method_data();

  if (ProfileTraps) {
    if (too_many_traps(reason)) {
      treat_throw_as_hot = true;
    }
    // (If there is no MDO at all, assume it is early in
    // execution, and that any deopts are part of the
    // startup transient, and don't need to be remembered.)

    // Also, if there is a local exception handler, treat all throws
    // as hot if there has been at least one in this method.
    if (C->trap_count(reason) != 0
        && method()->method_data()->trap_count(reason) != 0
        && has_ex_handler()) {
      treat_throw_as_hot = true;
    }
  }

  // If this throw happens frequently, an uncommon trap might cause
  // a performance pothole.  If there is a local exception handler,
  // and if this particular bytecode appears to be deoptimizing often,
  // let us handle the throw inline, with a preconstructed instance.
  // Note:   If the deopt count has blown up, the uncommon trap
  // runtime is going to flush this nmethod, no matter what.
  if (treat_throw_as_hot
      && (!StackTraceInThrowable || OmitStackTraceInFastThrow)) {
    // If the throw is local, we use a pre-existing instance and
    // punt on the backtrace.  This would lead to a missing backtrace
    // (a repeat of 4292742) if the backtrace object is ever asked
    // for its backtrace.
    // Fixing this remaining case of 4292742 requires some flavor of
    // escape analysis.  Leave that for the future.
    ciInstance* ex_obj = NULL;
    switch (reason) {
    case Deoptimization::Reason_null_check:
      ex_obj = env()->NullPointerException_instance();
      break;
    case Deoptimization::Reason_div0_check:
      ex_obj = env()->ArithmeticException_instance();
      break;
    case Deoptimization::Reason_range_check:
      ex_obj = env()->ArrayIndexOutOfBoundsException_instance();
      break;
    case Deoptimization::Reason_class_check:
      if (java_bc() == Bytecodes::_aastore) {
        ex_obj = env()->ArrayStoreException_instance();
      } else {
        ex_obj = env()->ClassCastException_instance();
      }
      break;
    }
    if (failing()) { stop(); return; }  // exception allocation might fail
    if (ex_obj != NULL) {
      // Cheat with a preallocated exception object.
      if (C->log() != NULL)
        C->log()->elem("hot_throw preallocated='1' reason='%s'",
                       Deoptimization::trap_reason_name(reason));
      const TypeInstPtr* ex_con  = TypeInstPtr::make(ex_obj);
      Node*              ex_node = _gvn.transform(ConNode::make(ex_con));

      // Clear the detail message of the preallocated exception object.
      // Weblogic sometimes mutates the detail message of exceptions
      // using reflection.
      int offset = java_lang_Throwable::get_detailMessage_offset();
      const TypePtr* adr_typ = ex_con->add_offset(offset);

      Node *adr = basic_plus_adr(ex_node, ex_node, offset);
      const TypeOopPtr* val_type = TypeOopPtr::make_from_klass(env()->String_klass());
      // Conservatively release stores of object references.
      Node *store = store_oop_to_object(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT, MemNode::release);

      add_exception_state(make_exception_state(ex_node));
      return;
    }
  }

  // %%% Maybe add entry to OptoRuntime which directly throws the exc.?
  // It won't be much cheaper than bailing to the interp., since we'll
  // have to pass up all the debug-info, and the runtime will have to
  // create the stack trace.

  // Usual case:  Bail to interpreter.
  // Reserve the right to recompile if we haven't seen anything yet.

  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? C->method() : NULL;
  Deoptimization::DeoptAction action = Deoptimization::Action_maybe_recompile;
  if (treat_throw_as_hot
      && (method()->method_data()->trap_recompiled_at(bci(), m)
          || C->too_many_traps(reason))) {
    // We cannot afford to take more traps here.  Suffer in the interpreter.
    if (C->log() != NULL)
      C->log()->elem("hot_throw preallocated='0' reason='%s' mcount='%d'",
                     Deoptimization::trap_reason_name(reason),
                     C->trap_count(reason));
    action = Deoptimization::Action_none;
  }

  // "must_throw" prunes the JVM state to include only the stack, if there
  // are no local exception handlers.  This should cut down on register
  // allocation time and code size, by drastically reducing the number
  // of in-edges on the call to the uncommon trap.

  uncommon_trap(reason, action, (ciKlass*)NULL, (char*)NULL, must_throw);
}


//----------------------------PreserveJVMState---------------------------------
PreserveJVMState::PreserveJVMState(GraphKit* kit, bool clone_map) {
  debug_only(kit->verify_map());
  _kit    = kit;
  _map    = kit->map();   // preserve the map
  _sp     = kit->sp();
  kit->set_map(clone_map ? kit->clone_map() : NULL);
#ifdef ASSERT
  _bci    = kit->bci();
  Parse* parser = kit->is_Parse();
  int block = (parser == NULL || parser->block() == NULL) ?
              -1 : parser->block()->rpo();
  _block  = block;
#endif
}
PreserveJVMState::~PreserveJVMState() {
  GraphKit* kit = _kit;
#ifdef ASSERT
  assert(kit->bci() == _bci, "bci must not shift");
  Parse* parser = kit->is_Parse();
  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
  assert(block == _block, "block must not shift");
#endif
  kit->set_map(_map);
  kit->set_sp(_sp);
}


//-----------------------------BuildCutout-------------------------------------
BuildCutout::BuildCutout(GraphKit* kit, Node* p, float prob, float cnt)
  : PreserveJVMState(kit)
{
  assert(p->is_Con() || p->is_Bool(), "test must be a bool");
  SafePointNode* outer_map = _map;   // preserved map is caller's
  SafePointNode* inner_map = kit->map();
  IfNode* iff = kit->create_and_map_if(outer_map->control(), p, prob, cnt);
  outer_map->set_control(kit->gvn().transform( new IfTrueNode(iff) ));
  inner_map->set_control(kit->gvn().transform( new IfFalseNode(iff) ));
}
BuildCutout::~BuildCutout() {
  GraphKit* kit = _kit;
  assert(kit->stopped(), "cutout code must stop, throw, return, etc.");
}

//---------------------------PreserveReexecuteState----------------------------
PreserveReexecuteState::PreserveReexecuteState(GraphKit* kit) {
  assert(!kit->stopped(), "must call stopped() before");
  _kit = kit;
  _sp = kit->sp();
  _reexecute = kit->jvms()->_reexecute;
}
PreserveReexecuteState::~PreserveReexecuteState() {
  if (_kit->stopped()) return;
  _kit->jvms()->_reexecute = _reexecute;
  _kit->set_sp(_sp);
}

//------------------------------clone_map--------------------------------------
// Implementation of PreserveJVMState
//
// Only clone_map(...) here. If this function is only used in the
// PreserveJVMState class we may want to get rid of this extra
// function eventually and do it all there.

SafePointNode* GraphKit::clone_map() {
  if (map() == NULL)  return NULL;

  // Clone the memory edge first
  Node* mem = MergeMemNode::make(map()->memory());
  gvn().set_type_bottom(mem);

  SafePointNode *clonemap = (SafePointNode*)map()->clone();
  JVMState* jvms = this->jvms();
  JVMState* clonejvms = jvms->clone_shallow(C);
  clonemap->set_memory(mem);
  clonemap->set_jvms(clonejvms);
  clonejvms->set_map(clonemap);
  record_for_igvn(clonemap);
  gvn().set_type_bottom(clonemap);
  return clonemap;
}


//-----------------------------set_map_clone-----------------------------------
void GraphKit::set_map_clone(SafePointNode* m) {
  _map = m;
  _map = clone_map();
  _map->set_next_exception(NULL);
  debug_only(verify_map());
}


//----------------------------kill_dead_locals---------------------------------
// Detect any locals which are known to be dead, and force them to top.
void GraphKit::kill_dead_locals() {
  // Consult the liveness information for the locals.  If any
  // of them are unused, then they can be replaced by top().  This
  // should help register allocation time and cut down on the size
  // of the deoptimization information.

  // This call is made from many of the bytecode handling
  // subroutines called from the Big Switch in do_one_bytecode.
  // Every bytecode which might include a slow path is responsible
  // for killing its dead locals.  The more consistent we
  // are about killing deads, the fewer useless phis will be
  // constructed for them at various merge points.

  // bci can be -1 (InvocationEntryBci).  We return the entry
  // liveness for the method.

  if (method() == NULL || method()->code_size() == 0) {
    // We are building a graph for a call to a native method.
    // All locals are live.
    return;
  }

  ResourceMark rm;

  // Consult the liveness information for the locals.  If any
  // of them are unused, then they can be replaced by top().  This
  // should help register allocation time and cut down on the size
  // of the deoptimization information.
  MethodLivenessResult live_locals = method()->liveness_at_bci(bci());

  int len = (int)live_locals.size();
  assert(len <= jvms()->loc_size(), "too many live locals");
  for (int local = 0; local < len; local++) {
    if (!live_locals.at(local)) {
      set_local(local, top());
    }
  }
}

#ifdef ASSERT
//-------------------------dead_locals_are_killed------------------------------
// Return true if all dead locals are set to top in the map.
// Used to assert "clean" debug info at various points.
bool GraphKit::dead_locals_are_killed() {
  if (method() == NULL || method()->code_size() == 0) {
    // No locals need to be dead, so all is as it should be.
    return true;
  }

  // Make sure somebody called kill_dead_locals upstream.
  ResourceMark rm;
  for (JVMState* jvms = this->jvms(); jvms != NULL; jvms = jvms->caller()) {
    if (jvms->loc_size() == 0)  continue;     // no locals to consult
    SafePointNode* map = jvms->map();
    ciMethod* method = jvms->method();
    int bci = jvms->bci();
    if (jvms == this->jvms()) {
      bci = this->bci();          // it might not yet be synched
    }
    MethodLivenessResult live_locals = method->liveness_at_bci(bci);
    int len = (int)live_locals.size();
    if (!live_locals.is_valid() || len == 0)
      // This method is trivial, or is poisoned by a breakpoint.
      return true;
    assert(len == jvms->loc_size(), "live map consistent with locals map");
    for (int local = 0; local < len; local++) {
      if (!live_locals.at(local) && map->local(jvms, local) != top()) {
        if (PrintMiscellaneous && (Verbose || WizardMode)) {
          tty->print_cr("Zombie local %d: ", local);
          jvms->dump();
        }
        return false;
      }
    }
  }
  return true;
}

#endif //ASSERT

// Helper function for enforcing certain bytecodes to reexecute if
// deoptimization happens
static bool should_reexecute_implied_by_bytecode(JVMState *jvms, bool is_anewarray) {
  ciMethod* cur_method = jvms->method();
  int       cur_bci    = jvms->bci();
  if (cur_method != NULL && cur_bci != InvocationEntryBci) {
    Bytecodes::Code code = cur_method->java_code_at_bci(cur_bci);
    return Interpreter::bytecode_should_reexecute(code) ||
           (is_anewarray && code == Bytecodes::_multianewarray);
    // Reexecute _multianewarray bytecode which was replaced with
    // sequence of [a]newarray. See Parse::do_multianewarray().
    //
    // Note: interpreter should not have it set since this optimization
    // is limited by dimensions and guarded by flag so in some cases
    // multianewarray() runtime calls will be generated and
    // the bytecode should not be reexecuted (stack will not be reset).
  } else
    return false;
}

// Helper function for adding JVMState and debug information to node
void GraphKit::add_safepoint_edges(SafePointNode* call, bool must_throw) {
  // Add the safepoint edges to the call (or other safepoint).

  // Make sure dead locals are set to top.  This
  // should help register allocation time and cut down on the size
  // of the deoptimization information.
  assert(dead_locals_are_killed(), "garbage in debug info before safepoint");

  // Walk the inline list to fill in the correct set of JVMState's
  // Also fill in the associated edges for each JVMState.

  // If the bytecode needs to be reexecuted we need to put
  // the arguments back on the stack.
  const bool should_reexecute = jvms()->should_reexecute();
  JVMState* youngest_jvms = should_reexecute ? sync_jvms_for_reexecute() : sync_jvms();

  // NOTE: set_bci (called from sync_jvms) might reset the reexecute bit to
  // undefined if the bci is different.  This is normal for Parse but it
  // should not happen for LibraryCallKit because only one bci is processed.
  assert(!is_LibraryCallKit() || (jvms()->should_reexecute() == should_reexecute),
         "in LibraryCallKit the reexecute bit should not change");

  // If we are guaranteed to throw, we can prune everything but the
  // input to the current bytecode.
  bool can_prune_locals = false;
  uint stack_slots_not_pruned = 0;
  int inputs = 0, depth = 0;
  if (must_throw) {
    assert(method() == youngest_jvms->method(), "sanity");
    if (compute_stack_effects(inputs, depth)) {
      can_prune_locals = true;
      stack_slots_not_pruned = inputs;
    }
  }

  if (env()->should_retain_local_variables()) {
    // At any safepoint, this method can get breakpointed, which would
    // then require an immediate deoptimization.
    can_prune_locals = false;  // do not prune locals
    stack_slots_not_pruned = 0;
  }

  // do not scribble on the input jvms
  JVMState* out_jvms = youngest_jvms->clone_deep(C);
  call->set_jvms(out_jvms); // Start jvms list for call node

  // For a known set of bytecodes, the interpreter should reexecute them if
  // deoptimization happens. We set the reexecute state for them here
  if (out_jvms->is_reexecute_undefined() && //don't change if already specified
      should_reexecute_implied_by_bytecode(out_jvms, call->is_AllocateArray())) {
    out_jvms->set_should_reexecute(true); //NOTE: youngest_jvms not changed
  }

  // Presize the call:
  DEBUG_ONLY(uint non_debug_edges = call->req());
  call->add_req_batch(top(), youngest_jvms->debug_depth());
  assert(call->req() == non_debug_edges + youngest_jvms->debug_depth(), "");

  // Set up edges so that the call looks like this:
  //  Call [state:] ctl io mem fptr retadr
  //       [parms:] parm0 ... parmN
  //       [root:]  loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
  //       [...mid:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN [...]
  //       [young:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
  // Note that caller debug info precedes callee debug info.

  // Fill pointer walks backwards from "young:" to "root:" in the diagram above:
  uint debug_ptr = call->req();

  // Loop over the map input edges associated with jvms, add them
  // to the call node, & reset all offsets to match call node array.
  for (JVMState* in_jvms = youngest_jvms; in_jvms != NULL; ) {
    uint debug_end   = debug_ptr;
    uint debug_start = debug_ptr - in_jvms->debug_size();
    debug_ptr = debug_start;  // back up the ptr

    uint p = debug_start;  // walks forward in [debug_start, debug_end)
    uint j, k, l;
    SafePointNode* in_map = in_jvms->map();
    out_jvms->set_map(call);

    if (can_prune_locals) {
      assert(in_jvms->method() == out_jvms->method(), "sanity");
      // If the current throw can reach an exception handler in this JVMS,
      // then we must keep everything live that can reach that handler.
      // As a quick and dirty approximation, we look for any handlers at all.
      if (in_jvms->method()->has_exception_handlers()) {
        can_prune_locals = false;
      }
    }

    // Add the Locals
    k = in_jvms->locoff();
    l = in_jvms->loc_size();
    out_jvms->set_locoff(p);
    if (!can_prune_locals) {
      for (j = 0; j < l; j++)
        call->set_req(p++, in_map->in(k+j));
    } else {
      p += l;  // already set to top above by add_req_batch
    }

    // Add the Expression Stack
    k = in_jvms->stkoff();
    l = in_jvms->sp();
    out_jvms->set_stkoff(p);
    if (!can_prune_locals) {
      for (j = 0; j < l; j++)
        call->set_req(p++, in_map->in(k+j));
    } else if (can_prune_locals && stack_slots_not_pruned != 0) {
      // Divide stack into {S0,...,S1}, where S0 is set to top.
      uint s1 = stack_slots_not_pruned;
      stack_slots_not_pruned = 0;  // for next iteration
      if (s1 > l)  s1 = l;
      uint s0 = l - s1;
      p += s0;  // skip the tops preinstalled by add_req_batch
      for (j = s0; j < l; j++)
        call->set_req(p++, in_map->in(k+j));
    } else {
      p += l;  // already set to top above by add_req_batch
    }

    // Add the Monitors
    k = in_jvms->monoff();
    l = in_jvms->mon_size();
    out_jvms->set_monoff(p);
    for (j = 0; j < l; j++)
      call->set_req(p++, in_map->in(k+j));

    // Copy any scalar object fields.
    k = in_jvms->scloff();
    l = in_jvms->scl_size();
    out_jvms->set_scloff(p);
    for (j = 0; j < l; j++)
      call->set_req(p++, in_map->in(k+j));

    // Finish the new jvms.
    out_jvms->set_endoff(p);

    assert(out_jvms->endoff()     == debug_end,             "fill ptr must match");
    assert(out_jvms->depth()      == in_jvms->depth(),      "depth must match");
    assert(out_jvms->loc_size()   == in_jvms->loc_size(),   "size must match");
    assert(out_jvms->mon_size()   == in_jvms->mon_size(),   "size must match");
    assert(out_jvms->scl_size()   == in_jvms->scl_size(),   "size must match");
    assert(out_jvms->debug_size() == in_jvms->debug_size(), "size must match");

    // Update the two tail pointers in parallel.
    out_jvms = out_jvms->caller();
    in_jvms  = in_jvms->caller();
  }

  assert(debug_ptr == non_debug_edges, "debug info must fit exactly");

  // Test the correctness of JVMState::debug_xxx accessors:
  assert(call->jvms()->debug_start() == non_debug_edges, "");
  assert(call->jvms()->debug_end()   == call->req(), "");
  assert(call->jvms()->debug_depth() == call->req() - non_debug_edges, "");
}

bool GraphKit::compute_stack_effects(int& inputs, int& depth) {
  Bytecodes::Code code = java_bc();
  if (code == Bytecodes::_wide) {
    code = method()->java_code_at_bci(bci() + 1);
  }

  BasicType rtype = T_ILLEGAL;
  int       rsize = 0;

  if (code != Bytecodes::_illegal) {
    depth = Bytecodes::depth(code); // checkcast=0, athrow=-1
    rtype = Bytecodes::result_type(code); // checkcast=P, athrow=V
    if (rtype < T_CONFLICT)
      rsize = type2size[rtype];
  }

  switch (code) {
  case Bytecodes::_illegal:
    return false;

  case Bytecodes::_ldc:
  case Bytecodes::_ldc_w:
  case Bytecodes::_ldc2_w:
    inputs = 0;
    break;

  case Bytecodes::_dup:         inputs = 1;  break;
  case Bytecodes::_dup_x1:      inputs = 2;  break;
  case Bytecodes::_dup_x2:      inputs = 3;  break;
  case Bytecodes::_dup2:        inputs = 2;  break;
  case Bytecodes::_dup2_x1:     inputs = 3;  break;
  case Bytecodes::_dup2_x2:     inputs = 4;  break;
  case Bytecodes::_swap:        inputs = 2;  break;
  case Bytecodes::_arraylength: inputs = 1;  break;

  case Bytecodes::_getstatic:
  case Bytecodes::_putstatic:
  case Bytecodes::_getfield:
  case Bytecodes::_putfield:
    {
      bool ignored_will_link;
      ciField* field = method()->get_field_at_bci(bci(), ignored_will_link);
      int      size  = field->type()->size();
      bool is_get = (depth >= 0), is_static = (depth & 1);
      inputs = (is_static ? 0 : 1);
      if (is_get) {
        depth = size - inputs;
      } else {
        inputs += size;        // putxxx pops the value from the stack
        depth = - inputs;
      }
    }
    break;

  case Bytecodes::_invokevirtual:
  case Bytecodes::_invokespecial:
  case Bytecodes::_invokestatic:
  case Bytecodes::_invokedynamic:
  case Bytecodes::_invokeinterface:
    {
      bool ignored_will_link;
      ciSignature* declared_signature = NULL;
      ciMethod* ignored_callee = method()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
      assert(declared_signature != NULL, "cannot be null");
      inputs   = declared_signature->arg_size_for_bc(code);
      int size = declared_signature->return_type()->size();
      depth    = size - inputs;
    }
    break;

  case Bytecodes::_multianewarray:
    {
      ciBytecodeStream iter(method());
      iter.reset_to_bci(bci());
      iter.next();
      inputs = iter.get_dimensions();
      assert(rsize == 1, "");
      depth = rsize - inputs;
    }
    break;

  case Bytecodes::_ireturn:
  case Bytecodes::_lreturn:
  case Bytecodes::_freturn:
  case Bytecodes::_dreturn:
  case Bytecodes::_areturn:
    assert(rsize == -depth, "");
    inputs = rsize;
    break;

  case Bytecodes::_jsr:
  case Bytecodes::_jsr_w:
    inputs = 0;
    depth  = 1;                  // S.B. depth=1, not zero
    break;

  default:
    // bytecode produces a typed result
    inputs = rsize - depth;
    assert(inputs >= 0, "");
    break;
  }

#ifdef ASSERT
  // spot check
  int outputs = depth + inputs;
  assert(outputs >= 0, "sanity");
  switch (code) {
  case Bytecodes::_checkcast: assert(inputs == 1 && outputs == 1, ""); break;
  case Bytecodes::_athrow:    assert(inputs == 1 && outputs == 0, ""); break;
  case Bytecodes::_aload_0:   assert(inputs == 0 && outputs == 1, ""); break;
  case Bytecodes::_return:    assert(inputs == 0 && outputs == 0, ""); break;
  case Bytecodes::_drem:      assert(inputs == 4 && outputs == 2, ""); break;
  }
#endif //ASSERT

  return true;
}



//------------------------------basic_plus_adr---------------------------------
Node* GraphKit::basic_plus_adr(Node* base, Node* ptr, Node* offset) {
  // short-circuit a common case
  if (offset == intcon(0))  return ptr;
  return _gvn.transform( new AddPNode(base, ptr, offset) );
}

Node* GraphKit::ConvI2L(Node* offset) {
  // short-circuit a common case
  jint offset_con = find_int_con(offset, Type::OffsetBot);
  if (offset_con != Type::OffsetBot) {
    return longcon((jlong) offset_con);
  }
  return _gvn.transform( new ConvI2LNode(offset));
}

Node* GraphKit::ConvI2UL(Node* offset) {
  juint offset_con = (juint) find_int_con(offset, Type::OffsetBot);
  if (offset_con != (juint) Type::OffsetBot) {
    return longcon((julong) offset_con);
  }
  Node* conv = _gvn.transform( new ConvI2LNode(offset));
  Node* mask = _gvn.transform(ConLNode::make((julong) max_juint));
  return _gvn.transform( new AndLNode(conv, mask) );
}

Node* GraphKit::ConvL2I(Node* offset) {
  // short-circuit a common case
  jlong offset_con = find_long_con(offset, (jlong)Type::OffsetBot);
  if (offset_con != (jlong)Type::OffsetBot) {
    return intcon((int) offset_con);
  }
  return _gvn.transform( new ConvL2INode(offset));
}

//-------------------------load_object_klass-----------------------------------
Node* GraphKit::load_object_klass(Node* obj) {
  // Special-case a fresh allocation to avoid building nodes:
  Node* akls = AllocateNode::Ideal_klass(obj, &_gvn);
  if (akls != NULL)  return akls;
  if (ShenandoahVerifyReadsToFromSpace) {
    obj = shenandoah_read_barrier(obj);
  }
  Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes());
  return _gvn.transform(LoadKlassNode::make(_gvn, NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS));
}

//-------------------------load_array_length-----------------------------------
Node* GraphKit::load_array_length(Node* array) {
  // Special-case a fresh allocation to avoid building nodes:
  AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(array, &_gvn);
  Node *alen;
  if (alloc == NULL) {
    if (ShenandoahVerifyReadsToFromSpace) {
      array = shenandoah_read_barrier(array);
    }

    Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
    alen = _gvn.transform( new LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
  } else {
    alen = alloc->Ideal_length();
    Node* ccast = alloc->make_ideal_length(_gvn.type(array)->is_oopptr(), &_gvn);
    if (ccast != alen) {
      alen = _gvn.transform(ccast);
    }
  }
  return alen;
}

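// (Editorial note, not part of the original sources: the null-check helper
// below is normally reached through the convenience wrappers declared in
// graphKit.hpp, such as GraphKit::null_check() and null_check_receiver().
// A minimal illustrative sketch of the usual caller pattern, assuming a
// receiver Node* named 'recv', is:
//
//   Node* not_null_recv = null_check(recv);   // forwards to null_check_common()
//   if (stopped())  return;                   // null path trapped or threw
//
// i.e. callers re-test stopped() afterwards, because the helper may replace
// the current map with an uncommon trap or a builtin throw.)
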
//------------------------------do_null_check----------------------------------
// Helper function to do a NULL pointer check.  Returned value is
// the incoming address with NULL casted away.  You are allowed to use the
// not-null value only if you are control dependent on the test.
#ifndef PRODUCT
extern int explicit_null_checks_inserted,
           explicit_null_checks_elided;
#endif
Node* GraphKit::null_check_common(Node* value, BasicType type,
                                  // optional arguments for variations:
                                  bool assert_null,
                                  Node* *null_control,
                                  bool speculative) {
  assert(!assert_null || null_control == NULL, "not both at once");
  if (stopped())  return top();
  NOT_PRODUCT(explicit_null_checks_inserted++);

  // Construct NULL check
  Node *chk = NULL;
  switch(type) {
    case T_LONG   : chk = new CmpLNode(value, _gvn.zerocon(T_LONG)); break;
    case T_INT    : chk = new CmpINode(value, _gvn.intcon(0)); break;
    case T_ARRAY  : // fall through
      type = T_OBJECT;  // simplify further tests
    case T_OBJECT : {
      const Type *t = _gvn.type( value );

      const TypeOopPtr* tp = t->isa_oopptr();
      if (tp != NULL && tp->klass() != NULL && !tp->klass()->is_loaded()
          // Only for do_null_check, not any of its siblings:
          && !assert_null && null_control == NULL) {
        // Usually, any field access or invocation on an unloaded oop type
        // will simply fail to link, since the statically linked class is
        // likely also to be unloaded.  However, in -Xcomp mode, sometimes
        // the static class is loaded but the sharper oop type is not.
        // Rather than checking for this obscure case in lots of places,
        // we simply observe that a null check on an unloaded class
        // will always be followed by a nonsense operation, so we
        // can just issue the uncommon trap here.
        // Our access to the unloaded class will only be correct
        // after it has been loaded and initialized, which requires
        // a trip through the interpreter.
#ifndef PRODUCT
        if (WizardMode) { tty->print("Null check of unloaded "); tp->klass()->print(); tty->cr(); }
#endif
        uncommon_trap(Deoptimization::Reason_unloaded,
                      Deoptimization::Action_reinterpret,
                      tp->klass(), "!loaded");
        return top();
      }

      if (assert_null) {
        // See if the type is contained in NULL_PTR.
        // If so, then the value is already null.
        if (t->higher_equal(TypePtr::NULL_PTR)) {
          NOT_PRODUCT(explicit_null_checks_elided++);
          return value;           // Elided null assert quickly!
        }
      } else {
        // See if mixing in the NULL pointer changes type.
        // If so, then the NULL pointer was not allowed in the original
        // type.  In other words, "value" was not-null.
        if (t->meet(TypePtr::NULL_PTR) != t->remove_speculative()) {
          // same as: if (!TypePtr::NULL_PTR->higher_equal(t)) ...
          NOT_PRODUCT(explicit_null_checks_elided++);
          return value;           // Elided null check quickly!
        }
      }
      chk = new CmpPNode( value, null() );
      break;
    }

    default:
      fatal("unexpected type: %s", type2name(type));
  }
  assert(chk != NULL, "sanity check");
  chk = _gvn.transform(chk);

  BoolTest::mask btest = assert_null ? BoolTest::eq : BoolTest::ne;
  BoolNode *btst = new BoolNode( chk, btest);
  Node   *tst = _gvn.transform( btst );

  //-----------
  // if peephole optimizations occurred, a prior test existed.
  // If a prior test existed, maybe it dominates as we can avoid this test.
  if (tst != btst && type == T_OBJECT) {
    // At this point we want to scan up the CFG to see if we can
    // find an identical test (and so avoid this test altogether).
    Node *cfg = control();
    int depth = 0;
    while( depth < 16 ) {       // Limit search depth for speed
      if( cfg->Opcode() == Op_IfTrue &&
          cfg->in(0)->in(1) == tst ) {
        // Found prior test.  Use "cast_not_null" to construct an identical
        // CastPP (and hence hash to) as already exists for the prior test.
        // Return that casted value.
        if (assert_null) {
          replace_in_map(value, null());
          return null();  // do not issue the redundant test
        }
        Node *oldcontrol = control();
        set_control(cfg);
        Node *res = cast_not_null(value);
        set_control(oldcontrol);
        NOT_PRODUCT(explicit_null_checks_elided++);
        return res;
      }
      cfg = IfNode::up_one_dom(cfg, /*linear_only=*/ true);
      if (cfg == NULL)  break;  // Quit at region nodes
      depth++;
    }
  }

  //-----------
  // Branch to failure if null
  float ok_prob = PROB_MAX;  // a priori estimate:  nulls never happen
  Deoptimization::DeoptReason reason;
  if (assert_null) {
    reason = Deoptimization::Reason_null_assert;
  } else if (type == T_OBJECT) {
    reason = Deoptimization::reason_null_check(speculative);
  } else {
    reason = Deoptimization::Reason_div0_check;
  }
  // %%% Since Reason_unhandled is not recorded on a per-bytecode basis,
  // ciMethodData::has_trap_at will return a conservative -1 if any
  // must-be-null assertion has failed.  This could cause performance
  // problems for a method after its first do_null_assert failure.
  // Consider using 'Reason_class_check' instead?

  // To cause an implicit null check, we set the not-null probability
  // to the maximum (PROB_MAX).  For an explicit check the probability
  // is set to a smaller value.
  if (null_control != NULL || too_many_traps(reason)) {
    // probability is less likely
    ok_prob =  PROB_LIKELY_MAG(3);
  } else if (!assert_null &&
             (ImplicitNullCheckThreshold > 0) &&
             method() != NULL &&
             (method()->method_data()->trap_count(reason)
              >= (uint)ImplicitNullCheckThreshold)) {
    ok_prob =  PROB_LIKELY_MAG(3);
  }

  if (null_control != NULL) {
    IfNode* iff = create_and_map_if(control(), tst, ok_prob, COUNT_UNKNOWN);
    Node* null_true = _gvn.transform( new IfFalseNode(iff));
    set_control(      _gvn.transform( new IfTrueNode(iff)));
#ifndef PRODUCT
    if (null_true == top()) {
      explicit_null_checks_elided++;
    }
#endif
    (*null_control) = null_true;
  } else {
    BuildCutout unless(this, tst, ok_prob);
    // Check for optimizer eliding test at parse time
    if (stopped()) {
      // Failure not possible; do not bother making uncommon trap.
      NOT_PRODUCT(explicit_null_checks_elided++);
    } else if (assert_null) {
      uncommon_trap(reason,
                    Deoptimization::Action_make_not_entrant,
                    NULL, "assert_null");
    } else {
      replace_in_map(value, zerocon(type));
      builtin_throw(reason);
    }
  }

  // Must throw exception, fall-thru not possible?
  if (stopped()) {
    return top();               // No result
  }

  if (assert_null) {
    // Cast obj to null on this path.
    replace_in_map(value, zerocon(type));
    return zerocon(type);
  }

  // Cast obj to not-null on this path, if there is no null_control.
  // (If there is a null_control, a non-null value may come back to haunt us.)
  if (type == T_OBJECT) {
    Node* cast = cast_not_null(value, false);
    if (null_control == NULL || (*null_control) == top())
      replace_in_map(value, cast);
    value = cast;
  }

  return value;
}


//------------------------------cast_not_null----------------------------------
// Cast obj to not-null on this path
Node* GraphKit::cast_not_null(Node* obj, bool do_replace_in_map) {
  const Type *t = _gvn.type(obj);
  const Type *t_not_null = t->join_speculative(TypePtr::NOTNULL);
  // Object is already not-null?
  if( t == t_not_null ) return obj;

  Node *cast = new CastPPNode(obj,t_not_null);
  cast->init_req(0, control());
  cast = _gvn.transform( cast );

  // Scan for instances of 'obj' in the current JVM mapping.
  // These instances are known to be not-null after the test.
  if (do_replace_in_map)
    replace_in_map(obj, cast);

  return cast;                  // Return casted value
}


//--------------------------replace_in_map-------------------------------------
void GraphKit::replace_in_map(Node* old, Node* neww) {
  if (old == neww) {
    return;
  }

  map()->replace_edge(old, neww);

  // Note: This operation potentially replaces any edge
  // on the map.  This includes locals, stack, and monitors
  // of the current (innermost) JVM state.

  // don't let inconsistent types from profiling escape this
  // method

  const Type* told = _gvn.type(old);
  const Type* tnew = _gvn.type(neww);

  if (!tnew->higher_equal(told)) {
    return;
  }

  map()->record_replaced_node(old, neww);
}


//=============================================================================
//--------------------------------memory---------------------------------------
Node* GraphKit::memory(uint alias_idx) {
  MergeMemNode* mem = merged_memory();
  Node* p = mem->memory_at(alias_idx);
  _gvn.set_type(p, Type::MEMORY);  // must be mapped
  return p;
}

//-----------------------------reset_memory------------------------------------
Node* GraphKit::reset_memory() {
  Node* mem = map()->memory();
  // do not use this node for any more parsing!
  debug_only( map()->set_memory((Node*)NULL) );
  return _gvn.transform( mem );
}

//------------------------------set_all_memory---------------------------------
void GraphKit::set_all_memory(Node* newmem) {
  Node* mergemem = MergeMemNode::make(newmem);
  gvn().set_type_bottom(mergemem);
  map()->set_memory(mergemem);
}

//------------------------------set_all_memory_call----------------------------
void GraphKit::set_all_memory_call(Node* call, bool separate_io_proj) {
  Node* newmem = _gvn.transform( new ProjNode(call, TypeFunc::Memory, separate_io_proj) );
  set_all_memory(newmem);
}

//=============================================================================
//
// parser factory methods for MemNodes
//
// These are layered on top of the factory methods in LoadNode and StoreNode,
// and integrate with the parser's memory state and _gvn engine.
//

// factory methods in "int adr_idx"
Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
                          int adr_idx,
                          MemNode::MemOrd mo,
                          LoadNode::ControlDependency control_dependency,
                          bool require_atomic_access,
                          bool unaligned,
                          bool mismatched) {
  assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" );
  const TypePtr* adr_type = NULL; // debug-mode-only argument
  debug_only(adr_type = C->get_adr_type(adr_idx));
  Node* mem = memory(adr_idx);
  Node* ld;
  if (require_atomic_access && bt == T_LONG) {
    ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched);
  } else if (require_atomic_access && bt == T_DOUBLE) {
    ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched);
  } else {
    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched);
  }
  ld = _gvn.transform(ld);
  if ((bt == T_OBJECT) && C->do_escape_analysis() || C->eliminate_boxing()) {
    // Improve graph before escape analysis and boxing elimination.
    record_for_igvn(ld);
  }
  return ld;
}

Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
                                int adr_idx,
                                MemNode::MemOrd mo,
                                bool require_atomic_access,
                                bool unaligned,
                                bool mismatched) {
  assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
  const TypePtr* adr_type = NULL;
  debug_only(adr_type = C->get_adr_type(adr_idx));
  Node *mem = memory(adr_idx);
  Node* st;
  if (require_atomic_access && bt == T_LONG) {
    st = StoreLNode::make_atomic(ctl, mem, adr, adr_type, val, mo);
  } else if (require_atomic_access && bt == T_DOUBLE) {
    st = StoreDNode::make_atomic(ctl, mem, adr, adr_type, val, mo);
  } else {
    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo);
  }
  if (unaligned) {
    st->as_Store()->set_unaligned_access();
  }
  if (mismatched) {
    st->as_Store()->set_mismatched_access();
  }
  st = _gvn.transform(st);
  set_memory(st, adr_idx);
  // Back-to-back stores can only remove intermediate store with DU info
  // so push on worklist for optimizer.
1526 if (mem->req() > MemNode::Address && adr == mem->in(MemNode::Address)) 1527 record_for_igvn(st); 1528 1529 return st; 1530 } 1531 1532 1533 Node* GraphKit::pre_barrier(bool do_load, 1534 Node* ctl, 1535 Node* obj, 1536 Node* adr, 1537 uint adr_idx, 1538 Node* val, 1539 const TypeOopPtr* val_type, 1540 Node* pre_val, 1541 BasicType bt) { 1542 1543 BarrierSet* bs = Universe::heap()->barrier_set(); 1544 set_control(ctl); 1545 switch (bs->kind()) { 1546 case BarrierSet::G1SATBCTLogging: 1547 g1_write_barrier_pre(do_load, obj, adr, adr_idx, val, val_type, pre_val, bt); 1548 return val; 1549 case BarrierSet::ShenandoahBarrierSet: 1550 return shenandoah_write_barrier_pre(do_load, obj, adr, adr_idx, val, val_type, pre_val, bt); 1551 break; 1552 1553 case BarrierSet::CardTableForRS: 1554 case BarrierSet::CardTableExtension: 1555 case BarrierSet::ModRef: 1556 break; 1557 1558 default : 1559 ShouldNotReachHere(); 1560 1561 } 1562 return val; 1563 } 1564 1565 bool GraphKit::can_move_pre_barrier() const { 1566 BarrierSet* bs = Universe::heap()->barrier_set(); 1567 switch (bs->kind()) { 1568 case BarrierSet::G1SATBCTLogging: 1569 return true; // Can move it if no safepoint 1570 1571 case BarrierSet::CardTableForRS: 1572 case BarrierSet::CardTableExtension: 1573 case BarrierSet::ModRef: 1574 return true; // There is no pre-barrier 1575 1576 case BarrierSet::ShenandoahBarrierSet: 1577 // Need newval in pre-barrier for matrix update. 1578 return !UseShenandoahMatrix; 1579 1580 default : 1581 ShouldNotReachHere(); 1582 } 1583 return false; 1584 } 1585 1586 void GraphKit::post_barrier(Node* ctl, 1587 Node* store, 1588 Node* obj, 1589 Node* adr, 1590 uint adr_idx, 1591 Node* val, 1592 BasicType bt, 1593 bool use_precise) { 1594 BarrierSet* bs = Universe::heap()->barrier_set(); 1595 set_control(ctl); 1596 switch (bs->kind()) { 1597 case BarrierSet::G1SATBCTLogging: 1598 g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise); 1599 break; 1600 1601 case BarrierSet::CardTableForRS: 1602 case BarrierSet::CardTableExtension: 1603 write_barrier_post(store, obj, adr, adr_idx, val, use_precise); 1604 break; 1605 1606 case BarrierSet::ModRef: 1607 case BarrierSet::ShenandoahBarrierSet: 1608 break; 1609 1610 default : 1611 ShouldNotReachHere(); 1612 1613 } 1614 } 1615 1616 Node* GraphKit::store_oop(Node* ctl, 1617 Node* obj, 1618 Node* adr, 1619 const TypePtr* adr_type, 1620 Node* val, 1621 const TypeOopPtr* val_type, 1622 BasicType bt, 1623 bool use_precise, 1624 MemNode::MemOrd mo, 1625 bool mismatched) { 1626 // Transformation of a value which could be NULL pointer (CastPP #NULL) 1627 // could be delayed during Parse (for example, in adjust_map_after_if()). 1628 // Execute transformation here to avoid barrier generation in such case. 1629 if (_gvn.type(val) == TypePtr::NULL_PTR) 1630 val = _gvn.makecon(TypePtr::NULL_PTR); 1631 1632 set_control(ctl); 1633 if (stopped()) return top(); // Dead path ? 
1634 1635 assert(bt == T_OBJECT, "sanity"); 1636 assert(val != NULL, "not dead path"); 1637 uint adr_idx = C->get_alias_index(adr_type); 1638 assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" ); 1639 1640 val = pre_barrier(true /* do_load */, 1641 control(), obj, adr, adr_idx, val, val_type, 1642 NULL /* pre_val */, 1643 bt); 1644 1645 Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo, mismatched); 1646 post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise); 1647 return store; 1648 } 1649 1650 // Could be an array or object we don't know at compile time (unsafe ref.) 1651 Node* GraphKit::store_oop_to_unknown(Node* ctl, 1652 Node* obj, // containing obj 1653 Node* adr, // actual adress to store val at 1654 const TypePtr* adr_type, 1655 Node* val, 1656 BasicType bt, 1657 MemNode::MemOrd mo, 1658 bool mismatched) { 1659 Compile::AliasType* at = C->alias_type(adr_type); 1660 const TypeOopPtr* val_type = NULL; 1661 if (adr_type->isa_instptr()) { 1662 if (at->field() != NULL) { 1663 // known field. This code is a copy of the do_put_xxx logic. 1664 ciField* field = at->field(); 1665 if (!field->type()->is_loaded()) { 1666 val_type = TypeInstPtr::BOTTOM; 1667 } else { 1668 val_type = TypeOopPtr::make_from_klass(field->type()->as_klass()); 1669 } 1670 } 1671 } else if (adr_type->isa_aryptr()) { 1672 val_type = adr_type->is_aryptr()->elem()->make_oopptr(); 1673 } 1674 if (val_type == NULL) { 1675 val_type = TypeInstPtr::BOTTOM; 1676 } 1677 return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo, mismatched); 1678 } 1679 1680 1681 //-------------------------array_element_address------------------------- 1682 Node* GraphKit::array_element_address(Node* ary, Node* idx, BasicType elembt, 1683 const TypeInt* sizetype, Node* ctrl) { 1684 uint shift = exact_log2(type2aelembytes(elembt)); 1685 uint header = arrayOopDesc::base_offset_in_bytes(elembt); 1686 1687 // short-circuit a common case (saves lots of confusing waste motion) 1688 jint idx_con = find_int_con(idx, -1); 1689 if (idx_con >= 0) { 1690 intptr_t offset = header + ((intptr_t)idx_con << shift); 1691 return basic_plus_adr(ary, offset); 1692 } 1693 1694 // must be correct type for alignment purposes 1695 Node* base = basic_plus_adr(ary, header); 1696 idx = Compile::conv_I2X_index(&_gvn, idx, sizetype, ctrl); 1697 Node* scale = _gvn.transform( new LShiftXNode(idx, intcon(shift)) ); 1698 return basic_plus_adr(ary, base, scale); 1699 } 1700 1701 //-------------------------load_array_element------------------------- 1702 Node* GraphKit::load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype) { 1703 const Type* elemtype = arytype->elem(); 1704 BasicType elembt = elemtype->array_element_basic_type(); 1705 Node* adr = array_element_address(ary, idx, elembt, arytype->size()); 1706 if (elembt == T_NARROWOOP) { 1707 elembt = T_OBJECT; // To satisfy switch in LoadNode::make() 1708 } 1709 Node* ld = make_load(ctl, adr, elemtype, elembt, arytype, MemNode::unordered); 1710 return ld; 1711 } 1712 1713 //-------------------------set_arguments_for_java_call------------------------- 1714 // Arguments (pre-popped from the stack) are taken from the JVMS. 
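// For illustration (method and variable names below are made up, not taken from
// any particular compile): for a virtual call such as
//   receiver.frob(x, y)
// where x and y each occupy one stack slot, arg_size() is 3, argument(0) is the
// receiver, and argument(1)/argument(2) are x and y; they are wired into the
// call node at TypeFunc::Parms+0 .. Parms+2.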
1715 void GraphKit::set_arguments_for_java_call(CallJavaNode* call) { 1716 // Add the call arguments: 1717 uint nargs = call->method()->arg_size(); 1718 for (uint i = 0; i < nargs; i++) { 1719 Node* arg = argument(i); 1720 if (ShenandoahVerifyReadsToFromSpace && call->is_CallDynamicJava() && i == 0) { 1721 arg = shenandoah_read_barrier(arg); 1722 } 1723 call->init_req(i + TypeFunc::Parms, arg); 1724 } 1725 } 1726 1727 //---------------------------set_edges_for_java_call--------------------------- 1728 // Connect a newly created call into the current JVMS. 1729 // A return value node (if any) is returned from set_edges_for_java_call. 1730 void GraphKit::set_edges_for_java_call(CallJavaNode* call, bool must_throw, bool separate_io_proj) { 1731 1732 // Add the predefined inputs: 1733 call->init_req( TypeFunc::Control, control() ); 1734 call->init_req( TypeFunc::I_O , i_o() ); 1735 call->init_req( TypeFunc::Memory , reset_memory() ); 1736 call->init_req( TypeFunc::FramePtr, frameptr() ); 1737 call->init_req( TypeFunc::ReturnAdr, top() ); 1738 1739 add_safepoint_edges(call, must_throw); 1740 1741 Node* xcall = _gvn.transform(call); 1742 1743 if (xcall == top()) { 1744 set_control(top()); 1745 return; 1746 } 1747 assert(xcall == call, "call identity is stable"); 1748 1749 // Re-use the current map to produce the result. 1750 1751 set_control(_gvn.transform(new ProjNode(call, TypeFunc::Control))); 1752 set_i_o( _gvn.transform(new ProjNode(call, TypeFunc::I_O , separate_io_proj))); 1753 set_all_memory_call(xcall, separate_io_proj); 1754 1755 //return xcall; // no need, caller already has it 1756 } 1757 1758 Node* GraphKit::set_results_for_java_call(CallJavaNode* call, bool separate_io_proj) { 1759 if (stopped()) return top(); // maybe the call folded up? 1760 1761 // Capture the return value, if any. 1762 Node* ret; 1763 if (call->method() == NULL || 1764 call->method()->return_type()->basic_type() == T_VOID) 1765 ret = top(); 1766 else ret = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); 1767 1768 // Note: Since any out-of-line call can produce an exception, 1769 // we always insert an I_O projection from the call into the result. 1770 1771 make_slow_call_ex(call, env()->Throwable_klass(), separate_io_proj); 1772 1773 if (separate_io_proj) { 1774 // The caller requested separate projections be used by the fall 1775 // through and exceptional paths, so replace the projections for 1776 // the fall through path. 1777 set_i_o(_gvn.transform( new ProjNode(call, TypeFunc::I_O) )); 1778 set_all_memory(_gvn.transform( new ProjNode(call, TypeFunc::Memory) )); 1779 } 1780 return ret; 1781 } 1782 1783 //--------------------set_predefined_input_for_runtime_call-------------------- 1784 // Reading and setting the memory state is way conservative here. 1785 // The real problem is that I am not doing real Type analysis on memory, 1786 // so I cannot distinguish card mark stores from other stores. Across a GC 1787 // point the Store Barrier and the card mark memory has to agree. I cannot 1788 // have a card mark store and its barrier split across the GC point from 1789 // either above or below. Here I get that to happen by reading ALL of memory. 1790 // A better answer would be to separate out card marks from other memory. 1791 // For now, return the input memory state, so that it can be reused 1792 // after the call, if this call has restricted memory effects. 
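// (The MergeMem returned here is the caller's entire pre-call memory state;
// make_runtime_call() below hands it back to set_predefined_output_for_runtime_call()
// as keep_mem when the call's visible memory effects are limited to a narrow
// alias slice, so the rest of memory can be reused unchanged after the call.)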
1793 Node* GraphKit::set_predefined_input_for_runtime_call(SafePointNode* call) { 1794 // Set fixed predefined input arguments 1795 Node* memory = reset_memory(); 1796 call->init_req( TypeFunc::Control, control() ); 1797 call->init_req( TypeFunc::I_O, top() ); // does no i/o 1798 call->init_req( TypeFunc::Memory, memory ); // may gc ptrs 1799 call->init_req( TypeFunc::FramePtr, frameptr() ); 1800 call->init_req( TypeFunc::ReturnAdr, top() ); 1801 return memory; 1802 } 1803 1804 //-------------------set_predefined_output_for_runtime_call-------------------- 1805 // Set control and memory (not i_o) from the call. 1806 // If keep_mem is not NULL, use it for the output state, 1807 // except for the RawPtr output of the call, if hook_mem is TypeRawPtr::BOTTOM. 1808 // If hook_mem is NULL, this call produces no memory effects at all. 1809 // If hook_mem is a Java-visible memory slice (such as arraycopy operands), 1810 // then only that memory slice is taken from the call. 1811 // In the last case, we must put an appropriate memory barrier before 1812 // the call, so as to create the correct anti-dependencies on loads 1813 // preceding the call. 1814 void GraphKit::set_predefined_output_for_runtime_call(Node* call, 1815 Node* keep_mem, 1816 const TypePtr* hook_mem) { 1817 // no i/o 1818 set_control(_gvn.transform( new ProjNode(call,TypeFunc::Control) )); 1819 if (keep_mem) { 1820 // First clone the existing memory state 1821 set_all_memory(keep_mem); 1822 if (hook_mem != NULL) { 1823 // Make memory for the call 1824 Node* mem = _gvn.transform( new ProjNode(call, TypeFunc::Memory) ); 1825 // Set the RawPtr memory state only. This covers all the heap top/GC stuff 1826 // We also use hook_mem to extract specific effects from arraycopy stubs. 1827 set_memory(mem, hook_mem); 1828 } 1829 // ...else the call has NO memory effects. 1830 1831 // Make sure the call advertises its memory effects precisely. 1832 // This lets us build accurate anti-dependences in gcm.cpp. 1833 assert(C->alias_type(call->adr_type()) == C->alias_type(hook_mem), 1834 "call node must be constructed correctly"); 1835 } else { 1836 assert(hook_mem == NULL, ""); 1837 // This is not a "slow path" call; all memory comes from the call. 1838 set_all_memory_call(call); 1839 } 1840 } 1841 1842 1843 // Replace the call with the current state of the kit. 
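// This is used when a call has been expanded inline after the fact (e.g. by a
// late-inline CallGenerator): the call's normal and exceptional projections are
// rewired to the corresponding outputs of the expanded subgraph, and the call
// itself is disconnected from the graph.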
1844 void GraphKit::replace_call(CallNode* call, Node* result, bool do_replaced_nodes) { 1845 JVMState* ejvms = NULL; 1846 if (has_exceptions()) { 1847 ejvms = transfer_exceptions_into_jvms(); 1848 } 1849 1850 ReplacedNodes replaced_nodes = map()->replaced_nodes(); 1851 ReplacedNodes replaced_nodes_exception; 1852 Node* ex_ctl = top(); 1853 1854 SafePointNode* final_state = stop(); 1855 1856 // Find all the needed outputs of this call 1857 CallProjections callprojs; 1858 call->extract_projections(&callprojs, true); 1859 1860 Node* init_mem = call->in(TypeFunc::Memory); 1861 Node* final_mem = final_state->in(TypeFunc::Memory); 1862 Node* final_ctl = final_state->in(TypeFunc::Control); 1863 Node* final_io = final_state->in(TypeFunc::I_O); 1864 1865 // Replace all the old call edges with the edges from the inlining result 1866 if (callprojs.fallthrough_catchproj != NULL) { 1867 C->gvn_replace_by(callprojs.fallthrough_catchproj, final_ctl); 1868 } 1869 if (callprojs.fallthrough_memproj != NULL) { 1870 if (final_mem->is_MergeMem()) { 1871 // Parser's exits MergeMem was not transformed but may be optimized 1872 final_mem = _gvn.transform(final_mem); 1873 } 1874 C->gvn_replace_by(callprojs.fallthrough_memproj, final_mem); 1875 } 1876 if (callprojs.fallthrough_ioproj != NULL) { 1877 C->gvn_replace_by(callprojs.fallthrough_ioproj, final_io); 1878 } 1879 1880 // Replace the result with the new result if it exists and is used 1881 if (callprojs.resproj != NULL && result != NULL) { 1882 C->gvn_replace_by(callprojs.resproj, result); 1883 } 1884 1885 if (ejvms == NULL) { 1886 // No exception edges to simply kill off those paths 1887 if (callprojs.catchall_catchproj != NULL) { 1888 C->gvn_replace_by(callprojs.catchall_catchproj, C->top()); 1889 } 1890 if (callprojs.catchall_memproj != NULL) { 1891 C->gvn_replace_by(callprojs.catchall_memproj, C->top()); 1892 } 1893 if (callprojs.catchall_ioproj != NULL) { 1894 C->gvn_replace_by(callprojs.catchall_ioproj, C->top()); 1895 } 1896 // Replace the old exception object with top 1897 if (callprojs.exobj != NULL) { 1898 C->gvn_replace_by(callprojs.exobj, C->top()); 1899 } 1900 } else { 1901 GraphKit ekit(ejvms); 1902 1903 // Load my combined exception state into the kit, with all phis transformed: 1904 SafePointNode* ex_map = ekit.combine_and_pop_all_exception_states(); 1905 replaced_nodes_exception = ex_map->replaced_nodes(); 1906 1907 Node* ex_oop = ekit.use_exception_state(ex_map); 1908 1909 if (callprojs.catchall_catchproj != NULL) { 1910 C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control()); 1911 ex_ctl = ekit.control(); 1912 } 1913 if (callprojs.catchall_memproj != NULL) { 1914 C->gvn_replace_by(callprojs.catchall_memproj, ekit.reset_memory()); 1915 } 1916 if (callprojs.catchall_ioproj != NULL) { 1917 C->gvn_replace_by(callprojs.catchall_ioproj, ekit.i_o()); 1918 } 1919 1920 // Replace the old exception object with the newly created one 1921 if (callprojs.exobj != NULL) { 1922 C->gvn_replace_by(callprojs.exobj, ex_oop); 1923 } 1924 } 1925 1926 // Disconnect the call from the graph 1927 call->disconnect_inputs(NULL, C); 1928 C->gvn_replace_by(call, C->top()); 1929 1930 // Clean up any MergeMems that feed other MergeMems since the 1931 // optimizer doesn't like that. 
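  // (Transforming each user MergeMem here gives GVN a chance to fold away the
  // MergeMem-of-MergeMem chains that inlining can leave behind.)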
1932 if (final_mem->is_MergeMem()) { 1933 Node_List wl; 1934 for (SimpleDUIterator i(final_mem); i.has_next(); i.next()) { 1935 Node* m = i.get(); 1936 if (m->is_MergeMem() && !wl.contains(m)) { 1937 wl.push(m); 1938 } 1939 } 1940 while (wl.size() > 0) { 1941 _gvn.transform(wl.pop()); 1942 } 1943 } 1944 1945 if (callprojs.fallthrough_catchproj != NULL && !final_ctl->is_top() && do_replaced_nodes) { 1946 replaced_nodes.apply(C, final_ctl); 1947 } 1948 if (!ex_ctl->is_top() && do_replaced_nodes) { 1949 replaced_nodes_exception.apply(C, ex_ctl); 1950 } 1951 } 1952 1953 1954 //------------------------------increment_counter------------------------------ 1955 // for statistics: increment a VM counter by 1 1956 1957 void GraphKit::increment_counter(address counter_addr) { 1958 Node* adr1 = makecon(TypeRawPtr::make(counter_addr)); 1959 increment_counter(adr1); 1960 } 1961 1962 void GraphKit::increment_counter(Node* counter_addr) { 1963 int adr_type = Compile::AliasIdxRaw; 1964 Node* ctrl = control(); 1965 Node* cnt = make_load(ctrl, counter_addr, TypeInt::INT, T_INT, adr_type, MemNode::unordered); 1966 Node* incr = _gvn.transform(new AddINode(cnt, _gvn.intcon(1))); 1967 store_to_memory(ctrl, counter_addr, incr, T_INT, adr_type, MemNode::unordered); 1968 } 1969 1970 1971 //------------------------------uncommon_trap---------------------------------- 1972 // Bail out to the interpreter in mid-method. Implemented by calling the 1973 // uncommon_trap blob. This helper function inserts a runtime call with the 1974 // right debug info. 1975 void GraphKit::uncommon_trap(int trap_request, 1976 ciKlass* klass, const char* comment, 1977 bool must_throw, 1978 bool keep_exact_action) { 1979 if (failing()) stop(); 1980 if (stopped()) return; // trap reachable? 1981 1982 // Note: If ProfileTraps is true, and if a deopt. actually 1983 // occurs here, the runtime will make sure an MDO exists. There is 1984 // no need to call method()->ensure_method_data() at this point. 1985 1986 // Set the stack pointer to the right value for reexecution: 1987 set_sp(reexecute_sp()); 1988 1989 #ifdef ASSERT 1990 if (!must_throw) { 1991 // Make sure the stack has at least enough depth to execute 1992 // the current bytecode. 1993 int inputs, ignored_depth; 1994 if (compute_stack_effects(inputs, ignored_depth)) { 1995 assert(sp() >= inputs, "must have enough JVMS stack to execute %s: sp=%d, inputs=%d", 1996 Bytecodes::name(java_bc()), sp(), inputs); 1997 } 1998 } 1999 #endif 2000 2001 Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(trap_request); 2002 Deoptimization::DeoptAction action = Deoptimization::trap_request_action(trap_request); 2003 2004 switch (action) { 2005 case Deoptimization::Action_maybe_recompile: 2006 case Deoptimization::Action_reinterpret: 2007 // Temporary fix for 6529811 to allow virtual calls to be sure they 2008 // get the chance to go from mono->bi->mega 2009 if (!keep_exact_action && 2010 Deoptimization::trap_request_index(trap_request) < 0 && 2011 too_many_recompiles(reason)) { 2012 // This BCI is causing too many recompilations. 
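        // Downgrade to Action_none: the trap below still deoptimizes this
        // path, but the runtime is no longer asked to recompile the method
        // because of it.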
2013 if (C->log() != NULL) { 2014 C->log()->elem("observe that='trap_action_change' reason='%s' from='%s' to='none'", 2015 Deoptimization::trap_reason_name(reason), 2016 Deoptimization::trap_action_name(action)); 2017 } 2018 action = Deoptimization::Action_none; 2019 trap_request = Deoptimization::make_trap_request(reason, action); 2020 } else { 2021 C->set_trap_can_recompile(true); 2022 } 2023 break; 2024 case Deoptimization::Action_make_not_entrant: 2025 C->set_trap_can_recompile(true); 2026 break; 2027 #ifdef ASSERT 2028 case Deoptimization::Action_none: 2029 case Deoptimization::Action_make_not_compilable: 2030 break; 2031 default: 2032 fatal("unknown action %d: %s", action, Deoptimization::trap_action_name(action)); 2033 break; 2034 #endif 2035 } 2036 2037 if (TraceOptoParse) { 2038 char buf[100]; 2039 tty->print_cr("Uncommon trap %s at bci:%d", 2040 Deoptimization::format_trap_request(buf, sizeof(buf), 2041 trap_request), bci()); 2042 } 2043 2044 CompileLog* log = C->log(); 2045 if (log != NULL) { 2046 int kid = (klass == NULL)? -1: log->identify(klass); 2047 log->begin_elem("uncommon_trap bci='%d'", bci()); 2048 char buf[100]; 2049 log->print(" %s", Deoptimization::format_trap_request(buf, sizeof(buf), 2050 trap_request)); 2051 if (kid >= 0) log->print(" klass='%d'", kid); 2052 if (comment != NULL) log->print(" comment='%s'", comment); 2053 log->end_elem(); 2054 } 2055 2056 // Make sure any guarding test views this path as very unlikely 2057 Node *i0 = control()->in(0); 2058 if (i0 != NULL && i0->is_If()) { // Found a guarding if test? 2059 IfNode *iff = i0->as_If(); 2060 float f = iff->_prob; // Get prob 2061 if (control()->Opcode() == Op_IfTrue) { 2062 if (f > PROB_UNLIKELY_MAG(4)) 2063 iff->_prob = PROB_MIN; 2064 } else { 2065 if (f < PROB_LIKELY_MAG(4)) 2066 iff->_prob = PROB_MAX; 2067 } 2068 } 2069 2070 // Clear out dead values from the debug info. 2071 kill_dead_locals(); 2072 2073 // Now insert the uncommon trap subroutine call 2074 address call_addr = SharedRuntime::uncommon_trap_blob()->entry_point(); 2075 const TypePtr* no_memory_effects = NULL; 2076 // Pass the index of the class to be loaded 2077 Node* call = make_runtime_call(RC_NO_LEAF | RC_UNCOMMON | 2078 (must_throw ? RC_MUST_THROW : 0), 2079 OptoRuntime::uncommon_trap_Type(), 2080 call_addr, "uncommon_trap", no_memory_effects, 2081 intcon(trap_request)); 2082 assert(call->as_CallStaticJava()->uncommon_trap_request() == trap_request, 2083 "must extract request correctly from the graph"); 2084 assert(trap_request != 0, "zero value reserved by uncommon_trap_request"); 2085 2086 call->set_req(TypeFunc::ReturnAdr, returnadr()); 2087 // The debug info is the only real input to this call. 2088 2089 // Halt-and-catch fire here. The above call should never return! 2090 HaltNode* halt = new HaltNode(control(), frameptr()); 2091 _gvn.set_type_bottom(halt); 2092 root()->add_req(halt); 2093 2094 stop_and_kill_map(); 2095 } 2096 2097 2098 //--------------------------just_allocated_object------------------------------ 2099 // Report the object that was just allocated. 2100 // It must be the case that there are no intervening safepoints. 2101 // We use this to determine if an object is so "fresh" that 2102 // it does not require card marks. 
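// For example (names are illustrative only), in code compiled from
//   Foo f = new Foo(); f.x = y;
// with no safepoint between the allocation and the store, the store hits a
// "fresh" object and its card mark / post barrier can be elided.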
Node* GraphKit::just_allocated_object(Node* current_control) {
  if (C->recent_alloc_ctl() == current_control)
    return C->recent_alloc_obj();
  return NULL;
}


void GraphKit::round_double_arguments(ciMethod* dest_method) {
  // (Note:  TypeFunc::make has a cache that makes this fast.)
  const TypeFunc* tf    = TypeFunc::make(dest_method);
  int             nargs = tf->domain()->cnt() - TypeFunc::Parms;
  for (int j = 0; j < nargs; j++) {
    const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms);
    if( targ->basic_type() == T_DOUBLE ) {
      // If any parameters are doubles, they must be rounded before
      // the call, dstore_rounding does gvn.transform
      Node *arg = argument(j);
      arg = dstore_rounding(arg);
      set_argument(j, arg);
    }
  }
}

/**
 * Record profiling data exact_kls for Node n with the type system so
 * that it can propagate it (speculation)
 *
 * @param n          node that the type applies to
 * @param exact_kls  type from profiling
 * @param maybe_null did profiling see null?
 *
 * @return           node with improved type
 */
Node* GraphKit::record_profile_for_speculation(Node* n, ciKlass* exact_kls, bool maybe_null) {
  const Type* current_type = _gvn.type(n);
  assert(UseTypeSpeculation, "type speculation must be on");

  const TypePtr* speculative = current_type->speculative();

  // Should the klass from the profile be recorded in the speculative type?
  if (current_type->would_improve_type(exact_kls, jvms()->depth())) {
    const TypeKlassPtr* tklass = TypeKlassPtr::make(exact_kls);
    const TypeOopPtr* xtype = tklass->as_instance_type();
    assert(xtype->klass_is_exact(), "Should be exact");
    // Any reason to believe n is not null (from this profiling or a previous one)?
    const TypePtr* ptr = (maybe_null && current_type->speculative_maybe_null()) ? TypePtr::BOTTOM : TypePtr::NOTNULL;
    // record the new speculative type's depth
    speculative = xtype->cast_to_ptr_type(ptr->ptr())->is_ptr();
    speculative = speculative->with_inline_depth(jvms()->depth());
  } else if (current_type->would_improve_ptr(maybe_null)) {
    // Profiling reports that null was never seen, so we can change the
    // speculative type to a non-null ptr.
    assert(!maybe_null, "nothing to improve");
    if (speculative == NULL) {
      speculative = TypePtr::NOTNULL;
    } else {
      const TypePtr* ptr = TypePtr::NOTNULL;
      speculative = speculative->cast_to_ptr_type(ptr->ptr())->is_ptr();
    }
  }

  if (speculative != current_type->speculative()) {
    // Build a type with a speculative type (what we think we know
    // about the type but will need a guard when we use it)
    const TypeOopPtr* spec_type = TypeOopPtr::make(TypePtr::BotPTR, Type::OffsetBot, TypeOopPtr::InstanceBot, speculative);
    // We're changing the type, we need a new CheckCast node to carry
    // the new type. The new type depends on the control: what
    // profiling tells us is only valid from here as far as we can
    // tell.
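    // For instance (hypothetical example), if every profile sample at this
    // point was an ArrayList and null was never seen, the speculative part of
    // the type becomes an exact, not-null ArrayList, valid only downstream of
    // this control.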
2172 Node* cast = new CheckCastPPNode(control(), n, current_type->remove_speculative()->join_speculative(spec_type)); 2173 cast = _gvn.transform(cast); 2174 replace_in_map(n, cast); 2175 n = cast; 2176 } 2177 2178 return n; 2179 } 2180 2181 /** 2182 * Record profiling data from receiver profiling at an invoke with the 2183 * type system so that it can propagate it (speculation) 2184 * 2185 * @param n receiver node 2186 * 2187 * @return node with improved type 2188 */ 2189 Node* GraphKit::record_profiled_receiver_for_speculation(Node* n) { 2190 if (!UseTypeSpeculation) { 2191 return n; 2192 } 2193 ciKlass* exact_kls = profile_has_unique_klass(); 2194 bool maybe_null = true; 2195 if (java_bc() == Bytecodes::_checkcast || 2196 java_bc() == Bytecodes::_instanceof || 2197 java_bc() == Bytecodes::_aastore) { 2198 ciProfileData* data = method()->method_data()->bci_to_data(bci()); 2199 maybe_null = data == NULL ? true : data->as_BitData()->null_seen(); 2200 } 2201 return record_profile_for_speculation(n, exact_kls, maybe_null); 2202 } 2203 2204 /** 2205 * Record profiling data from argument profiling at an invoke with the 2206 * type system so that it can propagate it (speculation) 2207 * 2208 * @param dest_method target method for the call 2209 * @param bc what invoke bytecode is this? 2210 */ 2211 void GraphKit::record_profiled_arguments_for_speculation(ciMethod* dest_method, Bytecodes::Code bc) { 2212 if (!UseTypeSpeculation) { 2213 return; 2214 } 2215 const TypeFunc* tf = TypeFunc::make(dest_method); 2216 int nargs = tf->domain()->cnt() - TypeFunc::Parms; 2217 int skip = Bytecodes::has_receiver(bc) ? 1 : 0; 2218 for (int j = skip, i = 0; j < nargs && i < TypeProfileArgsLimit; j++) { 2219 const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms); 2220 if (targ->basic_type() == T_OBJECT || targ->basic_type() == T_ARRAY) { 2221 bool maybe_null = true; 2222 ciKlass* better_type = NULL; 2223 if (method()->argument_profiled_type(bci(), i, better_type, maybe_null)) { 2224 record_profile_for_speculation(argument(j), better_type, maybe_null); 2225 } 2226 i++; 2227 } 2228 } 2229 } 2230 2231 /** 2232 * Record profiling data from parameter profiling at an invoke with 2233 * the type system so that it can propagate it (speculation) 2234 */ 2235 void GraphKit::record_profiled_parameters_for_speculation() { 2236 if (!UseTypeSpeculation) { 2237 return; 2238 } 2239 for (int i = 0, j = 0; i < method()->arg_size() ; i++) { 2240 if (_gvn.type(local(i))->isa_oopptr()) { 2241 bool maybe_null = true; 2242 ciKlass* better_type = NULL; 2243 if (method()->parameter_profiled_type(j, better_type, maybe_null)) { 2244 record_profile_for_speculation(local(i), better_type, maybe_null); 2245 } 2246 j++; 2247 } 2248 } 2249 } 2250 2251 /** 2252 * Record profiling data from return value profiling at an invoke with 2253 * the type system so that it can propagate it (speculation) 2254 */ 2255 void GraphKit::record_profiled_return_for_speculation() { 2256 if (!UseTypeSpeculation) { 2257 return; 2258 } 2259 bool maybe_null = true; 2260 ciKlass* better_type = NULL; 2261 if (method()->return_profiled_type(bci(), better_type, maybe_null)) { 2262 // If profiling reports a single type for the return value, 2263 // feed it to the type system so it can propagate it as a 2264 // speculative type 2265 record_profile_for_speculation(stack(sp()-1), better_type, maybe_null); 2266 } 2267 } 2268 2269 void GraphKit::round_double_result(ciMethod* dest_method) { 2270 // A non-strict method may return a double value which has an extended 2271 // 
exponent, but this must not be visible in a caller which is 'strict' 2272 // If a strict caller invokes a non-strict callee, round a double result 2273 2274 BasicType result_type = dest_method->return_type()->basic_type(); 2275 assert( method() != NULL, "must have caller context"); 2276 if( result_type == T_DOUBLE && method()->is_strict() && !dest_method->is_strict() ) { 2277 // Destination method's return value is on top of stack 2278 // dstore_rounding() does gvn.transform 2279 Node *result = pop_pair(); 2280 result = dstore_rounding(result); 2281 push_pair(result); 2282 } 2283 } 2284 2285 // rounding for strict float precision conformance 2286 Node* GraphKit::precision_rounding(Node* n) { 2287 return UseStrictFP && _method->flags().is_strict() 2288 && UseSSE == 0 && Matcher::strict_fp_requires_explicit_rounding 2289 ? _gvn.transform( new RoundFloatNode(0, n) ) 2290 : n; 2291 } 2292 2293 // rounding for strict double precision conformance 2294 Node* GraphKit::dprecision_rounding(Node *n) { 2295 return UseStrictFP && _method->flags().is_strict() 2296 && UseSSE <= 1 && Matcher::strict_fp_requires_explicit_rounding 2297 ? _gvn.transform( new RoundDoubleNode(0, n) ) 2298 : n; 2299 } 2300 2301 // rounding for non-strict double stores 2302 Node* GraphKit::dstore_rounding(Node* n) { 2303 return Matcher::strict_fp_requires_explicit_rounding 2304 && UseSSE <= 1 2305 ? _gvn.transform( new RoundDoubleNode(0, n) ) 2306 : n; 2307 } 2308 2309 //============================================================================= 2310 // Generate a fast path/slow path idiom. Graph looks like: 2311 // [foo] indicates that 'foo' is a parameter 2312 // 2313 // [in] NULL 2314 // \ / 2315 // CmpP 2316 // Bool ne 2317 // If 2318 // / \ 2319 // True False-<2> 2320 // / | 2321 // / cast_not_null 2322 // Load | | ^ 2323 // [fast_test] | | 2324 // gvn to opt_test | | 2325 // / \ | <1> 2326 // True False | 2327 // | \\ | 2328 // [slow_call] \[fast_result] 2329 // Ctl Val \ \ 2330 // | \ \ 2331 // Catch <1> \ \ 2332 // / \ ^ \ \ 2333 // Ex No_Ex | \ \ 2334 // | \ \ | \ <2> \ 2335 // ... \ [slow_res] | | \ [null_result] 2336 // \ \--+--+--- | | 2337 // \ | / \ | / 2338 // --------Region Phi 2339 // 2340 //============================================================================= 2341 // Code is structured as a series of driver functions all called 'do_XXX' that 2342 // call a set of helper functions. Helper functions first, then drivers. 2343 2344 //------------------------------null_check_oop--------------------------------- 2345 // Null check oop. Set null-path control into Region in slot 3. 2346 // Make a cast-not-nullness use the other not-null control. Return cast. 2347 Node* GraphKit::null_check_oop(Node* value, Node* *null_control, 2348 bool never_see_null, 2349 bool safe_for_replace, 2350 bool speculative) { 2351 // Initial NULL check taken path 2352 (*null_control) = top(); 2353 Node* cast = null_check_common(value, T_OBJECT, false, null_control, speculative); 2354 2355 // Generate uncommon_trap: 2356 if (never_see_null && (*null_control) != top()) { 2357 // If we see an unexpected null at a check-cast we record it and force a 2358 // recompile; the offending check-cast will be compiled to handle NULLs. 2359 // If we see more than one offending BCI, then all checkcasts in the 2360 // method will be compiled to handle NULLs. 
2361 PreserveJVMState pjvms(this); 2362 set_control(*null_control); 2363 replace_in_map(value, null()); 2364 Deoptimization::DeoptReason reason = Deoptimization::reason_null_check(speculative); 2365 uncommon_trap(reason, 2366 Deoptimization::Action_make_not_entrant); 2367 (*null_control) = top(); // NULL path is dead 2368 } 2369 if ((*null_control) == top() && safe_for_replace) { 2370 replace_in_map(value, cast); 2371 } 2372 2373 // Cast away null-ness on the result 2374 return cast; 2375 } 2376 2377 //------------------------------opt_iff---------------------------------------- 2378 // Optimize the fast-check IfNode. Set the fast-path region slot 2. 2379 // Return slow-path control. 2380 Node* GraphKit::opt_iff(Node* region, Node* iff) { 2381 IfNode *opt_iff = _gvn.transform(iff)->as_If(); 2382 2383 // Fast path taken; set region slot 2 2384 Node *fast_taken = _gvn.transform( new IfFalseNode(opt_iff) ); 2385 region->init_req(2,fast_taken); // Capture fast-control 2386 2387 // Fast path not-taken, i.e. slow path 2388 Node *slow_taken = _gvn.transform( new IfTrueNode(opt_iff) ); 2389 return slow_taken; 2390 } 2391 2392 //-----------------------------make_runtime_call------------------------------- 2393 Node* GraphKit::make_runtime_call(int flags, 2394 const TypeFunc* call_type, address call_addr, 2395 const char* call_name, 2396 const TypePtr* adr_type, 2397 // The following parms are all optional. 2398 // The first NULL ends the list. 2399 Node* parm0, Node* parm1, 2400 Node* parm2, Node* parm3, 2401 Node* parm4, Node* parm5, 2402 Node* parm6, Node* parm7) { 2403 // Slow-path call 2404 bool is_leaf = !(flags & RC_NO_LEAF); 2405 bool has_io = (!is_leaf && !(flags & RC_NO_IO)); 2406 if (call_name == NULL) { 2407 assert(!is_leaf, "must supply name for leaf"); 2408 call_name = OptoRuntime::stub_name(call_addr); 2409 } 2410 CallNode* call; 2411 if (!is_leaf) { 2412 call = new CallStaticJavaNode(call_type, call_addr, call_name, 2413 bci(), adr_type); 2414 } else if (flags & RC_NO_FP) { 2415 call = new CallLeafNoFPNode(call_type, call_addr, call_name, adr_type); 2416 } else { 2417 call = new CallLeafNode(call_type, call_addr, call_name, adr_type); 2418 } 2419 2420 // The following is similar to set_edges_for_java_call, 2421 // except that the memory effects of the call are restricted to AliasIdxRaw. 2422 2423 // Slow path call has no side-effects, uses few values 2424 bool wide_in = !(flags & RC_NARROW_MEM); 2425 bool wide_out = (C->get_alias_index(adr_type) == Compile::AliasIdxBot); 2426 2427 Node* prev_mem = NULL; 2428 if (wide_in) { 2429 prev_mem = set_predefined_input_for_runtime_call(call); 2430 } else { 2431 assert(!wide_out, "narrow in => narrow out"); 2432 Node* narrow_mem = memory(adr_type); 2433 prev_mem = reset_memory(); 2434 map()->set_memory(narrow_mem); 2435 set_predefined_input_for_runtime_call(call); 2436 } 2437 2438 // Hook each parm in order. Stop looking at the first NULL. 
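  // The nesting below means the parameter list is "prefix-closed": the first
  // NULL parameter ends the list, and the assert that follows checks that the
  // signature's last parameter slot really was filled in.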
2439 if (parm0 != NULL) { call->init_req(TypeFunc::Parms+0, parm0); 2440 if (parm1 != NULL) { call->init_req(TypeFunc::Parms+1, parm1); 2441 if (parm2 != NULL) { call->init_req(TypeFunc::Parms+2, parm2); 2442 if (parm3 != NULL) { call->init_req(TypeFunc::Parms+3, parm3); 2443 if (parm4 != NULL) { call->init_req(TypeFunc::Parms+4, parm4); 2444 if (parm5 != NULL) { call->init_req(TypeFunc::Parms+5, parm5); 2445 if (parm6 != NULL) { call->init_req(TypeFunc::Parms+6, parm6); 2446 if (parm7 != NULL) { call->init_req(TypeFunc::Parms+7, parm7); 2447 /* close each nested if ===> */ } } } } } } } } 2448 assert(call->in(call->req()-1) != NULL, "must initialize all parms"); 2449 2450 if (!is_leaf) { 2451 // Non-leaves can block and take safepoints: 2452 add_safepoint_edges(call, ((flags & RC_MUST_THROW) != 0)); 2453 } 2454 // Non-leaves can throw exceptions: 2455 if (has_io) { 2456 call->set_req(TypeFunc::I_O, i_o()); 2457 } 2458 2459 if (flags & RC_UNCOMMON) { 2460 // Set the count to a tiny probability. Cf. Estimate_Block_Frequency. 2461 // (An "if" probability corresponds roughly to an unconditional count. 2462 // Sort of.) 2463 call->set_cnt(PROB_UNLIKELY_MAG(4)); 2464 } 2465 2466 Node* c = _gvn.transform(call); 2467 assert(c == call, "cannot disappear"); 2468 2469 if (wide_out) { 2470 // Slow path call has full side-effects. 2471 set_predefined_output_for_runtime_call(call); 2472 } else { 2473 // Slow path call has few side-effects, and/or sets few values. 2474 set_predefined_output_for_runtime_call(call, prev_mem, adr_type); 2475 } 2476 2477 if (has_io) { 2478 set_i_o(_gvn.transform(new ProjNode(call, TypeFunc::I_O))); 2479 } 2480 return call; 2481 2482 } 2483 2484 //------------------------------merge_memory----------------------------------- 2485 // Merge memory from one path into the current memory state. 
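// For every memory slice where the incoming state (new_mem) differs from the
// current state, make sure there is a Phi on 'region' merging the two and set
// its input for 'new_path'; slices that are identical on both paths are left
// untouched.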
2486 void GraphKit::merge_memory(Node* new_mem, Node* region, int new_path) { 2487 for (MergeMemStream mms(merged_memory(), new_mem->as_MergeMem()); mms.next_non_empty2(); ) { 2488 Node* old_slice = mms.force_memory(); 2489 Node* new_slice = mms.memory2(); 2490 if (old_slice != new_slice) { 2491 PhiNode* phi; 2492 if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region) { 2493 if (mms.is_empty()) { 2494 // clone base memory Phi's inputs for this memory slice 2495 assert(old_slice == mms.base_memory(), "sanity"); 2496 phi = PhiNode::make(region, NULL, Type::MEMORY, mms.adr_type(C)); 2497 _gvn.set_type(phi, Type::MEMORY); 2498 for (uint i = 1; i < phi->req(); i++) { 2499 phi->init_req(i, old_slice->in(i)); 2500 } 2501 } else { 2502 phi = old_slice->as_Phi(); // Phi was generated already 2503 } 2504 } else { 2505 phi = PhiNode::make(region, old_slice, Type::MEMORY, mms.adr_type(C)); 2506 _gvn.set_type(phi, Type::MEMORY); 2507 } 2508 phi->set_req(new_path, new_slice); 2509 mms.set_memory(phi); 2510 } 2511 } 2512 } 2513 2514 //------------------------------make_slow_call_ex------------------------------ 2515 // Make the exception handler hookups for the slow call 2516 void GraphKit::make_slow_call_ex(Node* call, ciInstanceKlass* ex_klass, bool separate_io_proj, bool deoptimize) { 2517 if (stopped()) return; 2518 2519 // Make a catch node with just two handlers: fall-through and catch-all 2520 Node* i_o = _gvn.transform( new ProjNode(call, TypeFunc::I_O, separate_io_proj) ); 2521 Node* catc = _gvn.transform( new CatchNode(control(), i_o, 2) ); 2522 Node* norm = _gvn.transform( new CatchProjNode(catc, CatchProjNode::fall_through_index, CatchProjNode::no_handler_bci) ); 2523 Node* excp = _gvn.transform( new CatchProjNode(catc, CatchProjNode::catch_all_index, CatchProjNode::no_handler_bci) ); 2524 2525 { PreserveJVMState pjvms(this); 2526 set_control(excp); 2527 set_i_o(i_o); 2528 2529 if (excp != top()) { 2530 if (deoptimize) { 2531 // Deoptimize if an exception is caught. Don't construct exception state in this case. 2532 uncommon_trap(Deoptimization::Reason_unhandled, 2533 Deoptimization::Action_none); 2534 } else { 2535 // Create an exception state also. 2536 // Use an exact type if the caller has specified a specific exception. 2537 const Type* ex_type = TypeOopPtr::make_from_klass_unique(ex_klass)->cast_to_ptr_type(TypePtr::NotNull); 2538 Node* ex_oop = new CreateExNode(ex_type, control(), i_o); 2539 add_exception_state(make_exception_state(_gvn.transform(ex_oop))); 2540 } 2541 } 2542 } 2543 2544 // Get the no-exception control from the CatchNode. 2545 set_control(norm); 2546 } 2547 2548 static IfNode* gen_subtype_check_compare(Node* ctrl, Node* in1, Node* in2, BoolTest::mask test, float p, PhaseGVN* gvn, BasicType bt) { 2549 Node* cmp = NULL; 2550 switch(bt) { 2551 case T_INT: cmp = new CmpINode(in1, in2); break; 2552 case T_ADDRESS: cmp = new CmpPNode(in1, in2); break; 2553 default: fatal("unexpected comparison type %s", type2name(bt)); 2554 } 2555 gvn->transform(cmp); 2556 Node* bol = gvn->transform(new BoolNode(cmp, test)); 2557 IfNode* iff = new IfNode(ctrl, bol, p, COUNT_UNKNOWN); 2558 gvn->transform(iff); 2559 if (!bol->is_Con()) gvn->record_for_igvn(iff); 2560 return iff; 2561 } 2562 2563 2564 //-------------------------------gen_subtype_check----------------------------- 2565 // Generate a subtyping check. Takes as input the subtype and supertype. 2566 // Returns 2 values: sets the default control() to the true path and returns 2567 // the false path. 
Only reads invariant memory; sets no (visible) memory. 2568 // The PartialSubtypeCheckNode sets the hidden 1-word cache in the encoding 2569 // but that's not exposed to the optimizer. This call also doesn't take in an 2570 // Object; if you wish to check an Object you need to load the Object's class 2571 // prior to coming here. 2572 Node* Phase::gen_subtype_check(Node* subklass, Node* superklass, Node** ctrl, MergeMemNode* mem, PhaseGVN* gvn) { 2573 Compile* C = gvn->C; 2574 2575 if ((*ctrl)->is_top()) { 2576 return C->top(); 2577 } 2578 2579 // Fast check for identical types, perhaps identical constants. 2580 // The types can even be identical non-constants, in cases 2581 // involving Array.newInstance, Object.clone, etc. 2582 if (subklass == superklass) 2583 return C->top(); // false path is dead; no test needed. 2584 2585 if (gvn->type(superklass)->singleton()) { 2586 ciKlass* superk = gvn->type(superklass)->is_klassptr()->klass(); 2587 ciKlass* subk = gvn->type(subklass)->is_klassptr()->klass(); 2588 2589 // In the common case of an exact superklass, try to fold up the 2590 // test before generating code. You may ask, why not just generate 2591 // the code and then let it fold up? The answer is that the generated 2592 // code will necessarily include null checks, which do not always 2593 // completely fold away. If they are also needless, then they turn 2594 // into a performance loss. Example: 2595 // Foo[] fa = blah(); Foo x = fa[0]; fa[1] = x; 2596 // Here, the type of 'fa' is often exact, so the store check 2597 // of fa[1]=x will fold up, without testing the nullness of x. 2598 switch (C->static_subtype_check(superk, subk)) { 2599 case Compile::SSC_always_false: 2600 { 2601 Node* always_fail = *ctrl; 2602 *ctrl = gvn->C->top(); 2603 return always_fail; 2604 } 2605 case Compile::SSC_always_true: 2606 return C->top(); 2607 case Compile::SSC_easy_test: 2608 { 2609 // Just do a direct pointer compare and be done. 2610 IfNode* iff = gen_subtype_check_compare(*ctrl, subklass, superklass, BoolTest::eq, PROB_STATIC_FREQUENT, gvn, T_ADDRESS); 2611 *ctrl = gvn->transform(new IfTrueNode(iff)); 2612 return gvn->transform(new IfFalseNode(iff)); 2613 } 2614 case Compile::SSC_full_test: 2615 break; 2616 default: 2617 ShouldNotReachHere(); 2618 } 2619 } 2620 2621 // %%% Possible further optimization: Even if the superklass is not exact, 2622 // if the subklass is the unique subtype of the superklass, the check 2623 // will always succeed. We could leave a dependency behind to ensure this. 2624 2625 // First load the super-klass's check-offset 2626 Node *p1 = gvn->transform(new AddPNode(superklass, superklass, gvn->MakeConX(in_bytes(Klass::super_check_offset_offset())))); 2627 Node* m = mem->memory_at(C->get_alias_index(gvn->type(p1)->is_ptr())); 2628 Node *chk_off = gvn->transform(new LoadINode(NULL, m, p1, gvn->type(p1)->is_ptr(), TypeInt::INT, MemNode::unordered)); 2629 int cacheoff_con = in_bytes(Klass::secondary_super_cache_offset()); 2630 bool might_be_cache = (gvn->find_int_con(chk_off, cacheoff_con) == cacheoff_con); 2631 2632 // Load from the sub-klass's super-class display list, or a 1-word cache of 2633 // the secondary superclass list, or a failing value with a sentinel offset 2634 // if the super-klass is an interface or exceptionally deep in the Java 2635 // hierarchy and we have to scan the secondary superclass list the hard way. 2636 // Worst-case type is a little odd: NULL is allowed as a result (usually 2637 // klass loads can never produce a NULL). 
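  // Widen the check-offset to a machine word before using it in address
  // arithmetic (ConvI2L on 64-bit platforms).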
2638 Node *chk_off_X = chk_off; 2639 #ifdef _LP64 2640 chk_off_X = gvn->transform(new ConvI2LNode(chk_off_X)); 2641 #endif 2642 Node *p2 = gvn->transform(new AddPNode(subklass,subklass,chk_off_X)); 2643 // For some types like interfaces the following loadKlass is from a 1-word 2644 // cache which is mutable so can't use immutable memory. Other 2645 // types load from the super-class display table which is immutable. 2646 m = mem->memory_at(C->get_alias_index(gvn->type(p2)->is_ptr())); 2647 Node *kmem = might_be_cache ? m : C->immutable_memory(); 2648 Node *nkls = gvn->transform(LoadKlassNode::make(*gvn, NULL, kmem, p2, gvn->type(p2)->is_ptr(), TypeKlassPtr::OBJECT_OR_NULL)); 2649 2650 // Compile speed common case: ARE a subtype and we canNOT fail 2651 if( superklass == nkls ) 2652 return C->top(); // false path is dead; no test needed. 2653 2654 // See if we get an immediate positive hit. Happens roughly 83% of the 2655 // time. Test to see if the value loaded just previously from the subklass 2656 // is exactly the superklass. 2657 IfNode *iff1 = gen_subtype_check_compare(*ctrl, superklass, nkls, BoolTest::eq, PROB_LIKELY(0.83f), gvn, T_ADDRESS); 2658 Node *iftrue1 = gvn->transform( new IfTrueNode (iff1)); 2659 *ctrl = gvn->transform(new IfFalseNode(iff1)); 2660 2661 // Compile speed common case: Check for being deterministic right now. If 2662 // chk_off is a constant and not equal to cacheoff then we are NOT a 2663 // subklass. In this case we need exactly the 1 test above and we can 2664 // return those results immediately. 2665 if (!might_be_cache) { 2666 Node* not_subtype_ctrl = *ctrl; 2667 *ctrl = iftrue1; // We need exactly the 1 test above 2668 return not_subtype_ctrl; 2669 } 2670 2671 // Gather the various success & failures here 2672 RegionNode *r_ok_subtype = new RegionNode(4); 2673 gvn->record_for_igvn(r_ok_subtype); 2674 RegionNode *r_not_subtype = new RegionNode(3); 2675 gvn->record_for_igvn(r_not_subtype); 2676 2677 r_ok_subtype->init_req(1, iftrue1); 2678 2679 // Check for immediate negative hit. Happens roughly 11% of the time (which 2680 // is roughly 63% of the remaining cases). Test to see if the loaded 2681 // check-offset points into the subklass display list or the 1-element 2682 // cache. If it points to the display (and NOT the cache) and the display 2683 // missed then it's not a subtype. 2684 Node *cacheoff = gvn->intcon(cacheoff_con); 2685 IfNode *iff2 = gen_subtype_check_compare(*ctrl, chk_off, cacheoff, BoolTest::ne, PROB_LIKELY(0.63f), gvn, T_INT); 2686 r_not_subtype->init_req(1, gvn->transform(new IfTrueNode (iff2))); 2687 *ctrl = gvn->transform(new IfFalseNode(iff2)); 2688 2689 // Check for self. Very rare to get here, but it is taken 1/3 the time. 2690 // No performance impact (too rare) but allows sharing of secondary arrays 2691 // which has some footprint reduction. 2692 IfNode *iff3 = gen_subtype_check_compare(*ctrl, subklass, superklass, BoolTest::eq, PROB_LIKELY(0.36f), gvn, T_ADDRESS); 2693 r_ok_subtype->init_req(2, gvn->transform(new IfTrueNode(iff3))); 2694 *ctrl = gvn->transform(new IfFalseNode(iff3)); 2695 2696 // -- Roads not taken here: -- 2697 // We could also have chosen to perform the self-check at the beginning 2698 // of this code sequence, as the assembler does. This would not pay off 2699 // the same way, since the optimizer, unlike the assembler, can perform 2700 // static type analysis to fold away many successful self-checks. 
  // Non-foldable self checks work better here in second position, because
  // the initial primary superclass check subsumes a self-check for most
  // types.  An exception would be a secondary type like array-of-interface,
  // which does not appear in its own primary supertype display.
  // Finally, we could have chosen to move the self-check into the
  // PartialSubtypeCheckNode, and from there out-of-line in a platform
  // dependent manner.  But it is worthwhile to have the check here,
  // where it can perhaps be optimized.  The cost in code space is
  // small (register compare, branch).

  // Now do a linear scan of the secondary super-klass array.  Again, no real
  // performance impact (too rare) but it's gotta be done.
  // Since the code is rarely used, there is no penalty for moving it
  // out of line, and it can only improve I-cache density.
  // The decision to inline or out-of-line this final check is platform
  // dependent, and is found in the AD file definition of PartialSubtypeCheck.
  Node* psc = gvn->transform(
    new PartialSubtypeCheckNode(*ctrl, subklass, superklass));

  IfNode *iff4 = gen_subtype_check_compare(*ctrl, psc, gvn->zerocon(T_OBJECT), BoolTest::ne, PROB_FAIR, gvn, T_ADDRESS);
  r_not_subtype->init_req(2, gvn->transform(new IfTrueNode (iff4)));
  r_ok_subtype ->init_req(3, gvn->transform(new IfFalseNode(iff4)));

  // Return false path; set default control to true path.
  *ctrl = gvn->transform(r_ok_subtype);
  return gvn->transform(r_not_subtype);
}

// Profile-driven exact type check:
Node* GraphKit::type_check_receiver(Node* receiver, ciKlass* klass,
                                    float prob,
                                    Node* *casted_receiver) {
  const TypeKlassPtr* tklass = TypeKlassPtr::make(klass);
  Node* recv_klass = load_object_klass(receiver);
  Node* want_klass = makecon(tklass);
  Node* cmp = _gvn.transform( new CmpPNode(recv_klass, want_klass) );
  Node* bol = _gvn.transform( new BoolNode(cmp, BoolTest::eq) );
  IfNode* iff = create_and_xform_if(control(), bol, prob, COUNT_UNKNOWN);
  set_control( _gvn.transform( new IfTrueNode (iff) ));
  Node* fail = _gvn.transform( new IfFalseNode(iff) );

  const TypeOopPtr* recv_xtype = tklass->as_instance_type();
  assert(recv_xtype->klass_is_exact(), "");

  // Subsume downstream occurrences of receiver with a cast to
  // recv_xtype, since now we know what the type will be.
  Node* cast = new CheckCastPPNode(control(), receiver, recv_xtype);
  (*casted_receiver) = _gvn.transform(cast);
  // (User must make the replace_in_map call.)

  return fail;
}


//------------------------------seems_never_null-------------------------------
// Use null_seen information if it is available from the profile.
// If we see an unexpected null at a type check we record it and force a
// recompile; the offending check will be recompiled to handle NULLs.
// If we see several offending BCIs, then all checks in the
// method will be recompiled.
bool GraphKit::seems_never_null(Node* obj, ciProfileData* data, bool& speculating) {
  speculating = !_gvn.type(obj)->speculative_maybe_null();
  Deoptimization::DeoptReason reason = Deoptimization::reason_null_check(speculating);
  if (UncommonNullCast               // Cutout for this technique
      && obj != null()               // And not the -Xcomp stupid case?
2766 && !too_many_traps(reason) 2767 ) { 2768 if (speculating) { 2769 return true; 2770 } 2771 if (data == NULL) 2772 // Edge case: no mature data. Be optimistic here. 2773 return true; 2774 // If the profile has not seen a null, assume it won't happen. 2775 assert(java_bc() == Bytecodes::_checkcast || 2776 java_bc() == Bytecodes::_instanceof || 2777 java_bc() == Bytecodes::_aastore, "MDO must collect null_seen bit here"); 2778 return !data->as_BitData()->null_seen(); 2779 } 2780 speculating = false; 2781 return false; 2782 } 2783 2784 //------------------------maybe_cast_profiled_receiver------------------------- 2785 // If the profile has seen exactly one type, narrow to exactly that type. 2786 // Subsequent type checks will always fold up. 2787 Node* GraphKit::maybe_cast_profiled_receiver(Node* not_null_obj, 2788 ciKlass* require_klass, 2789 ciKlass* spec_klass, 2790 bool safe_for_replace) { 2791 if (!UseTypeProfile || !TypeProfileCasts) return NULL; 2792 2793 Deoptimization::DeoptReason reason = Deoptimization::reason_class_check(spec_klass != NULL); 2794 2795 // Make sure we haven't already deoptimized from this tactic. 2796 if (too_many_traps(reason) || too_many_recompiles(reason)) 2797 return NULL; 2798 2799 // (No, this isn't a call, but it's enough like a virtual call 2800 // to use the same ciMethod accessor to get the profile info...) 2801 // If we have a speculative type use it instead of profiling (which 2802 // may not help us) 2803 ciKlass* exact_kls = spec_klass == NULL ? profile_has_unique_klass() : spec_klass; 2804 if (exact_kls != NULL) {// no cast failures here 2805 if (require_klass == NULL || 2806 C->static_subtype_check(require_klass, exact_kls) == Compile::SSC_always_true) { 2807 // If we narrow the type to match what the type profile sees or 2808 // the speculative type, we can then remove the rest of the 2809 // cast. 2810 // This is a win, even if the exact_kls is very specific, 2811 // because downstream operations, such as method calls, 2812 // will often benefit from the sharper type. 2813 Node* exact_obj = not_null_obj; // will get updated in place... 2814 Node* slow_ctl = type_check_receiver(exact_obj, exact_kls, 1.0, 2815 &exact_obj); 2816 { PreserveJVMState pjvms(this); 2817 set_control(slow_ctl); 2818 uncommon_trap_exact(reason, Deoptimization::Action_maybe_recompile); 2819 } 2820 if (safe_for_replace) { 2821 replace_in_map(not_null_obj, exact_obj); 2822 } 2823 return exact_obj; 2824 } 2825 // assert(ssc == Compile::SSC_always_true)... except maybe the profile lied to us. 
2826 } 2827 2828 return NULL; 2829 } 2830 2831 /** 2832 * Cast obj to type and emit guard unless we had too many traps here 2833 * already 2834 * 2835 * @param obj node being casted 2836 * @param type type to cast the node to 2837 * @param not_null true if we know node cannot be null 2838 */ 2839 Node* GraphKit::maybe_cast_profiled_obj(Node* obj, 2840 ciKlass* type, 2841 bool not_null) { 2842 if (stopped()) { 2843 return obj; 2844 } 2845 2846 // type == NULL if profiling tells us this object is always null 2847 if (type != NULL) { 2848 Deoptimization::DeoptReason class_reason = Deoptimization::Reason_speculate_class_check; 2849 Deoptimization::DeoptReason null_reason = Deoptimization::Reason_speculate_null_check; 2850 2851 if (!too_many_traps(null_reason) && !too_many_recompiles(null_reason) && 2852 !too_many_traps(class_reason) && 2853 !too_many_recompiles(class_reason)) { 2854 Node* not_null_obj = NULL; 2855 // not_null is true if we know the object is not null and 2856 // there's no need for a null check 2857 if (!not_null) { 2858 Node* null_ctl = top(); 2859 not_null_obj = null_check_oop(obj, &null_ctl, true, true, true); 2860 assert(null_ctl->is_top(), "no null control here"); 2861 } else { 2862 not_null_obj = obj; 2863 } 2864 2865 Node* exact_obj = not_null_obj; 2866 ciKlass* exact_kls = type; 2867 Node* slow_ctl = type_check_receiver(exact_obj, exact_kls, 1.0, 2868 &exact_obj); 2869 { 2870 PreserveJVMState pjvms(this); 2871 set_control(slow_ctl); 2872 uncommon_trap_exact(class_reason, Deoptimization::Action_maybe_recompile); 2873 } 2874 replace_in_map(not_null_obj, exact_obj); 2875 obj = exact_obj; 2876 } 2877 } else { 2878 if (!too_many_traps(Deoptimization::Reason_null_assert) && 2879 !too_many_recompiles(Deoptimization::Reason_null_assert)) { 2880 Node* exact_obj = null_assert(obj); 2881 replace_in_map(obj, exact_obj); 2882 obj = exact_obj; 2883 } 2884 } 2885 return obj; 2886 } 2887 2888 //-------------------------------gen_instanceof-------------------------------- 2889 // Generate an instance-of idiom. Used by both the instance-of bytecode 2890 // and the reflective instance-of call. 2891 Node* GraphKit::gen_instanceof(Node* obj, Node* superklass, bool safe_for_replace) { 2892 kill_dead_locals(); // Benefit all the uncommon traps 2893 assert( !stopped(), "dead parse path should be checked in callers" ); 2894 assert(!TypePtr::NULL_PTR->higher_equal(_gvn.type(superklass)->is_klassptr()), 2895 "must check for not-null not-dead klass in callers"); 2896 2897 // Make the merge point 2898 enum { _obj_path = 1, _fail_path, _null_path, PATH_LIMIT }; 2899 RegionNode* region = new RegionNode(PATH_LIMIT); 2900 Node* phi = new PhiNode(region, TypeInt::BOOL); 2901 C->set_has_split_ifs(true); // Has chance for split-if optimization 2902 2903 ciProfileData* data = NULL; 2904 if (java_bc() == Bytecodes::_instanceof) { // Only for the bytecode 2905 data = method()->method_data()->bci_to_data(bci()); 2906 } 2907 bool speculative_not_null = false; 2908 bool never_see_null = (ProfileDynamicTypes // aggressive use of profile 2909 && seems_never_null(obj, data, speculative_not_null)); 2910 2911 // Null check; get casted pointer; set region slot 3 2912 Node* null_ctl = top(); 2913 Node* not_null_obj = null_check_oop(obj, &null_ctl, never_see_null, safe_for_replace, speculative_not_null); 2914 2915 // If not_null_obj is dead, only null-path is taken 2916 if (stopped()) { // Doing instance-of on a NULL? 
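    // 'null instanceof T' is false by JLS semantics, so on a path where the
    // value is known to be null we simply answer 0.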
2917 set_control(null_ctl); 2918 return intcon(0); 2919 } 2920 region->init_req(_null_path, null_ctl); 2921 phi ->init_req(_null_path, intcon(0)); // Set null path value 2922 if (null_ctl == top()) { 2923 // Do this eagerly, so that pattern matches like is_diamond_phi 2924 // will work even during parsing. 2925 assert(_null_path == PATH_LIMIT-1, "delete last"); 2926 region->del_req(_null_path); 2927 phi ->del_req(_null_path); 2928 } 2929 2930 // Do we know the type check always succeed? 2931 bool known_statically = false; 2932 if (_gvn.type(superklass)->singleton()) { 2933 ciKlass* superk = _gvn.type(superklass)->is_klassptr()->klass(); 2934 ciKlass* subk = _gvn.type(obj)->is_oopptr()->klass(); 2935 if (subk != NULL && subk->is_loaded()) { 2936 int static_res = C->static_subtype_check(superk, subk); 2937 known_statically = (static_res == Compile::SSC_always_true || static_res == Compile::SSC_always_false); 2938 } 2939 } 2940 2941 if (known_statically && UseTypeSpeculation) { 2942 // If we know the type check always succeeds then we don't use the 2943 // profiling data at this bytecode. Don't lose it, feed it to the 2944 // type system as a speculative type. 2945 not_null_obj = record_profiled_receiver_for_speculation(not_null_obj); 2946 } else { 2947 const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr(); 2948 // We may not have profiling here or it may not help us. If we 2949 // have a speculative type use it to perform an exact cast. 2950 ciKlass* spec_obj_type = obj_type->speculative_type(); 2951 if (spec_obj_type != NULL || (ProfileDynamicTypes && data != NULL)) { 2952 Node* cast_obj = maybe_cast_profiled_receiver(not_null_obj, NULL, spec_obj_type, safe_for_replace); 2953 if (stopped()) { // Profile disagrees with this path. 2954 set_control(null_ctl); // Null is the only remaining possibility. 2955 return intcon(0); 2956 } 2957 if (cast_obj != NULL) { 2958 not_null_obj = cast_obj; 2959 } 2960 } 2961 } 2962 2963 if (ShenandoahVerifyReadsToFromSpace) { 2964 not_null_obj = shenandoah_read_barrier(not_null_obj); 2965 } 2966 2967 // Load the object's klass 2968 Node* obj_klass = load_object_klass(not_null_obj); 2969 2970 // Generate the subtype check 2971 Node* not_subtype_ctrl = gen_subtype_check(obj_klass, superklass); 2972 2973 // Plug in the success path to the general merge in slot 1. 2974 region->init_req(_obj_path, control()); 2975 phi ->init_req(_obj_path, intcon(1)); 2976 2977 // Plug in the failing path to the general merge in slot 2. 2978 region->init_req(_fail_path, not_subtype_ctrl); 2979 phi ->init_req(_fail_path, intcon(0)); 2980 2981 // Return final merged results 2982 set_control( _gvn.transform(region) ); 2983 record_for_igvn(region); 2984 return _gvn.transform(phi); 2985 } 2986 2987 //-------------------------------gen_checkcast--------------------------------- 2988 // Generate a checkcast idiom. Used by both the checkcast bytecode and the 2989 // array store bytecode. Stack must be as-if BEFORE doing the bytecode so the 2990 // uncommon-trap paths work. Adjust stack after this call. 2991 // If failure_control is supplied and not null, it is filled in with 2992 // the control edge for the cast failure. Otherwise, an appropriate 2993 // uncommon trap or exception is thrown. 
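// For illustration (identifier names are made up), both of the following
// compile through gen_checkcast:
//   String s = (String) obj;   // checkcast bytecode
//   objArray[i] = element;     // aastore element-type check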
2994 Node* GraphKit::gen_checkcast(Node *obj, Node* superklass, 2995 Node* *failure_control) { 2996 kill_dead_locals(); // Benefit all the uncommon traps 2997 const TypeKlassPtr *tk = _gvn.type(superklass)->is_klassptr(); 2998 const Type *toop = TypeOopPtr::make_from_klass(tk->klass()); 2999 3000 // Fast cutout: Check the case that the cast is vacuously true. 3001 // This detects the common cases where the test will short-circuit 3002 // away completely. We do this before we perform the null check, 3003 // because if the test is going to turn into zero code, we don't 3004 // want a residual null check left around. (Causes a slowdown, 3005 // for example, in some objArray manipulations, such as a[i]=a[j].) 3006 if (tk->singleton()) { 3007 const TypeOopPtr* objtp = _gvn.type(obj)->isa_oopptr(); 3008 if (objtp != NULL && objtp->klass() != NULL) { 3009 switch (C->static_subtype_check(tk->klass(), objtp->klass())) { 3010 case Compile::SSC_always_true: 3011 // If we know the type check always succeed then we don't use 3012 // the profiling data at this bytecode. Don't lose it, feed it 3013 // to the type system as a speculative type. 3014 return record_profiled_receiver_for_speculation(obj); 3015 case Compile::SSC_always_false: 3016 // It needs a null check because a null will *pass* the cast check. 3017 // A non-null value will always produce an exception. 3018 return null_assert(obj); 3019 } 3020 } 3021 } 3022 3023 ciProfileData* data = NULL; 3024 bool safe_for_replace = false; 3025 if (failure_control == NULL) { // use MDO in regular case only 3026 assert(java_bc() == Bytecodes::_aastore || 3027 java_bc() == Bytecodes::_checkcast, 3028 "interpreter profiles type checks only for these BCs"); 3029 data = method()->method_data()->bci_to_data(bci()); 3030 safe_for_replace = true; 3031 } 3032 3033 // Make the merge point 3034 enum { _obj_path = 1, _null_path, PATH_LIMIT }; 3035 RegionNode* region = new RegionNode(PATH_LIMIT); 3036 Node* phi = new PhiNode(region, toop); 3037 C->set_has_split_ifs(true); // Has chance for split-if optimization 3038 3039 // Use null-cast information if it is available 3040 bool speculative_not_null = false; 3041 bool never_see_null = ((failure_control == NULL) // regular case only 3042 && seems_never_null(obj, data, speculative_not_null)); 3043 3044 // Null check; get casted pointer; set region slot 3 3045 Node* null_ctl = top(); 3046 Node* not_null_obj = null_check_oop(obj, &null_ctl, never_see_null, safe_for_replace, speculative_not_null); 3047 3048 if (ShenandoahVerifyReadsToFromSpace) { 3049 not_null_obj = shenandoah_read_barrier(not_null_obj); 3050 } 3051 3052 // If not_null_obj is dead, only null-path is taken 3053 if (stopped()) { // Doing instance-of on a NULL? 3054 set_control(null_ctl); 3055 return null(); 3056 } 3057 region->init_req(_null_path, null_ctl); 3058 phi ->init_req(_null_path, null()); // Set null path value 3059 if (null_ctl == top()) { 3060 // Do this eagerly, so that pattern matches like is_diamond_phi 3061 // will work even during parsing. 3062 assert(_null_path == PATH_LIMIT-1, "delete last"); 3063 region->del_req(_null_path); 3064 phi ->del_req(_null_path); 3065 } 3066 3067 Node* cast_obj = NULL; 3068 if (tk->klass_is_exact()) { 3069 // The following optimization tries to statically cast the speculative type of the object 3070 // (for example obtained during profiling) to the type of the superklass and then do a 3071 // dynamic check that the type of the object is what we expect. 
To work correctly 3072 // for checkcast and aastore the type of superklass should be exact. 3073 const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr(); 3074 // We may not have profiling here or it may not help us. If we have 3075 // a speculative type use it to perform an exact cast. 3076 ciKlass* spec_obj_type = obj_type->speculative_type(); 3077 if (spec_obj_type != NULL || data != NULL) { 3078 cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace); 3079 if (cast_obj != NULL) { 3080 if (failure_control != NULL) // failure is now impossible 3081 (*failure_control) = top(); 3082 // adjust the type of the phi to the exact klass: 3083 phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR)); 3084 } 3085 } 3086 } 3087 3088 if (cast_obj == NULL) { 3089 // Load the object's klass 3090 Node* obj_klass = load_object_klass(not_null_obj); 3091 3092 // Generate the subtype check 3093 Node* not_subtype_ctrl = gen_subtype_check( obj_klass, superklass ); 3094 3095 // Plug in success path into the merge 3096 cast_obj = _gvn.transform(new CheckCastPPNode(control(), not_null_obj, toop)); 3097 // Failure path ends in uncommon trap (or may be dead - failure impossible) 3098 if (failure_control == NULL) { 3099 if (not_subtype_ctrl != top()) { // If failure is possible 3100 PreserveJVMState pjvms(this); 3101 set_control(not_subtype_ctrl); 3102 builtin_throw(Deoptimization::Reason_class_check, obj_klass); 3103 } 3104 } else { 3105 (*failure_control) = not_subtype_ctrl; 3106 } 3107 } 3108 3109 region->init_req(_obj_path, control()); 3110 phi ->init_req(_obj_path, cast_obj); 3111 3112 // A merge of NULL or Casted-NotNull obj 3113 Node* res = _gvn.transform(phi); 3114 3115 // Note I do NOT always 'replace_in_map(obj,result)' here. 3116 // if( tk->klass()->can_be_primary_super() ) 3117 // This means that if I successfully store an Object into an array-of-String 3118 // I 'forget' that the Object is really now known to be a String. I have to 3119 // do this because we don't have true union types for interfaces - if I store 3120 // a Baz into an array-of-Interface and then tell the optimizer it's an 3121 // Interface, I forget that it's also a Baz and cannot do Baz-like field 3122 // references to it. FIX THIS WHEN UNION TYPES APPEAR! 3123 // replace_in_map( obj, res ); 3124 3125 // Return final merged results 3126 set_control( _gvn.transform(region) ); 3127 record_for_igvn(region); 3128 return res; 3129 } 3130 3131 //------------------------------next_monitor----------------------------------- 3132 // What number should be given to the next monitor? 3133 int GraphKit::next_monitor() { 3134 int current = jvms()->monitor_depth()* C->sync_stack_slots(); 3135 int next = current + C->sync_stack_slots(); 3136 // Keep the toplevel high water mark current: 3137 if (C->fixed_slots() < next) C->set_fixed_slots(next); 3138 return current; 3139 } 3140 3141 //------------------------------insert_mem_bar--------------------------------- 3142 // Memory barrier to avoid floating things around 3143 // The membar serves as a pinch point between both control and all memory slices. 
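// For illustration, a typical use (a sketch, not an exact sequence from this
// file) is fencing a volatile access:
//   insert_mem_bar(Op_MemBarRelease);    // pinch control + all memory before the store
//   ... emit the volatile store ...
//   insert_mem_bar(Op_MemBarVolatile);   // and again after it
// Because this membar pinches *all* memory slices, prefer
// insert_mem_bar_volatile() below when only one alias category must be pinned.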
3144 Node* GraphKit::insert_mem_bar(int opcode, Node* precedent) { 3145 MemBarNode* mb = MemBarNode::make(C, opcode, Compile::AliasIdxBot, precedent); 3146 mb->init_req(TypeFunc::Control, control()); 3147 mb->init_req(TypeFunc::Memory, reset_memory()); 3148 Node* membar = _gvn.transform(mb); 3149 set_control(_gvn.transform(new ProjNode(membar, TypeFunc::Control))); 3150 set_all_memory_call(membar); 3151 return membar; 3152 } 3153 3154 //-------------------------insert_mem_bar_volatile---------------------------- 3155 // Memory barrier to avoid floating things around 3156 // The membar serves as a pinch point between both control and memory(alias_idx). 3157 // If you want to make a pinch point on all memory slices, do not use this 3158 // function (even with AliasIdxBot); use insert_mem_bar() instead. 3159 Node* GraphKit::insert_mem_bar_volatile(int opcode, int alias_idx, Node* precedent) { 3160 // When Parse::do_put_xxx updates a volatile field, it appends a series 3161 // of MemBarVolatile nodes, one for *each* volatile field alias category. 3162 // The first membar is on the same memory slice as the field store opcode. 3163 // This forces the membar to follow the store. (Bug 6500685 broke this.) 3164 // All the other membars (for other volatile slices, including AliasIdxBot, 3165 // which stands for all unknown volatile slices) are control-dependent 3166 // on the first membar. This prevents later volatile loads or stores 3167 // from sliding up past the just-emitted store. 3168 3169 MemBarNode* mb = MemBarNode::make(C, opcode, alias_idx, precedent); 3170 mb->set_req(TypeFunc::Control,control()); 3171 if (alias_idx == Compile::AliasIdxBot) { 3172 mb->set_req(TypeFunc::Memory, merged_memory()->base_memory()); 3173 } else { 3174 assert(!(opcode == Op_Initialize && alias_idx != Compile::AliasIdxRaw), "fix caller"); 3175 mb->set_req(TypeFunc::Memory, memory(alias_idx)); 3176 } 3177 Node* membar = _gvn.transform(mb); 3178 set_control(_gvn.transform(new ProjNode(membar, TypeFunc::Control))); 3179 if (alias_idx == Compile::AliasIdxBot) { 3180 merged_memory()->set_base_memory(_gvn.transform(new ProjNode(membar, TypeFunc::Memory))); 3181 } else { 3182 set_memory(_gvn.transform(new ProjNode(membar, TypeFunc::Memory)),alias_idx); 3183 } 3184 return membar; 3185 } 3186 3187 //------------------------------shared_lock------------------------------------ 3188 // Emit locking code. 3189 FastLockNode* GraphKit::shared_lock(Node* obj) { 3190 // bci is either a monitorenter bc or InvocationEntryBci 3191 // %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces 3192 assert(SynchronizationEntryBCI == InvocationEntryBci, ""); 3193 3194 if( !GenerateSynchronizationCode ) 3195 return NULL; // Not locking things? 3196 if (stopped()) // Dead monitor? 3197 return NULL; 3198 3199 assert(dead_locals_are_killed(), "should kill locals before sync. point"); 3200 3201 obj = shenandoah_write_barrier(obj); 3202 3203 // Box the stack location 3204 Node* box = _gvn.transform(new BoxLockNode(next_monitor())); 3205 Node* mem = reset_memory(); 3206 3207 FastLockNode * flock = _gvn.transform(new FastLockNode(0, obj, box) )->as_FastLock(); 3208 if (UseBiasedLocking && PrintPreciseBiasedLockingStatistics) { 3209 // Create the counters for this fast lock. 3210 flock->create_lock_counter(sync_jvms()); // sync_jvms used to get current bci 3211 } 3212 3213 // Create the rtm counters for this fast lock if needed. 
3214 flock->create_rtm_lock_counter(sync_jvms()); // sync_jvms used to get current bci 3215 3216 // Add monitor to debug info for the slow path. If we block inside the 3217 // slow path and de-opt, we need the monitor hanging around 3218 map()->push_monitor( flock ); 3219 3220 const TypeFunc *tf = LockNode::lock_type(); 3221 LockNode *lock = new LockNode(C, tf); 3222 3223 lock->init_req( TypeFunc::Control, control() ); 3224 lock->init_req( TypeFunc::Memory , mem ); 3225 lock->init_req( TypeFunc::I_O , top() ) ; // does no i/o 3226 lock->init_req( TypeFunc::FramePtr, frameptr() ); 3227 lock->init_req( TypeFunc::ReturnAdr, top() ); 3228 3229 lock->init_req(TypeFunc::Parms + 0, obj); 3230 lock->init_req(TypeFunc::Parms + 1, box); 3231 lock->init_req(TypeFunc::Parms + 2, flock); 3232 add_safepoint_edges(lock); 3233 3234 lock = _gvn.transform( lock )->as_Lock(); 3235 3236 // lock has no side-effects, sets few values 3237 set_predefined_output_for_runtime_call(lock, mem, TypeRawPtr::BOTTOM); 3238 3239 insert_mem_bar(Op_MemBarAcquireLock); 3240 3241 // Add this to the worklist so that the lock can be eliminated 3242 record_for_igvn(lock); 3243 3244 #ifndef PRODUCT 3245 if (PrintLockStatistics) { 3246 // Update the counter for this lock. Don't bother using an atomic 3247 // operation since we don't require absolute accuracy. 3248 lock->create_lock_counter(map()->jvms()); 3249 increment_counter(lock->counter()->addr()); 3250 } 3251 #endif 3252 3253 return flock; 3254 } 3255 3256 3257 //------------------------------shared_unlock---------------------------------- 3258 // Emit unlocking code. 3259 void GraphKit::shared_unlock(Node* box, Node* obj) { 3260 // bci is either a monitorenter bc or InvocationEntryBci 3261 // %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces 3262 assert(SynchronizationEntryBCI == InvocationEntryBci, ""); 3263 3264 if( !GenerateSynchronizationCode ) 3265 return; 3266 if (stopped()) { // Dead monitor? 3267 map()->pop_monitor(); // Kill monitor from debug info 3268 return; 3269 } 3270 3271 // Memory barrier to avoid floating things down past the locked region 3272 insert_mem_bar(Op_MemBarReleaseLock); 3273 3274 const TypeFunc *tf = OptoRuntime::complete_monitor_exit_Type(); 3275 UnlockNode *unlock = new UnlockNode(C, tf); 3276 #ifdef ASSERT 3277 unlock->set_dbg_jvms(sync_jvms()); 3278 #endif 3279 uint raw_idx = Compile::AliasIdxRaw; 3280 unlock->init_req( TypeFunc::Control, control() ); 3281 unlock->init_req( TypeFunc::Memory , memory(raw_idx) ); 3282 unlock->init_req( TypeFunc::I_O , top() ) ; // does no i/o 3283 unlock->init_req( TypeFunc::FramePtr, frameptr() ); 3284 unlock->init_req( TypeFunc::ReturnAdr, top() ); 3285 3286 unlock->init_req(TypeFunc::Parms + 0, obj); 3287 unlock->init_req(TypeFunc::Parms + 1, box); 3288 unlock = _gvn.transform(unlock)->as_Unlock(); 3289 3290 Node* mem = reset_memory(); 3291 3292 // unlock has no side-effects, sets few values 3293 set_predefined_output_for_runtime_call(unlock, mem, TypeRawPtr::BOTTOM); 3294 3295 // Kill monitor from debug info 3296 map()->pop_monitor( ); 3297 } 3298 3299 //-------------------------------get_layout_helper----------------------------- 3300 // If the given klass is a constant or known to be an array, 3301 // fetch the constant layout helper value into constant_value 3302 // and return (Node*)NULL. Otherwise, load the non-constant 3303 // layout helper value, and return the node which represents it. 
3304 // This two-faced routine is useful because allocation sites 3305 // almost always feature constant types. 3306 Node* GraphKit::get_layout_helper(Node* klass_node, jint& constant_value) { 3307 const TypeKlassPtr* inst_klass = _gvn.type(klass_node)->isa_klassptr(); 3308 if (!StressReflectiveCode && inst_klass != NULL) { 3309 ciKlass* klass = inst_klass->klass(); 3310 bool xklass = inst_klass->klass_is_exact(); 3311 if (xklass || klass->is_array_klass()) { 3312 jint lhelper = klass->layout_helper(); 3313 if (lhelper != Klass::_lh_neutral_value) { 3314 constant_value = lhelper; 3315 return (Node*) NULL; 3316 } 3317 } 3318 } 3319 constant_value = Klass::_lh_neutral_value; // put in a known value 3320 Node* lhp = basic_plus_adr(klass_node, klass_node, in_bytes(Klass::layout_helper_offset())); 3321 return make_load(NULL, lhp, TypeInt::INT, T_INT, MemNode::unordered); 3322 } 3323 3324 // We just put in an allocate/initialize with a big raw-memory effect. 3325 // Hook selected additional alias categories on the initialization. 3326 static void hook_memory_on_init(GraphKit& kit, int alias_idx, 3327 MergeMemNode* init_in_merge, 3328 Node* init_out_raw) { 3329 DEBUG_ONLY(Node* init_in_raw = init_in_merge->base_memory()); 3330 assert(init_in_merge->memory_at(alias_idx) == init_in_raw, ""); 3331 3332 Node* prevmem = kit.memory(alias_idx); 3333 init_in_merge->set_memory_at(alias_idx, prevmem); 3334 kit.set_memory(init_out_raw, alias_idx); 3335 } 3336 3337 //---------------------------set_output_for_allocation------------------------- 3338 Node* GraphKit::set_output_for_allocation(AllocateNode* alloc, 3339 const TypeOopPtr* oop_type, 3340 bool deoptimize_on_exception) { 3341 int rawidx = Compile::AliasIdxRaw; 3342 alloc->set_req( TypeFunc::FramePtr, frameptr() ); 3343 add_safepoint_edges(alloc); 3344 Node* allocx = _gvn.transform(alloc); 3345 set_control( _gvn.transform(new ProjNode(allocx, TypeFunc::Control) ) ); 3346 // create memory projection for i_o 3347 set_memory ( _gvn.transform( new ProjNode(allocx, TypeFunc::Memory, true) ), rawidx ); 3348 make_slow_call_ex(allocx, env()->Throwable_klass(), true, deoptimize_on_exception); 3349 3350 // create a memory projection as for the normal control path 3351 Node* malloc = _gvn.transform(new ProjNode(allocx, TypeFunc::Memory)); 3352 set_memory(malloc, rawidx); 3353 3354 // a normal slow-call doesn't change i_o, but an allocation does 3355 // we create a separate i_o projection for the normal control path 3356 set_i_o(_gvn.transform( new ProjNode(allocx, TypeFunc::I_O, false) ) ); 3357 Node* rawoop = _gvn.transform( new ProjNode(allocx, TypeFunc::Parms) ); 3358 3359 // put in an initialization barrier 3360 InitializeNode* init = insert_mem_bar_volatile(Op_Initialize, rawidx, 3361 rawoop)->as_Initialize(); 3362 assert(alloc->initialization() == init, "2-way macro link must work"); 3363 assert(init ->allocation() == alloc, "2-way macro link must work"); 3364 { 3365 // Extract memory strands which may participate in the new object's 3366 // initialization, and source them from the new InitializeNode. 3367 // This will allow us to observe initializations when they occur, 3368 // and link them properly (as a group) to the InitializeNode. 
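    // (Illustrative summary of hook_memory_on_init(), defined above: for a
    //  chosen alias category 'idx' it performs, in effect,
    //     minit_in->set_memory_at(idx, previous kit memory(idx));
    //     kit.set_memory(minit_out, idx);
    //  so later stores in that category are observed by the InitializeNode.)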
3369 assert(init->in(InitializeNode::Memory) == malloc, ""); 3370 MergeMemNode* minit_in = MergeMemNode::make(malloc); 3371 init->set_req(InitializeNode::Memory, minit_in); 3372 record_for_igvn(minit_in); // fold it up later, if possible 3373 Node* minit_out = memory(rawidx); 3374 assert(minit_out->is_Proj() && minit_out->in(0) == init, ""); 3375 if (oop_type->isa_aryptr()) { 3376 const TypePtr* telemref = oop_type->add_offset(Type::OffsetBot); 3377 int elemidx = C->get_alias_index(telemref); 3378 hook_memory_on_init(*this, elemidx, minit_in, minit_out); 3379 } else if (oop_type->isa_instptr()) { 3380 ciInstanceKlass* ik = oop_type->klass()->as_instance_klass(); 3381 for (int i = 0, len = ik->nof_nonstatic_fields(); i < len; i++) { 3382 ciField* field = ik->nonstatic_field_at(i); 3383 if (field->offset() >= TrackedInitializationLimit * HeapWordSize) 3384 continue; // do not bother to track really large numbers of fields 3385 // Find (or create) the alias category for this field: 3386 int fieldidx = C->alias_type(field)->index(); 3387 hook_memory_on_init(*this, fieldidx, minit_in, minit_out); 3388 } 3389 } 3390 } 3391 3392 // Cast raw oop to the real thing... 3393 Node* javaoop = new CheckCastPPNode(control(), rawoop, oop_type); 3394 javaoop = _gvn.transform(javaoop); 3395 C->set_recent_alloc(control(), javaoop); 3396 assert(just_allocated_object(control()) == javaoop, "just allocated"); 3397 3398 #ifdef ASSERT 3399 { // Verify that the AllocateNode::Ideal_allocation recognizers work: 3400 assert(AllocateNode::Ideal_allocation(rawoop, &_gvn) == alloc, 3401 "Ideal_allocation works"); 3402 assert(AllocateNode::Ideal_allocation(javaoop, &_gvn) == alloc, 3403 "Ideal_allocation works"); 3404 if (alloc->is_AllocateArray()) { 3405 assert(AllocateArrayNode::Ideal_array_allocation(rawoop, &_gvn) == alloc->as_AllocateArray(), 3406 "Ideal_allocation works"); 3407 assert(AllocateArrayNode::Ideal_array_allocation(javaoop, &_gvn) == alloc->as_AllocateArray(), 3408 "Ideal_allocation works"); 3409 } else { 3410 assert(alloc->in(AllocateNode::ALength)->is_top(), "no length, please"); 3411 } 3412 } 3413 #endif //ASSERT 3414 3415 return javaoop; 3416 } 3417 3418 //---------------------------new_instance-------------------------------------- 3419 // This routine takes a klass_node which may be constant (for a static type) 3420 // or may be non-constant (for reflective code). It will work equally well 3421 // for either, and the graph will fold nicely if the optimizer later reduces 3422 // the type to a constant. 3423 // The optional arguments are for specialized use by intrinsics: 3424 // - If 'extra_slow_test' if not null is an extra condition for the slow-path. 3425 // - If 'return_size_val', report the the total object size to the caller. 3426 // - deoptimize_on_exception controls how Java exceptions are handled (rethrow vs deoptimize) 3427 Node* GraphKit::new_instance(Node* klass_node, 3428 Node* extra_slow_test, 3429 Node* *return_size_val, 3430 bool deoptimize_on_exception) { 3431 // Compute size in doublewords 3432 // The size is always an integral number of doublewords, represented 3433 // as a positive bytewise size stored in the klass's layout_helper. 3434 // The layout_helper also encodes (in a low bit) the need for a slow path. 3435 jint layout_con = Klass::_lh_neutral_value; 3436 Node* layout_val = get_layout_helper(klass_node, layout_con); 3437 int layout_is_con = (layout_val == NULL); 3438 3439 if (extra_slow_test == NULL) extra_slow_test = intcon(0); 3440 // Generate the initial go-slow test. 
It's either ALWAYS (return a 3441 // Node for 1) or NEVER (return a NULL) or perhaps (in the reflective 3442 // case) a computed value derived from the layout_helper. 3443 Node* initial_slow_test = NULL; 3444 if (layout_is_con) { 3445 assert(!StressReflectiveCode, "stress mode does not use these paths"); 3446 bool must_go_slow = Klass::layout_helper_needs_slow_path(layout_con); 3447 initial_slow_test = must_go_slow ? intcon(1) : extra_slow_test; 3448 } else { // reflective case 3449 // This reflective path is used by Unsafe.allocateInstance. 3450 // (It may be stress-tested by specifying StressReflectiveCode.) 3451 // Basically, we want to get into the VM is there's an illegal argument. 3452 Node* bit = intcon(Klass::_lh_instance_slow_path_bit); 3453 initial_slow_test = _gvn.transform( new AndINode(layout_val, bit) ); 3454 if (extra_slow_test != intcon(0)) { 3455 initial_slow_test = _gvn.transform( new OrINode(initial_slow_test, extra_slow_test) ); 3456 } 3457 // (Macro-expander will further convert this to a Bool, if necessary.) 3458 } 3459 3460 // Find the size in bytes. This is easy; it's the layout_helper. 3461 // The size value must be valid even if the slow path is taken. 3462 Node* size = NULL; 3463 if (layout_is_con) { 3464 size = MakeConX(Klass::layout_helper_size_in_bytes(layout_con)); 3465 } else { // reflective case 3466 // This reflective path is used by clone and Unsafe.allocateInstance. 3467 size = ConvI2X(layout_val); 3468 3469 // Clear the low bits to extract layout_helper_size_in_bytes: 3470 assert((int)Klass::_lh_instance_slow_path_bit < BytesPerLong, "clear bit"); 3471 Node* mask = MakeConX(~ (intptr_t)right_n_bits(LogBytesPerLong)); 3472 size = _gvn.transform( new AndXNode(size, mask) ); 3473 } 3474 if (return_size_val != NULL) { 3475 (*return_size_val) = size; 3476 } 3477 3478 // This is a precise notnull oop of the klass. 3479 // (Actually, it need not be precise if this is a reflective allocation.) 3480 // It's what we cast the result to. 3481 const TypeKlassPtr* tklass = _gvn.type(klass_node)->isa_klassptr(); 3482 if (!tklass) tklass = TypeKlassPtr::OBJECT; 3483 const TypeOopPtr* oop_type = tklass->as_instance_type(); 3484 3485 // Now generate allocation code 3486 3487 // The entire memory state is needed for slow path of the allocation 3488 // since GC and deoptimization can happened. 3489 Node *mem = reset_memory(); 3490 set_all_memory(mem); // Create new memory state 3491 3492 AllocateNode* alloc = new AllocateNode(C, AllocateNode::alloc_type(Type::TOP), 3493 control(), mem, i_o(), 3494 size, klass_node, 3495 initial_slow_test); 3496 3497 return set_output_for_allocation(alloc, oop_type, deoptimize_on_exception); 3498 } 3499 3500 //-------------------------------new_array------------------------------------- 3501 // helper for both newarray and anewarray 3502 // The 'length' parameter is (obviously) the length of the array. 3503 // See comments on new_instance for the meaning of the other arguments. 
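// For reference (values are illustrative; klass.hpp has the authoritative
// encoding), get_layout_helper() returns the jint cached in the Klass, where
//   instance klass: lh = instance size in bytes, with the low bit set when a
//                   slow allocation path (e.g. a finalizer) is required;
//   array klass:    lh < 0, packing the array tag, header size in bytes,
//                   element type and log2(element size).
// new_instance() above decodes the former, new_array() below the latter.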
3504 Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable) 3505 Node* length, // number of array elements 3506 int nargs, // number of arguments to push back for uncommon trap 3507 Node* *return_size_val, 3508 bool deoptimize_on_exception) { 3509 jint layout_con = Klass::_lh_neutral_value; 3510 Node* layout_val = get_layout_helper(klass_node, layout_con); 3511 int layout_is_con = (layout_val == NULL); 3512 3513 if (!layout_is_con && !StressReflectiveCode && 3514 !too_many_traps(Deoptimization::Reason_class_check)) { 3515 // This is a reflective array creation site. 3516 // Optimistically assume that it is a subtype of Object[], 3517 // so that we can fold up all the address arithmetic. 3518 layout_con = Klass::array_layout_helper(T_OBJECT); 3519 Node* cmp_lh = _gvn.transform( new CmpINode(layout_val, intcon(layout_con)) ); 3520 Node* bol_lh = _gvn.transform( new BoolNode(cmp_lh, BoolTest::eq) ); 3521 { BuildCutout unless(this, bol_lh, PROB_MAX); 3522 inc_sp(nargs); 3523 uncommon_trap(Deoptimization::Reason_class_check, 3524 Deoptimization::Action_maybe_recompile); 3525 } 3526 layout_val = NULL; 3527 layout_is_con = true; 3528 } 3529 3530 // Generate the initial go-slow test. Make sure we do not overflow 3531 // if length is huge (near 2Gig) or negative! We do not need 3532 // exact double-words here, just a close approximation of needed 3533 // double-words. We can't add any offset or rounding bits, lest we 3534 // take a size -1 of bytes and make it positive. Use an unsigned 3535 // compare, so negative sizes look hugely positive. 3536 int fast_size_limit = FastAllocateSizeLimit; 3537 if (layout_is_con) { 3538 assert(!StressReflectiveCode, "stress mode does not use these paths"); 3539 // Increase the size limit if we have exact knowledge of array type. 3540 int log2_esize = Klass::layout_helper_log2_element_size(layout_con); 3541 fast_size_limit <<= (LogBytesPerLong - log2_esize); 3542 } 3543 3544 Node* initial_slow_cmp = _gvn.transform( new CmpUNode( length, intcon( fast_size_limit ) ) ); 3545 Node* initial_slow_test = _gvn.transform( new BoolNode( initial_slow_cmp, BoolTest::gt ) ); 3546 3547 // --- Size Computation --- 3548 // array_size = round_to_heap(array_header + (length << elem_shift)); 3549 // where round_to_heap(x) == round_to(x, MinObjAlignmentInBytes) 3550 // and round_to(x, y) == ((x + y-1) & ~(y-1)) 3551 // The rounding mask is strength-reduced, if possible. 3552 int round_mask = MinObjAlignmentInBytes - 1; 3553 Node* header_size = NULL; 3554 int header_size_min = arrayOopDesc::base_offset_in_bytes(T_BYTE); 3555 // (T_BYTE has the weakest alignment and size restrictions...) 
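  // Worked example (illustrative, assuming a 16-byte array header and 8-byte
  // heap alignment): for 'new byte[10]', hsize = 16 and eshift = 0, so
  //   array_size = round_to(16 + (10 << 0), 8) = 32 bytes,
  // which the code below computes as ((16 + 7) + 10) & ~7.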
3556 if (layout_is_con) { 3557 int hsize = Klass::layout_helper_header_size(layout_con); 3558 int eshift = Klass::layout_helper_log2_element_size(layout_con); 3559 BasicType etype = Klass::layout_helper_element_type(layout_con); 3560 if ((round_mask & ~right_n_bits(eshift)) == 0) 3561 round_mask = 0; // strength-reduce it if it goes away completely 3562 assert((hsize & right_n_bits(eshift)) == 0, "hsize is pre-rounded"); 3563 assert(header_size_min <= hsize, "generic minimum is smallest"); 3564 header_size_min = hsize; 3565 header_size = intcon(hsize + round_mask); 3566 } else { 3567 Node* hss = intcon(Klass::_lh_header_size_shift); 3568 Node* hsm = intcon(Klass::_lh_header_size_mask); 3569 Node* hsize = _gvn.transform( new URShiftINode(layout_val, hss) ); 3570 hsize = _gvn.transform( new AndINode(hsize, hsm) ); 3571 Node* mask = intcon(round_mask); 3572 header_size = _gvn.transform( new AddINode(hsize, mask) ); 3573 } 3574 3575 Node* elem_shift = NULL; 3576 if (layout_is_con) { 3577 int eshift = Klass::layout_helper_log2_element_size(layout_con); 3578 if (eshift != 0) 3579 elem_shift = intcon(eshift); 3580 } else { 3581 // There is no need to mask or shift this value. 3582 // The semantics of LShiftINode include an implicit mask to 0x1F. 3583 assert(Klass::_lh_log2_element_size_shift == 0, "use shift in place"); 3584 elem_shift = layout_val; 3585 } 3586 3587 // Transition to native address size for all offset calculations: 3588 Node* lengthx = ConvI2X(length); 3589 Node* headerx = ConvI2X(header_size); 3590 #ifdef _LP64 3591 { const TypeInt* tilen = _gvn.find_int_type(length); 3592 if (tilen != NULL && tilen->_lo < 0) { 3593 // Add a manual constraint to a positive range. Cf. array_element_address. 3594 jint size_max = fast_size_limit; 3595 if (size_max > tilen->_hi) size_max = tilen->_hi; 3596 const TypeInt* tlcon = TypeInt::make(0, size_max, Type::WidenMin); 3597 3598 // Only do a narrow I2L conversion if the range check passed. 3599 IfNode* iff = new IfNode(control(), initial_slow_test, PROB_MIN, COUNT_UNKNOWN); 3600 _gvn.transform(iff); 3601 RegionNode* region = new RegionNode(3); 3602 _gvn.set_type(region, Type::CONTROL); 3603 lengthx = new PhiNode(region, TypeLong::LONG); 3604 _gvn.set_type(lengthx, TypeLong::LONG); 3605 3606 // Range check passed. Use ConvI2L node with narrow type. 3607 Node* passed = IfFalse(iff); 3608 region->init_req(1, passed); 3609 // Make I2L conversion control dependent to prevent it from 3610 // floating above the range check during loop optimizations. 3611 lengthx->init_req(1, C->constrained_convI2L(&_gvn, length, tlcon, passed)); 3612 3613 // Range check failed. Use ConvI2L with wide type because length may be invalid. 3614 region->init_req(2, IfTrue(iff)); 3615 lengthx->init_req(2, ConvI2X(length)); 3616 3617 set_control(region); 3618 record_for_igvn(region); 3619 record_for_igvn(lengthx); 3620 } 3621 } 3622 #endif 3623 3624 // Combine header size (plus rounding) and body size. Then round down. 3625 // This computation cannot overflow, because it is used only in two 3626 // places, one where the length is sharply limited, and the other 3627 // after a successful allocation. 
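  // In symbols (round_mask is already folded into header_size above):
  //   size = (headerx + (lengthx << elem_shift)) & ~round_mask
  // which is the strength-reduced form of round_to(header + body, alignment).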
3628 Node* abody = lengthx; 3629 if (elem_shift != NULL) 3630 abody = _gvn.transform( new LShiftXNode(lengthx, elem_shift) ); 3631 Node* size = _gvn.transform( new AddXNode(headerx, abody) ); 3632 if (round_mask != 0) { 3633 Node* mask = MakeConX(~round_mask); 3634 size = _gvn.transform( new AndXNode(size, mask) ); 3635 } 3636 // else if round_mask == 0, the size computation is self-rounding 3637 3638 if (return_size_val != NULL) { 3639 // This is the size 3640 (*return_size_val) = size; 3641 } 3642 3643 // Now generate allocation code 3644 3645 // The entire memory state is needed for slow path of the allocation 3646 // since GC and deoptimization can happened. 3647 Node *mem = reset_memory(); 3648 set_all_memory(mem); // Create new memory state 3649 3650 if (initial_slow_test->is_Bool()) { 3651 // Hide it behind a CMoveI, or else PhaseIdealLoop::split_up will get sick. 3652 initial_slow_test = initial_slow_test->as_Bool()->as_int_value(&_gvn); 3653 } 3654 3655 // Create the AllocateArrayNode and its result projections 3656 AllocateArrayNode* alloc 3657 = new AllocateArrayNode(C, AllocateArrayNode::alloc_type(TypeInt::INT), 3658 control(), mem, i_o(), 3659 size, klass_node, 3660 initial_slow_test, 3661 length); 3662 3663 // Cast to correct type. Note that the klass_node may be constant or not, 3664 // and in the latter case the actual array type will be inexact also. 3665 // (This happens via a non-constant argument to inline_native_newArray.) 3666 // In any case, the value of klass_node provides the desired array type. 3667 const TypeInt* length_type = _gvn.find_int_type(length); 3668 const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type(); 3669 if (ary_type->isa_aryptr() && length_type != NULL) { 3670 // Try to get a better type than POS for the size 3671 ary_type = ary_type->is_aryptr()->cast_to_size(length_type); 3672 } 3673 3674 Node* javaoop = set_output_for_allocation(alloc, ary_type, deoptimize_on_exception); 3675 3676 // Cast length on remaining path to be as narrow as possible 3677 if (map()->find_edge(length) >= 0) { 3678 Node* ccast = alloc->make_ideal_length(ary_type, &_gvn); 3679 if (ccast != length) { 3680 _gvn.set_type_bottom(ccast); 3681 record_for_igvn(ccast); 3682 replace_in_map(length, ccast); 3683 } 3684 } 3685 3686 return javaoop; 3687 } 3688 3689 // The following "Ideal_foo" functions are placed here because they recognize 3690 // the graph shapes created by the functions immediately above. 3691 3692 //---------------------------Ideal_allocation---------------------------------- 3693 // Given an oop pointer or raw pointer, see if it feeds from an AllocateNode. 3694 AllocateNode* AllocateNode::Ideal_allocation(Node* ptr, PhaseTransform* phase) { 3695 if (ptr == NULL) { // reduce dumb test in callers 3696 return NULL; 3697 } 3698 3699 // Attempt to see through Shenandoah barriers. 3700 ptr = ShenandoahBarrierNode::skip_through_barrier(ptr); 3701 3702 if (ptr->is_CheckCastPP()) { // strip only one raw-to-oop cast 3703 ptr = ptr->in(1); 3704 if (ptr == NULL) return NULL; 3705 } 3706 // Return NULL for allocations with several casts: 3707 // j.l.reflect.Array.newInstance(jobject, jint) 3708 // Object.clone() 3709 // to keep more precise type from last cast. 3710 if (ptr->is_Proj()) { 3711 Node* allo = ptr->in(0); 3712 if (allo != NULL && allo->is_Allocate()) { 3713 return allo->as_Allocate(); 3714 } 3715 } 3716 // Report failure to match. 
3717 return NULL; 3718 } 3719 3720 // Fancy version which also strips off an offset (and reports it to caller). 3721 AllocateNode* AllocateNode::Ideal_allocation(Node* ptr, PhaseTransform* phase, 3722 intptr_t& offset) { 3723 Node* base = AddPNode::Ideal_base_and_offset(ptr, phase, offset); 3724 if (base == NULL) return NULL; 3725 return Ideal_allocation(base, phase); 3726 } 3727 3728 // Trace Initialize <- Proj[Parm] <- Allocate 3729 AllocateNode* InitializeNode::allocation() { 3730 Node* rawoop = in(InitializeNode::RawAddress); 3731 if (rawoop->is_Proj()) { 3732 Node* alloc = rawoop->in(0); 3733 if (alloc->is_Allocate()) { 3734 return alloc->as_Allocate(); 3735 } 3736 } 3737 return NULL; 3738 } 3739 3740 // Trace Allocate -> Proj[Parm] -> Initialize 3741 InitializeNode* AllocateNode::initialization() { 3742 ProjNode* rawoop = proj_out(AllocateNode::RawAddress); 3743 if (rawoop == NULL) return NULL; 3744 for (DUIterator_Fast imax, i = rawoop->fast_outs(imax); i < imax; i++) { 3745 Node* init = rawoop->fast_out(i); 3746 if (init->is_Initialize()) { 3747 assert(init->as_Initialize()->allocation() == this, "2-way link"); 3748 return init->as_Initialize(); 3749 } 3750 } 3751 return NULL; 3752 } 3753 3754 //----------------------------- loop predicates --------------------------- 3755 3756 //------------------------------add_predicate_impl---------------------------- 3757 void GraphKit::add_predicate_impl(Deoptimization::DeoptReason reason, int nargs) { 3758 // Too many traps seen? 3759 if (too_many_traps(reason)) { 3760 #ifdef ASSERT 3761 if (TraceLoopPredicate) { 3762 int tc = C->trap_count(reason); 3763 tty->print("too many traps=%s tcount=%d in ", 3764 Deoptimization::trap_reason_name(reason), tc); 3765 method()->print(); // which method has too many predicate traps 3766 tty->cr(); 3767 } 3768 #endif 3769 // We cannot afford to take more traps here, 3770 // do not generate predicate. 3771 return; 3772 } 3773 3774 Node *cont = _gvn.intcon(1); 3775 Node* opq = _gvn.transform(new Opaque1Node(C, cont)); 3776 Node *bol = _gvn.transform(new Conv2BNode(opq)); 3777 IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN); 3778 Node* iffalse = _gvn.transform(new IfFalseNode(iff)); 3779 C->add_predicate_opaq(opq); 3780 { 3781 PreserveJVMState pjvms(this); 3782 set_control(iffalse); 3783 inc_sp(nargs); 3784 uncommon_trap(reason, Deoptimization::Action_maybe_recompile); 3785 } 3786 Node* iftrue = _gvn.transform(new IfTrueNode(iff)); 3787 set_control(iftrue); 3788 } 3789 3790 //------------------------------add_predicate--------------------------------- 3791 void GraphKit::add_predicate(int nargs) { 3792 if (UseLoopPredicate) { 3793 add_predicate_impl(Deoptimization::Reason_predicate, nargs); 3794 } 3795 // loop's limit check predicate should be near the loop. 3796 add_predicate_impl(Deoptimization::Reason_loop_limit_check, nargs); 3797 } 3798 3799 //----------------------------- store barriers ---------------------------- 3800 #define __ ideal. 3801 3802 void GraphKit::sync_kit(IdealKit& ideal) { 3803 set_all_memory(__ merged_memory()); 3804 set_i_o(__ i_o()); 3805 set_control(__ ctrl()); 3806 } 3807 3808 void GraphKit::final_sync(IdealKit& ideal) { 3809 // Final sync IdealKit and graphKit. 
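  // (Sketch of the usual pattern in this file:
  //    IdealKit ideal(this, true);
  //    ... emit nodes through the __ shorthand defined above ...
  //    final_sync(ideal);   // publish ideal's ctrl / i_o / memory back here
  //  as done by write_barrier_post() and the G1 barriers below.)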
3810 sync_kit(ideal); 3811 } 3812 3813 Node* GraphKit::byte_map_base_node() { 3814 // Get base of card map 3815 CardTableModRefBS* ct = 3816 barrier_set_cast<CardTableModRefBS>(Universe::heap()->barrier_set()); 3817 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust users of this code"); 3818 if (ct->byte_map_base != NULL) { 3819 return makecon(TypeRawPtr::make((address)ct->byte_map_base)); 3820 } else { 3821 return null(); 3822 } 3823 } 3824 3825 // vanilla/CMS post barrier 3826 // Insert a write-barrier store. This is to let generational GC work; we have 3827 // to flag all oop-stores before the next GC point. 3828 void GraphKit::write_barrier_post(Node* oop_store, 3829 Node* obj, 3830 Node* adr, 3831 uint adr_idx, 3832 Node* val, 3833 bool use_precise) { 3834 // No store check needed if we're storing a NULL or an old object 3835 // (latter case is probably a string constant). The concurrent 3836 // mark sweep garbage collector, however, needs to have all nonNull 3837 // oop updates flagged via card-marks. 3838 if (val != NULL && val->is_Con()) { 3839 // must be either an oop or NULL 3840 const Type* t = val->bottom_type(); 3841 if (t == TypePtr::NULL_PTR || t == Type::TOP) 3842 // stores of null never (?) need barriers 3843 return; 3844 } 3845 3846 if (use_ReduceInitialCardMarks() 3847 && obj == just_allocated_object(control())) { 3848 // We can skip marks on a freshly-allocated object in Eden. 3849 // Keep this code in sync with new_store_pre_barrier() in runtime.cpp. 3850 // That routine informs GC to take appropriate compensating steps, 3851 // upon a slow-path allocation, so as to make this card-mark 3852 // elision safe. 3853 return; 3854 } 3855 3856 if (!use_precise) { 3857 // All card marks for a (non-array) instance are in one place: 3858 adr = obj; 3859 } 3860 // (Else it's an array (or unknown), and we want more precise card marks.) 3861 assert(adr != NULL, ""); 3862 3863 IdealKit ideal(this, true); 3864 3865 // Convert the pointer to an int prior to doing math on it 3866 Node* cast = __ CastPX(__ ctrl(), adr); 3867 3868 // Divide by card size 3869 assert(Universe::heap()->barrier_set()->is_a(BarrierSet::CardTableModRef), 3870 "Only one we handle so far."); 3871 Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) ); 3872 3873 // Combine card table base and card offset 3874 Node* card_adr = __ AddP(__ top(), byte_map_base_node(), card_offset ); 3875 3876 // Get the alias_index for raw card-mark memory 3877 int adr_type = Compile::AliasIdxRaw; 3878 Node* zero = __ ConI(0); // Dirty card value 3879 BasicType bt = T_BYTE; 3880 3881 if (UseConcMarkSweepGC && UseCondCardMark) { 3882 insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier 3883 __ sync_kit(this); 3884 } 3885 3886 if (UseCondCardMark) { 3887 // The classic GC reference write barrier is typically implemented 3888 // as a store into the global card mark table. Unfortunately 3889 // unconditional stores can result in false sharing and excessive 3890 // coherence traffic as well as false transactional aborts. 3891 // UseCondCardMark enables MP "polite" conditional card mark 3892 // stores. In theory we could relax the load from ctrl() to 3893 // no_ctrl, but that doesn't buy much latitude. 
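    // In pseudo-code, the conditional mark emitted here is roughly:
    //   jbyte* card = byte_map_base + (addr >> card_shift);
    //   if (*card != dirty_val) *card = dirty_val;   // dirty_val is 0 here
    // versus the unconditional '*card = dirty_val' used when UseCondCardMark
    // is off.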
3894 Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, bt, adr_type); 3895 __ if_then(card_val, BoolTest::ne, zero); 3896 } 3897 3898 // Smash zero into card 3899 if( !UseConcMarkSweepGC ) { 3900 __ store(__ ctrl(), card_adr, zero, bt, adr_type, MemNode::unordered); 3901 } else { 3902 // Specialized path for CM store barrier 3903 __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type); 3904 } 3905 3906 if (UseCondCardMark) { 3907 __ end_if(); 3908 } 3909 3910 // Final sync IdealKit and GraphKit. 3911 final_sync(ideal); 3912 } 3913 /* 3914 * Determine if the G1 pre-barrier can be removed. The pre-barrier is 3915 * required by SATB to make sure all objects live at the start of the 3916 * marking are kept alive, all reference updates need to any previous 3917 * reference stored before writing. 3918 * 3919 * If the previous value is NULL there is no need to save the old value. 3920 * References that are NULL are filtered during runtime by the barrier 3921 * code to avoid unnecessary queuing. 3922 * 3923 * However in the case of newly allocated objects it might be possible to 3924 * prove that the reference about to be overwritten is NULL during compile 3925 * time and avoid adding the barrier code completely. 3926 * 3927 * The compiler needs to determine that the object in which a field is about 3928 * to be written is newly allocated, and that no prior store to the same field 3929 * has happened since the allocation. 3930 * 3931 * Returns true if the pre-barrier can be removed 3932 */ 3933 bool GraphKit::g1_can_remove_pre_barrier(PhaseTransform* phase, Node* adr, 3934 BasicType bt, uint adr_idx) { 3935 intptr_t offset = 0; 3936 Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); 3937 AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase); 3938 3939 if (offset == Type::OffsetBot) { 3940 return false; // cannot unalias unless there are precise offsets 3941 } 3942 3943 if (alloc == NULL) { 3944 return false; // No allocation found 3945 } 3946 3947 intptr_t size_in_bytes = type2aelembytes(bt); 3948 3949 Node* mem = memory(adr_idx); // start searching here... 3950 3951 for (int cnt = 0; cnt < 50; cnt++) { 3952 3953 if (mem->is_Store()) { 3954 3955 Node* st_adr = mem->in(MemNode::Address); 3956 intptr_t st_offset = 0; 3957 Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset); 3958 3959 if (st_base == NULL) { 3960 break; // inscrutable pointer 3961 } 3962 3963 // Break we have found a store with same base and offset as ours so break 3964 if (st_base == base && st_offset == offset) { 3965 break; 3966 } 3967 3968 if (st_offset != offset && st_offset != Type::OffsetBot) { 3969 const int MAX_STORE = BytesPerLong; 3970 if (st_offset >= offset + size_in_bytes || 3971 st_offset <= offset - MAX_STORE || 3972 st_offset <= offset - mem->as_Store()->memory_size()) { 3973 // Success: The offsets are provably independent. 3974 // (You may ask, why not just test st_offset != offset and be done? 3975 // The answer is that stores of different sizes can co-exist 3976 // in the same sequence of RawMem effects. We sometimes initialize 3977 // a whole 'tile' of array elements with a single jint or jlong.) 3978 mem = mem->in(MemNode::Memory); 3979 continue; // advance through independent store memory 3980 } 3981 } 3982 3983 if (st_base != base 3984 && MemNode::detect_ptr_independence(base, alloc, st_base, 3985 AllocateNode::Ideal_allocation(st_base, phase), 3986 phase)) { 3987 // Success: The bases are provably independent. 
3988 mem = mem->in(MemNode::Memory); 3989 continue; // advance through independent store memory 3990 } 3991 } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) { 3992 3993 InitializeNode* st_init = mem->in(0)->as_Initialize(); 3994 AllocateNode* st_alloc = st_init->allocation(); 3995 3996 // Make sure that we are looking at the same allocation site. 3997 // The alloc variable is guaranteed to not be null here from earlier check. 3998 if (alloc == st_alloc) { 3999 // Check that the initialization is storing NULL so that no previous store 4000 // has been moved up and directly write a reference 4001 Node* captured_store = st_init->find_captured_store(offset, 4002 type2aelembytes(T_OBJECT), 4003 phase); 4004 if (captured_store == NULL || captured_store == st_init->zero_memory()) { 4005 return true; 4006 } 4007 } 4008 } 4009 4010 // Unless there is an explicit 'continue', we must bail out here, 4011 // because 'mem' is an inscrutable memory state (e.g., a call). 4012 break; 4013 } 4014 4015 return false; 4016 } 4017 4018 static void g1_write_barrier_pre_helper(const GraphKit& kit, Node* adr) { 4019 if (UseShenandoahGC && ShenandoahWriteBarrier && adr != NULL) { 4020 Node* c = kit.control(); 4021 Node* call = c->in(1)->in(1)->in(1)->in(0); 4022 assert(call->is_g1_wb_pre_call(), "g1_wb_pre call expected"); 4023 call->add_req(adr); 4024 } 4025 } 4026 4027 // G1 pre/post barriers 4028 void GraphKit::g1_write_barrier_pre(bool do_load, 4029 Node* obj, 4030 Node* adr, 4031 uint alias_idx, 4032 Node* val, 4033 const TypeOopPtr* val_type, 4034 Node* pre_val, 4035 BasicType bt) { 4036 4037 // Some sanity checks 4038 // Note: val is unused in this routine. 4039 4040 if (do_load) { 4041 // We need to generate the load of the previous value 4042 assert(obj != NULL, "must have a base"); 4043 assert(adr != NULL, "where are loading from?"); 4044 assert(pre_val == NULL, "loaded already?"); 4045 assert(val_type != NULL, "need a type"); 4046 4047 if (use_ReduceInitialCardMarks() 4048 && g1_can_remove_pre_barrier(&_gvn, adr, bt, alias_idx)) { 4049 return; 4050 } 4051 4052 } else { 4053 // In this case both val_type and alias_idx are unused. 4054 assert(pre_val != NULL, "must be loaded already"); 4055 // Nothing to be done if pre_val is null. 4056 if (pre_val->bottom_type() == TypePtr::NULL_PTR) return; 4057 assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here"); 4058 } 4059 assert(bt == T_OBJECT, "or we shouldn't be here"); 4060 4061 IdealKit ideal(this, true); 4062 4063 Node* tls = __ thread(); // ThreadLocalStorage 4064 4065 Node* no_ctrl = NULL; 4066 Node* no_base = __ top(); 4067 Node* zero = __ ConI(0); 4068 Node* zeroX = __ ConX(0); 4069 4070 float likely = PROB_LIKELY(0.999); 4071 float unlikely = PROB_UNLIKELY(0.999); 4072 4073 BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? 
T_INT : T_BYTE; 4074 assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width"); 4075 4076 // Offsets into the thread 4077 const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 648 4078 SATBMarkQueue::byte_offset_of_active()); 4079 const int index_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 656 4080 SATBMarkQueue::byte_offset_of_index()); 4081 const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 652 4082 SATBMarkQueue::byte_offset_of_buf()); 4083 4084 // Now the actual pointers into the thread 4085 Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset)); 4086 Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); 4087 Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); 4088 4089 // Now some of the values 4090 Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); 4091 4092 // if (!marking) 4093 __ if_then(marking, BoolTest::ne, zero, unlikely); { 4094 BasicType index_bt = TypeX_X->basic_type(); 4095 assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size."); 4096 Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw); 4097 4098 if (do_load) { 4099 // load original value 4100 // alias_idx correct?? 4101 pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx); 4102 } 4103 4104 // if (pre_val != NULL) 4105 __ if_then(pre_val, BoolTest::ne, null()); { 4106 Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); 4107 4108 // is the queue for this thread full? 4109 __ if_then(index, BoolTest::ne, zeroX, likely); { 4110 4111 // decrement the index 4112 Node* next_index = _gvn.transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); 4113 4114 // Now get the buffer location we will log the previous value into and store it 4115 Node *log_addr = __ AddP(no_base, buffer, next_index); 4116 __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered); 4117 // update the index 4118 __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered); 4119 4120 } __ else_(); { 4121 4122 // logging buffer is full, call the runtime 4123 const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type(); 4124 __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls); 4125 } __ end_if(); // (!index) 4126 } __ end_if(); // (pre_val != NULL) 4127 } __ end_if(); // (!marking) 4128 4129 // Final sync IdealKit and GraphKit. 4130 final_sync(ideal); 4131 g1_write_barrier_pre_helper(*this, adr); 4132 } 4133 4134 Node* GraphKit::shenandoah_write_barrier_pre(bool do_load, 4135 Node* obj, 4136 Node* adr, 4137 uint alias_idx, 4138 Node* val, 4139 const TypeOopPtr* val_type, 4140 Node* pre_val, 4141 BasicType bt) { 4142 4143 // Some sanity checks 4144 // Note: val is unused in this routine. 4145 4146 if (val == NULL) { 4147 g1_write_barrier_pre(do_load, obj, adr, alias_idx, val, val_type, pre_val, bt); 4148 return NULL; 4149 } 4150 4151 if (! 
ShenandoahReduceStoreValBarrier) { 4152 val = shenandoah_read_barrier_storeval(val); 4153 shenandoah_update_matrix(adr, val); 4154 g1_write_barrier_pre(do_load, obj, adr, alias_idx, val, val_type, pre_val, bt); 4155 return val; 4156 } 4157 4158 if (do_load) { 4159 // We need to generate the load of the previous value 4160 assert(obj != NULL, "must have a base"); 4161 assert(adr != NULL, "where are loading from?"); 4162 assert(pre_val == NULL, "loaded already?"); 4163 assert(val_type != NULL, "need a type"); 4164 4165 if (use_ReduceInitialCardMarks() 4166 && g1_can_remove_pre_barrier(&_gvn, adr, bt, alias_idx)) { 4167 return shenandoah_read_barrier_storeval(val); 4168 } 4169 4170 } else { 4171 // In this case both val_type and alias_idx are unused. 4172 assert(pre_val != NULL, "must be loaded already"); 4173 // Nothing to be done if pre_val is null. 4174 if (pre_val->bottom_type() == TypePtr::NULL_PTR) return val; 4175 assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here"); 4176 } 4177 assert(bt == T_OBJECT, "or we shouldn't be here"); 4178 4179 IdealKit ideal(this, true, true); 4180 IdealVariable ival(ideal); 4181 __ declarations_done(); 4182 __ set(ival, val); 4183 Node* tls = __ thread(); // ThreadLocalStorage 4184 4185 Node* no_ctrl = NULL; 4186 Node* no_base = __ top(); 4187 Node* zero = __ ConI(0); 4188 Node* zeroX = __ ConX(0); 4189 4190 float likely = PROB_LIKELY(0.999); 4191 float unlikely = PROB_UNLIKELY(0.999); 4192 4193 BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE; 4194 assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width"); 4195 4196 // Offsets into the thread 4197 const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 648 4198 SATBMarkQueue::byte_offset_of_active()); 4199 const int index_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 656 4200 SATBMarkQueue::byte_offset_of_index()); 4201 const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 652 4202 SATBMarkQueue::byte_offset_of_buf()); 4203 4204 // Now the actual pointers into the thread 4205 Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset)); 4206 Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); 4207 Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); 4208 4209 // Now some of the values 4210 Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); 4211 4212 // if (!marking) 4213 __ if_then(marking, BoolTest::ne, zero, unlikely); { 4214 4215 Node* storeval = ideal.value(ival); 4216 sync_kit(ideal); 4217 storeval = shenandoah_read_barrier_storeval(storeval); 4218 __ sync_kit(this); 4219 __ set(ival, storeval); 4220 4221 BasicType index_bt = TypeX_X->basic_type(); 4222 assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size."); 4223 Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw); 4224 4225 if (do_load) { 4226 // load original value 4227 // alias_idx correct?? 4228 pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx); 4229 } 4230 4231 // if (pre_val != NULL) 4232 __ if_then(pre_val, BoolTest::ne, null()); { 4233 Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); 4234 4235 // is the queue for this thread full? 
4236 __ if_then(index, BoolTest::ne, zeroX, likely); { 4237 4238 // decrement the index 4239 Node* next_index = _gvn.transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); 4240 4241 // Now get the buffer location we will log the previous value into and store it 4242 Node *log_addr = __ AddP(no_base, buffer, next_index); 4243 __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered); 4244 // update the index 4245 __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered); 4246 4247 } __ else_(); { 4248 4249 // logging buffer is full, call the runtime 4250 const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type(); 4251 __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls); 4252 } __ end_if(); // (!index) 4253 } __ end_if(); // (pre_val != NULL) 4254 } __ end_if(); // (!marking) 4255 Node* new_val = __ value(ival); 4256 // IdealKit generates a Phi with very conservative type, and even 4257 // turns array types into TypeInstPtr (see type.cpp, _const_basic_type[T_ARRAY]). 4258 // We're forcing the result to be the original type. 4259 if (new_val != val) { 4260 const Type* t = _gvn.type(val); 4261 if (new_val->isa_Type()) { 4262 new_val->as_Type()->set_type(t); 4263 } 4264 _gvn.set_type(new_val, t); 4265 } 4266 val = new_val; 4267 __ dead(ival); 4268 // Final sync IdealKit and GraphKit. 4269 final_sync(ideal); 4270 g1_write_barrier_pre_helper(*this, adr); 4271 return val; 4272 } 4273 4274 void GraphKit::shenandoah_update_matrix(Node* adr, Node* val) { 4275 if (!UseShenandoahMatrix) { 4276 return; 4277 } 4278 4279 assert(val != NULL, "checked before"); 4280 if (adr == NULL) { 4281 return; // Nothing to do 4282 } 4283 assert(adr != NULL, "must not happen"); 4284 if (val->bottom_type()->higher_equal(TypePtr::NULL_PTR)) { 4285 // Nothing to do. 4286 return; 4287 } 4288 4289 ShenandoahConnectionMatrix* matrix = ShenandoahHeap::heap()->connection_matrix(); 4290 4291 enum { _set_path = 1, _already_set_path, _val_null_path, PATH_LIMIT }; 4292 RegionNode* region = new RegionNode(PATH_LIMIT); 4293 Node* prev_mem = memory(Compile::AliasIdxRaw); 4294 Node* memphi = PhiNode::make(region, prev_mem, Type::MEMORY, TypeRawPtr::BOTTOM); 4295 Node* null_ctrl = top(); 4296 Node* not_null_val = null_check_oop(val, &null_ctrl); 4297 4298 // Null path: nothing to do. 4299 region->init_req(_val_null_path, null_ctrl); 4300 memphi->init_req(_val_null_path, prev_mem); 4301 4302 // Not null path. Update the matrix. 4303 4304 /* 4305 Compute matrix index. Normally, we would update the matrix element at: 4306 4307 MATRIX_BASE + ((addr - HEAP_BASE) >> RS) * STRIDE) + ((new_val - HEAP_BASE) >> RS) 4308 4309 ...where: 4310 MATRIX_BASE is native matrix address 4311 STRIDE is matrix stride 4312 HEAP_BASE is lowest heap address 4313 RS is region size shift 4314 4315 This is what interpreter and C1 are doing. But in C2, we can make it more aggressive 4316 by restructuring the expression like this: 4317 4318 (addr >> RS) * STRIDE + (new_val >> RS) + [MATRIX_BASE - (HEAP_BASE >> RS) * (STRIDE + 1)] 4319 4320 Notice that first two parts can be computed out-of-order, and only then merged with addition, 4321 which helps scheduling. If STRIDE is a power of two, then addr computation can be folded with 4322 region size shift. The third constant can be folded at compile time. 4323 4324 As long as STRIDE is less than 2^RS, we never overflow. As long as HEAP_BASE is aligned to 4325 region size, we are safe with doing RS shifts. 
Guarantee both: 4326 */ 4327 HeapWord* heap_base = ShenandoahHeap::heap()->first_region_bottom(); 4328 intx stride = matrix->stride(); 4329 jint rs = ShenandoahHeapRegion::RegionSizeShift; 4330 4331 guarantee(stride < (intx)ShenandoahHeapRegion::RegionSizeBytes, "sanity"); 4332 guarantee(is_ptr_aligned(heap_base, ShenandoahHeapRegion::RegionSizeBytes), "sanity"); 4333 4334 Node* magic_con = MakeConX((jlong) matrix->matrix_addr() - ((jlong) heap_base >> rs) * (stride + 1)); 4335 4336 // Compute addr part 4337 Node* adr_idx = _gvn.transform(new CastP2XNode(control(), adr)); 4338 if (is_power_of_2(stride)) { 4339 // TODO: Apparently, C2 cannot perform peephole opt like this. 4340 adr_idx = _gvn.transform(new URShiftXNode(adr_idx, intcon(rs - log2_intptr(stride)))); 4341 } else { 4342 adr_idx = _gvn.transform(new URShiftXNode(adr_idx, intcon(rs))); 4343 adr_idx = _gvn.transform(new MulXNode(adr_idx, MakeConX(stride))); 4344 } 4345 4346 // Compute new_val part 4347 Node* val_idx = _gvn.transform(new CastP2XNode(control(), not_null_val)); 4348 val_idx = _gvn.transform(new URShiftXNode(val_idx, intcon(rs))); 4349 4350 // Add everything up 4351 adr_idx = _gvn.transform(new AddXNode(adr_idx, val_idx)); 4352 adr_idx = _gvn.transform(new CastX2PNode(adr_idx)); 4353 Node* matrix_adr = _gvn.transform(new AddPNode(top(), adr_idx, magic_con)); 4354 4355 // Load current value 4356 const TypePtr* adr_type = TypeRawPtr::BOTTOM; 4357 Node* current = _gvn.transform(LoadNode::make(_gvn, control(), memory(Compile::AliasIdxRaw), 4358 matrix_adr, adr_type, TypeInt::INT, T_BYTE, MemNode::unordered)); 4359 4360 // Check if already set 4361 Node* cmp_set = _gvn.transform(new CmpINode(current, intcon(0))); 4362 Node* cmp_set_bool = _gvn.transform(new BoolNode(cmp_set, BoolTest::eq)); 4363 IfNode* cmp_iff = create_and_map_if(control(), cmp_set_bool, PROB_FAIR, COUNT_UNKNOWN); 4364 4365 Node* if_not_set = _gvn.transform(new IfTrueNode(cmp_iff)); 4366 Node* if_set = _gvn.transform(new IfFalseNode(cmp_iff)); 4367 4368 // Already set, exit 4369 set_control(if_set); 4370 region->init_req(_already_set_path, control()); 4371 memphi->init_req(_already_set_path, prev_mem); 4372 4373 // Not set: do the store, and finish up 4374 set_control(if_not_set); 4375 Node* store = _gvn.transform(StoreNode::make(_gvn, control(), memory(Compile::AliasIdxRaw), 4376 matrix_adr, adr_type, intcon(1), T_BYTE, MemNode::unordered)); 4377 region->init_req(_set_path, control()); 4378 memphi->init_req(_set_path, store); 4379 4380 // Merge control flows and memory. 4381 set_control(_gvn.transform(region)); 4382 record_for_igvn(region); 4383 set_memory(_gvn.transform(memphi), Compile::AliasIdxRaw); 4384 } 4385 4386 /* 4387 * G1 similar to any GC with a Young Generation requires a way to keep track of 4388 * references from Old Generation to Young Generation to make sure all live 4389 * objects are found. G1 also requires to keep track of object references 4390 * between different regions to enable evacuation of old regions, which is done 4391 * as part of mixed collections. References are tracked in remembered sets and 4392 * is continuously updated as reference are written to with the help of the 4393 * post-barrier. 4394 * 4395 * To reduce the number of updates to the remembered set the post-barrier 4396 * filters updates to fields in objects located in the Young Generation, 4397 * the same region as the reference, when the NULL is being written or 4398 * if the card is already marked as dirty by an earlier write. 
4399 *
4400 * Under certain circumstances it is possible to avoid generating the
4401 * post-barrier completely, if it can be proven at compile time that the
4402 * object is newly allocated and that no safepoint exists between the
4403 * allocation and the store.
4404 *
4405 * In the case of slow allocation, the allocation code must handle the barrier
4406 * as part of the allocation if the allocated object is not located in the
4407 * nursery; this would happen for humongous objects. This is similar to how
4408 * CMS is required to handle this case; see the comments for
4409 * CollectedHeap::new_store_pre_barrier and OptoRuntime::new_store_pre_barrier.
4410 * A deferred card mark is required for these objects and is handled in the
4411 * above-mentioned methods.
4412 *
4413 * Returns true if the post barrier can be removed.
4414 */
4415 bool GraphKit::g1_can_remove_post_barrier(PhaseTransform* phase, Node* store,
4416 Node* adr) {
4417 intptr_t offset = 0;
4418 Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
4419 AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
4420
4421 if (offset == Type::OffsetBot) {
4422 return false; // cannot unalias unless there are precise offsets
4423 }
4424
4425 if (alloc == NULL) {
4426 return false; // No allocation found
4427 }
4428
4429 // Start search from Store node
4430 Node* mem = store->in(MemNode::Control);
4431 if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
4432
4433 InitializeNode* st_init = mem->in(0)->as_Initialize();
4434 AllocateNode* st_alloc = st_init->allocation();
4435
4436 // Make sure we are looking at the same allocation
4437 if (alloc == st_alloc) {
4438 return true;
4439 }
4440 }
4441
4442 return false;
4443 }
4444
4445 //
4446 // Update the card table and add the card address to the queue
4447 //
4448 void GraphKit::g1_mark_card(IdealKit& ideal,
4449 Node* card_adr,
4450 Node* oop_store,
4451 uint oop_alias_idx,
4452 Node* index,
4453 Node* index_adr,
4454 Node* buffer,
4455 const TypeFunc* tf) {
4456
4457 Node* zero = __ ConI(0);
4458 Node* zeroX = __ ConX(0);
4459 Node* no_base = __ top();
4460 BasicType card_bt = T_BYTE;
4461 // Smash zero into the card. MUST BE ORDERED WRT STORE
4462 __ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw);
4463
4464 // Now do the queue work
4465 __ if_then(index, BoolTest::ne, zeroX); {
4466
4467 Node* next_index = _gvn.transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
4468 Node* log_addr = __ AddP(no_base, buffer, next_index);
4469
4470 // Order, see storeCM.
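// (Informal sketch of the two raw stores below, in C-like pseudocode:
//    buffer[index - sizeof(intptr_t)] = card_adr;  // log the card address
//    index = index - sizeof(intptr_t);             // publish the new byte offset
//  The thread-local dirty card queue is thus filled from its end towards offset 0;
//  the else-branch calls the g1_wb_post stub once the buffer is full.)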
4471 __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);
4472 __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered);
4473
4474 } __ else_(); {
4475 __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
4476 } __ end_if();
4477
4478 }
4479
4480 void GraphKit::g1_write_barrier_post(Node* oop_store,
4481 Node* obj,
4482 Node* adr,
4483 uint alias_idx,
4484 Node* val,
4485 BasicType bt,
4486 bool use_precise) {
4487 // If we are writing a NULL then we need no post barrier
4488
4489 if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
4490 // Must be NULL
4491 const Type* t = val->bottom_type();
4492 assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL");
4493 // No post barrier if writing NULL
4494 return;
4495 }
4496
4497 if (use_ReduceInitialCardMarks() && obj == just_allocated_object(control())) {
4498 // We can skip marks on a freshly-allocated object in Eden.
4499 // Keep this code in sync with new_store_pre_barrier() in runtime.cpp.
4500 // That routine informs the GC to take appropriate compensating steps
4501 // upon a slow-path allocation, so as to make this card-mark
4502 // elision safe.
4503 return;
4504 }
4505
4506 if (use_ReduceInitialCardMarks()
4507 && g1_can_remove_post_barrier(&_gvn, oop_store, adr)) {
4508 return;
4509 }
4510
4511 if (!use_precise) {
4512 // All card marks for a (non-array) instance are in one place:
4513 adr = obj;
4514 }
4515 // (Else it's an array (or unknown), and we want more precise card marks.)
4516 assert(adr != NULL, "");
4517
4518 IdealKit ideal(this, true);
4519
4520 Node* tls = __ thread(); // ThreadLocalStorage
4521
4522 Node* no_base = __ top();
4523 float likely = PROB_LIKELY(0.999);
4524 float unlikely = PROB_UNLIKELY(0.999);
4525 Node* young_card = __ ConI((jint)G1SATBCardTableModRefBS::g1_young_card_val());
4526 Node* dirty_card = __ ConI((jint)CardTableModRefBS::dirty_card_val());
4527 Node* zeroX = __ ConX(0);
4528
4529 // Get the alias_index for raw card-mark memory
4530 const TypePtr* card_type = TypeRawPtr::BOTTOM;
4531
4532 const TypeFunc *tf = OptoRuntime::g1_wb_post_Type();
4533
4534 // Offsets into the thread
4535 const int index_offset = in_bytes(JavaThread::dirty_card_queue_offset() +
4536 DirtyCardQueue::byte_offset_of_index());
4537 const int buffer_offset = in_bytes(JavaThread::dirty_card_queue_offset() +
4538 DirtyCardQueue::byte_offset_of_buf());
4539
4540 // Pointers into the thread
4541
4542 Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
4543 Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
4544
4545 // Now some values
4546 // Use ctrl to avoid hoisting these values past a safepoint, which could
4547 // potentially reset these fields in the JavaThread.
4548 Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw);
4549 Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
4550
4551 // Convert the store obj pointer to an int prior to doing math on it
4552 // Must use ctrl to prevent an "integerized oop" from existing across a safepoint
4553 Node* cast = __ CastPX(__ ctrl(), adr);
4554
4555 // Divide pointer by card size
4556 Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
4557
4558 // Combine card table base and card offset
4559 Node* card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
4560
4561 // If we know the value being stored, check whether the store crosses regions.
4562
4563 if (val != NULL) {
4564 // Does the store cause us to cross regions?
4565
4566 // Should be able to do an unsigned compare of region_size instead of
4567 // an extra shift. Do we have an unsigned compare??
4568 // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);
4569 Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes));
4570
4571 // if (xor_res == 0) same region so skip
4572 __ if_then(xor_res, BoolTest::ne, zeroX); {
4573
4574 // No barrier if we are storing a NULL
4575 __ if_then(val, BoolTest::ne, null(), unlikely); {
4576
4577 // OK, must mark the card if not already dirty
4578
4579 // load the original value of the card
4580 Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
4581
4582 __ if_then(card_val, BoolTest::ne, young_card); {
4583 sync_kit(ideal);
4584 // Use Op_MemBarVolatile to achieve the effect of a StoreLoad barrier.
4585 insert_mem_bar(Op_MemBarVolatile, oop_store);
4586 __ sync_kit(this);
4587
4588 Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
4589 __ if_then(card_val_reload, BoolTest::ne, dirty_card); {
4590 g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
4591 } __ end_if();
4592 } __ end_if();
4593 } __ end_if();
4594 } __ end_if();
4595 } else {
4596 // The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks.
4597 // We don't need a barrier here if the destination is a newly allocated object
4598 // in Eden. Otherwise, GC verification breaks because we assume that cards in Eden
4599 // are set to 'g1_young_gen' (see G1SATBCardTableModRefBS::verify_g1_young_region()).
4600 assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
4601 Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
4602 __ if_then(card_val, BoolTest::ne, young_card); {
4603 g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
4604 } __ end_if();
4605 }
4606
4607 // Final sync IdealKit and GraphKit.
4608 final_sync(ideal);
4609 }
4610 #undef __
4611
4612
4613 Node* GraphKit::load_String_length(Node* ctrl, Node* str) {
4614 Node* len = load_array_length(load_String_value(ctrl, str));
4615 Node* coder = load_String_coder(ctrl, str);
4616 // Divide length by 2 if coder is UTF16
4617 return _gvn.transform(new RShiftINode(len, coder));
4618 }
4619
4620 Node* GraphKit::load_String_value(Node* ctrl, Node* str) {
4621 int value_offset = java_lang_String::value_offset_in_bytes();
4622 const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(),
4623 false, NULL, 0);
4624 const TypePtr* value_field_type = string_type->add_offset(value_offset);
4625 const TypeAryPtr* value_type = TypeAryPtr::make(TypePtr::NotNull,
4626 TypeAry::make(TypeInt::BYTE, TypeInt::POS),
4627 ciTypeArrayKlass::make(T_BYTE), true, 0);
4628 int value_field_idx = C->get_alias_index(value_field_type);
4629
4630 if (! ShenandoahOptimizeFinals) {
4631 str = shenandoah_read_barrier(str);
4632 }
4633
4634 Node* load = make_load(ctrl, basic_plus_adr(str, str, value_offset),
4635 value_type, T_OBJECT, value_field_idx, MemNode::unordered);
4636 // String.value field is known to be @Stable.
4637 if (UseImplicitStableValues) {
4638 load = cast_array_to_stable(load, value_type);
4639 }
4640 return load;
4641 }
4642
4643 Node* GraphKit::load_String_coder(Node* ctrl, Node* str) {
4644 if (!CompactStrings) {
4645 return intcon(java_lang_String::CODER_UTF16);
4646 }
4647 int coder_offset = java_lang_String::coder_offset_in_bytes();
4648 const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(),
4649 false, NULL, 0);
4650 const TypePtr* coder_field_type = string_type->add_offset(coder_offset);
4651 int coder_field_idx = C->get_alias_index(coder_field_type);
4652
4653 if (! ShenandoahOptimizeFinals) {
4654 str = shenandoah_read_barrier(str);
4655 }
4656
4657 return make_load(ctrl, basic_plus_adr(str, str, coder_offset),
4658 TypeInt::BYTE, T_BYTE, coder_field_idx, MemNode::unordered);
4659 }
4660
4661 void GraphKit::store_String_value(Node* ctrl, Node* str, Node* value) {
4662 int value_offset = java_lang_String::value_offset_in_bytes();
4663 const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(),
4664 false, NULL, 0);
4665 const TypePtr* value_field_type = string_type->add_offset(value_offset);
4666
4667 str = shenandoah_write_barrier(str);
4668
4669 store_oop_to_object(control(), str, basic_plus_adr(str, value_offset), value_field_type,
4670 value, TypeAryPtr::BYTES, T_OBJECT, MemNode::unordered);
4671 }
4672
4673 void GraphKit::store_String_coder(Node* ctrl, Node* str, Node* value) {
4674 int coder_offset = java_lang_String::coder_offset_in_bytes();
4675 const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(),
4676 false, NULL, 0);
4677
4678 str = shenandoah_write_barrier(str);
4679
4680 const TypePtr* coder_field_type = string_type->add_offset(coder_offset);
4681 int coder_field_idx = C->get_alias_index(coder_field_type);
4682 store_to_memory(control(), basic_plus_adr(str, coder_offset),
4683 value, T_BYTE, coder_field_idx, MemNode::unordered);
4684 }
4685
4686 // Capture src and dst memory state with a MergeMemNode
4687 Node* GraphKit::capture_memory(const TypePtr* src_type, const TypePtr* dst_type) {
4688 if (src_type == dst_type) {
4689 // Types are equal, we don't need a MergeMemNode
4690 return memory(src_type);
4691 }
4692 MergeMemNode* merge = MergeMemNode::make(map()->memory());
4693 record_for_igvn(merge); // fold it up later, if possible
4694 int src_idx = C->get_alias_index(src_type);
4695 int dst_idx = C->get_alias_index(dst_type);
4696 merge->set_memory_at(src_idx, memory(src_idx));
4697 merge->set_memory_at(dst_idx, memory(dst_idx));
4698 return merge;
4699 }
4700
4701 Node* GraphKit::compress_string(Node* src, const TypeAryPtr* src_type, Node* dst, Node* count) {
4702 assert(Matcher::match_rule_supported(Op_StrCompressedCopy), "Intrinsic not supported");
4703 assert(src_type == TypeAryPtr::BYTES || src_type == TypeAryPtr::CHARS, "invalid source type");
4704 // If input and output memory types differ, capture both states to preserve
4705 // the dependency between preceding and subsequent loads/stores.
4706 // For example, the following program:
4707 // StoreB
4708 // compress_string
4709 // LoadB
4710 // has this memory graph (use->def):
4711 // LoadB -> compress_string -> CharMem
4712 // ... -> StoreB -> ByteMem
4713 // The intrinsic hides the dependency between LoadB and StoreB, causing
4714 // the load to read from memory not containing the result of the StoreB.
4715 // The correct memory graph should look like this:
4716 // LoadB -> compress_string -> MergeMem(CharMem, StoreB(ByteMem))
4717 Node* mem = capture_memory(src_type, TypeAryPtr::BYTES);
4718 StrCompressedCopyNode* str = new StrCompressedCopyNode(control(), mem, src, dst, count);
4719 Node* res_mem = _gvn.transform(new SCMemProjNode(str));
4720 set_memory(res_mem, TypeAryPtr::BYTES);
4721 return str;
4722 }
4723
4724 void GraphKit::inflate_string(Node* src, Node* dst, const TypeAryPtr* dst_type, Node* count) {
4725 assert(Matcher::match_rule_supported(Op_StrInflatedCopy), "Intrinsic not supported");
4726 assert(dst_type == TypeAryPtr::BYTES || dst_type == TypeAryPtr::CHARS, "invalid dest type");
4727 // Capture src and dst memory (see comment in 'compress_string').
4728 Node* mem = capture_memory(TypeAryPtr::BYTES, dst_type);
4729 StrInflatedCopyNode* str = new StrInflatedCopyNode(control(), mem, src, dst, count);
4730 set_memory(_gvn.transform(str), dst_type);
4731 }
4732
4733 void GraphKit::inflate_string_slow(Node* src, Node* dst, Node* start, Node* count) {
4734
4735 src = shenandoah_read_barrier(src);
4736 dst = shenandoah_write_barrier(dst);
4737
4738 /**
4739 * int i_char = start;
4740 * for (int i_byte = 0; i_byte < count; i_byte++) {
4741 * dst[i_char++] = (char)(src[i_byte] & 0xff);
4742 * }
4743 */
4744 add_predicate();
4745 RegionNode* head = new RegionNode(3);
4746 head->init_req(1, control());
4747 gvn().set_type(head, Type::CONTROL);
4748 record_for_igvn(head);
4749
4750 Node* i_byte = new PhiNode(head, TypeInt::INT);
4751 i_byte->init_req(1, intcon(0));
4752 gvn().set_type(i_byte, TypeInt::INT);
4753 record_for_igvn(i_byte);
4754
4755 Node* i_char = new PhiNode(head, TypeInt::INT);
4756 i_char->init_req(1, start);
4757 gvn().set_type(i_char, TypeInt::INT);
4758 record_for_igvn(i_char);
4759
4760 Node* mem = PhiNode::make(head, memory(TypeAryPtr::BYTES), Type::MEMORY, TypeAryPtr::BYTES);
4761 gvn().set_type(mem, Type::MEMORY);
4762 record_for_igvn(mem);
4763 set_control(head);
4764 set_memory(mem, TypeAryPtr::BYTES);
4765 Node* ch = load_array_element(control(), src, i_byte, TypeAryPtr::BYTES);
4766 Node* st = store_to_memory(control(), array_element_address(dst, i_char, T_BYTE),
4767 AndI(ch, intcon(0xff)), T_CHAR, TypeAryPtr::BYTES, MemNode::unordered,
4768 false, false, true /* mismatched */);
4769
4770 IfNode* iff = create_and_map_if(head, Bool(CmpI(i_byte, count), BoolTest::lt), PROB_FAIR, COUNT_UNKNOWN);
4771 head->init_req(2, IfTrue(iff));
4772 mem->init_req(2, st);
4773 i_byte->init_req(2, AddI(i_byte, intcon(1)));
4774 i_char->init_req(2, AddI(i_char, intcon(2)));
4775
4776 set_control(IfFalse(iff));
4777 set_memory(st, TypeAryPtr::BYTES);
4778 }
4779
4780 Node* GraphKit::make_constant_from_field(ciField* field, Node* obj) {
4781 if (!field->is_constant()) {
4782 return NULL; // Field not marked as constant.
4783 }
4784 ciInstance* holder = NULL;
4785 if (!field->is_static()) {
4786 ciObject* const_oop = obj->bottom_type()->is_oopptr()->const_oop();
4787 if (const_oop != NULL && const_oop->is_instance()) {
4788 holder = const_oop->as_instance();
4789 }
4790 }
4791 const Type* con_type = Type::make_constant_from_field(field, holder, field->layout_type(),
4792 /*is_unsigned_load=*/false);
4793 if (con_type != NULL) {
4794 return makecon(con_type);
4795 }
4796 return NULL;
4797 }
4798
4799 Node* GraphKit::cast_array_to_stable(Node* ary, const TypeAryPtr* ary_type) {
4800 // Reify the property as a CastPP node in the Ideal graph to comply with the
4801 // monotonicity assumption of CCP analysis.
4802 return _gvn.transform(new CastPPNode(ary, ary_type->cast_to_stable(true)));
4803 }
4804
4805 Node* GraphKit::shenandoah_read_barrier(Node* obj) {
4806 return shenandoah_read_barrier_impl(obj, false, true, true);
4807 }
4808
4809 Node* GraphKit::shenandoah_read_barrier_storeval(Node* obj) {
4810 return shenandoah_read_barrier_impl(obj, true, false, false);
4811 }
4812
4813 Node* GraphKit::shenandoah_read_barrier_impl(Node* obj, bool use_ctrl, bool use_mem, bool allow_fromspace) {
4814
4815 if (UseShenandoahGC && ShenandoahReadBarrier) {
4816 const Type* obj_type = obj->bottom_type();
4817 if (obj_type->higher_equal(TypePtr::NULL_PTR)) {
4818 return obj;
4819 }
4820 const TypePtr* adr_type = ShenandoahBarrierNode::brooks_pointer_type(obj_type);
4821 Node* mem = use_mem ? memory(adr_type) : immutable_memory();
4822
4823 if (! ShenandoahBarrierNode::needs_barrier(&_gvn, NULL, obj, mem, allow_fromspace)) {
4824 // We know no barrier is needed.
4825 return obj;
4826 }
4827
4828
4829 if (obj_type->meet(TypePtr::NULL_PTR) == obj_type->remove_speculative()) {
4830
4831 // We don't know if it's null or not. Need null-check.
4832 enum { _not_null_path = 1, _null_path, PATH_LIMIT };
4833 RegionNode* region = new RegionNode(PATH_LIMIT);
4834 Node* phi = new PhiNode(region, obj_type);
4835 Node* null_ctrl = top();
4836 Node* not_null_obj = null_check_oop(obj, &null_ctrl);
4837
4838 region->init_req(_null_path, null_ctrl);
4839 phi ->init_req(_null_path, zerocon(T_OBJECT));
4840
4841 Node* ctrl = use_ctrl ? control() : NULL;
4842 ShenandoahReadBarrierNode* rb = new ShenandoahReadBarrierNode(ctrl, mem, not_null_obj, allow_fromspace);
4843 Node* n = _gvn.transform(rb);
4844
4845 region->init_req(_not_null_path, control());
4846 phi ->init_req(_not_null_path, n);
4847
4848 set_control(_gvn.transform(region));
4849 record_for_igvn(region);
4850 return _gvn.transform(phi);
4851
4852 } else {
4853 // We know it is not null. Simple barrier is sufficient.
4854 Node* ctrl = use_ctrl ? control() : NULL;
4855 ShenandoahReadBarrierNode* rb = new ShenandoahReadBarrierNode(ctrl, mem, obj, allow_fromspace);
4856 Node* n = _gvn.transform(rb);
4857 record_for_igvn(n);
4858 return n;
4859 }
4860
4861 } else {
4862 return obj;
4863 }
4864 }
4865
4866 static Node* shenandoah_write_barrier_helper(GraphKit& kit, Node* obj, const TypePtr* adr_type) {
4867 ShenandoahWriteBarrierNode* wb = new ShenandoahWriteBarrierNode(kit.C, kit.control(), kit.memory(adr_type), obj);
4868 Node* n = kit.gvn().transform(wb);
4869 if (n == wb) { // New barrier needs memory projection.
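// (If GVN returned an existing, identical barrier node instead, that node already
// received its memory projection when it was first created, so nothing more is
// needed here.)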
4870 Node* proj = kit.gvn().transform(new ShenandoahWBMemProjNode(n));
4871 kit.set_memory(proj, adr_type);
4872 }
4873
4874 return n;
4875 }
4876
4877 Node* GraphKit::shenandoah_write_barrier(Node* obj) {
4878
4879 if (UseShenandoahGC && ShenandoahWriteBarrier) {
4880
4881 if (! ShenandoahBarrierNode::needs_barrier(&_gvn, NULL, obj, NULL, true)) {
4882 return obj;
4883 }
4884 const Type* obj_type = obj->bottom_type();
4885 const TypePtr* adr_type = ShenandoahBarrierNode::brooks_pointer_type(obj_type);
4886 if (obj_type->meet(TypePtr::NULL_PTR) == obj_type->remove_speculative()) {
4887 // We don't know if it's null or not. Need null-check.
4888 enum { _not_null_path = 1, _null_path, PATH_LIMIT };
4889 RegionNode* region = new RegionNode(PATH_LIMIT);
4890 Node* phi = new PhiNode(region, obj_type);
4891 Node* memphi = PhiNode::make(region, memory(adr_type), Type::MEMORY, C->alias_type(adr_type)->adr_type());
4892
4893 Node* prev_mem = memory(adr_type);
4894 Node* null_ctrl = top();
4895 Node* not_null_obj = null_check_oop(obj, &null_ctrl);
4896
4897 region->init_req(_null_path, null_ctrl);
4898 phi ->init_req(_null_path, zerocon(T_OBJECT));
4899 memphi->init_req(_null_path, prev_mem);
4900
4901 Node* n = shenandoah_write_barrier_helper(*this, not_null_obj, adr_type);
4902
4903 region->init_req(_not_null_path, control());
4904 phi ->init_req(_not_null_path, n);
4905 memphi->init_req(_not_null_path, memory(adr_type));
4906
4907 set_control(_gvn.transform(region));
4908 record_for_igvn(region);
4909 set_memory(_gvn.transform(memphi), adr_type);
4910
4911 Node* res_val = _gvn.transform(phi);
4912 // replace_in_map(obj, res_val);
4913 return res_val;
4914 } else {
4915 // We know it is not null. Simple barrier is sufficient.
4916 Node* n = shenandoah_write_barrier_helper(*this, obj, adr_type);
4917 // replace_in_map(obj, n);
4918 record_for_igvn(n);
4919 return n;
4920 }
4921
4922 } else {
4923 return obj;
4924 }
4925 }
4926
4927 /**
4928 * In Shenandoah, we need barriers on acmp (and similar instructions that compare two
4929 * oops) to avoid false negatives. If it compares a from-space and a to-space copy of
4930 * an object, a regular acmp would return false, even though both are really the same
4931 * object. The acmp barrier compares the two operands and, when they are *not equal*,
4932 * applies a read-barrier to both and compares them again. If the first comparison
4933 * failed only because the operands were different copies of the same object, that object
4934 * must already have been evacuated (and therefore doesn't require a write-barrier).
4935 */
4936 Node* GraphKit::cmp_objects(Node* a, Node* b) {
4937 // TODO: Refactor into proper GC interface.
4938 if (UseShenandoahGC && ShenandoahAcmpBarrier) {
4939 const Type* a_type = a->bottom_type();
4940 const Type* b_type = b->bottom_type();
4941 if (a_type->higher_equal(TypePtr::NULL_PTR) || b_type->higher_equal(TypePtr::NULL_PTR)) {
4942 // We know one arg is going to be null. No need for barriers.
4943 return _gvn.transform(new CmpPNode(b, a));
4944 }
4945
4946 const TypePtr* a_adr_type = ShenandoahBarrierNode::brooks_pointer_type(a_type);
4947 const TypePtr* b_adr_type = ShenandoahBarrierNode::brooks_pointer_type(b_type);
4948 if ((! ShenandoahBarrierNode::needs_barrier(&_gvn, NULL, a, memory(a_adr_type), false)) &&
4949 (! ShenandoahBarrierNode::needs_barrier(&_gvn, NULL, b, memory(b_adr_type), false))) {
4950 // We know both args are in to-space already. No acmp barrier needed.
4951 return _gvn.transform(new CmpPNode(b, a));
4952 }
4953
4954 C->set_has_split_ifs(true);
4955
4956 if (ShenandoahVerifyOptoBarriers) {
4957 a = shenandoah_write_barrier(a);
4958 b = shenandoah_write_barrier(b);
4959 return _gvn.transform(new CmpPNode(b, a));
4960 }
4961
4962 enum { _equal = 1, _not_equal, PATH_LIMIT };
4963 RegionNode* region = new RegionNode(PATH_LIMIT);
4964 PhiNode* phiA = PhiNode::make(region, a, _gvn.type(a)->is_oopptr()->cast_to_nonconst());
4965 PhiNode* phiB = PhiNode::make(region, b, _gvn.type(b)->is_oopptr()->cast_to_nonconst());
4966
4967 Node* cmp = _gvn.transform(new CmpPNode(b, a));
4968 Node* tst = _gvn.transform(new BoolNode(cmp, BoolTest::eq));
4969
4970 // TODO: Use profiling data.
4971 IfNode* iff = create_and_map_if(control(), tst, PROB_FAIR, COUNT_UNKNOWN);
4972 Node* iftrue = _gvn.transform(new IfTrueNode(iff));
4973 Node* iffalse = _gvn.transform(new IfFalseNode(iff));
4974
4975 // Equal path: Use original values.
4976 region->init_req(_equal, iftrue);
4977 phiA->init_req(_equal, a);
4978 phiB->init_req(_equal, b);
4979
4980 uint alias_a = C->get_alias_index(a_adr_type);
4981 uint alias_b = C->get_alias_index(b_adr_type);
4982 PhiNode* mem_phi = NULL;
4983 if (alias_a == alias_b) {
4984 mem_phi = PhiNode::make(region, memory(alias_a), Type::MEMORY, C->get_adr_type(alias_a));
4985 } else {
4986 mem_phi = PhiNode::make(region, map()->memory(), Type::MEMORY, TypePtr::BOTTOM);
4987 }
4988
4989 // Unequal path: retry after read barriers.
4990 set_control(iffalse);
4991 if (!iffalse->is_top()) {
4992 Node* mb = NULL;
4993 if (alias_a == alias_b) {
4994 Node* mem = reset_memory();
4995 mb = MemBarNode::make(C, Op_MemBarAcquire, alias_a);
4996 mb->init_req(TypeFunc::Control, control());
4997 mb->init_req(TypeFunc::Memory, mem);
4998 Node* membar = _gvn.transform(mb);
4999 set_control(_gvn.transform(new ProjNode(membar, TypeFunc::Control)));
5000 Node* newmem = _gvn.transform(new ProjNode(membar, TypeFunc::Memory));
5001 set_all_memory(mem);
5002 set_memory(newmem, alias_a);
5003 } else {
5004 mb = insert_mem_bar(Op_MemBarAcquire);
5005 }
5006 } else {
5007 a = top();
5008 b = top();
5009 }
5010
5011 a = shenandoah_read_barrier_impl(a, true, true, false);
5012 b = shenandoah_read_barrier_impl(b, true, true, false);
5013
5014 region->init_req(_not_equal, control());
5015 phiA->init_req(_not_equal, a);
5016 phiB->init_req(_not_equal, b);
5017 if (alias_a == alias_b) {
5018 mem_phi->init_req(_not_equal, memory(alias_a));
5019 set_memory(mem_phi, alias_a);
5020 } else {
5021 mem_phi->init_req(_not_equal, reset_memory());
5022 set_all_memory(mem_phi);
5023 }
5024 record_for_igvn(mem_phi);
5025 _gvn.set_type(mem_phi, Type::MEMORY);
5026 set_control(_gvn.transform(region));
5027 record_for_igvn(region);
5028
5029 a = _gvn.transform(phiA);
5030 b = _gvn.transform(phiB);
5031 return _gvn.transform(new CmpPNode(b, a));
5032 } else {
5033 return _gvn.transform(new CmpPNode(b, a));
5034 }
5035 }
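
// A rough pseudocode sketch of the acmp barrier emitted by cmp_objects() above
// (informal, not the exact IR shape):
//
//   if (a == b) {
//     return equal;                                  // fast path, no barriers
//   } else {
//     a' = read_barrier(a);  b' = read_barrier(b);   // resolve both copies
//     return a' == b';                               // avoids the false negative
//   }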