1 /* 2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/vmSymbols.hpp" 27 #include "interpreter/interpreter.hpp" 28 #include "memory/allocation.inline.hpp" 29 #include "memory/resourceArea.hpp" 30 #include "memory/universe.inline.hpp" 31 #include "oops/methodDataOop.hpp" 32 #include "oops/oop.inline.hpp" 33 #include "prims/jvmtiThreadState.hpp" 34 #include "runtime/handles.inline.hpp" 35 #include "runtime/monitorChunk.hpp" 36 #include "runtime/sharedRuntime.hpp" 37 #include "runtime/vframe.hpp" 38 #include "runtime/vframeArray.hpp" 39 #include "runtime/vframe_hp.hpp" 40 #include "utilities/events.hpp" 41 #ifdef COMPILER2 42 #include "opto/runtime.hpp" 43 #endif 44 45 46 int vframeArrayElement:: bci(void) const { return (_bci == SynchronizationEntryBCI ? 0 : _bci); } 47 48 void vframeArrayElement::free_monitors(JavaThread* jt) { 49 if (_monitors != NULL) { 50 MonitorChunk* chunk = _monitors; 51 _monitors = NULL; 52 jt->remove_monitor_chunk(chunk); 53 delete chunk; 54 } 55 } 56 57 void vframeArrayElement::fill_in(compiledVFrame* vf) { 58 59 // Copy the information from the compiled vframe to the 60 // interpreter frame we will be creating to replace vf 61 62 _method = vf->method(); 63 _bci = vf->raw_bci(); 64 _reexecute = vf->should_reexecute(); 65 66 int index; 67 68 // Get the monitors off-stack 69 70 GrowableArray<MonitorInfo*>* list = vf->monitors(); 71 if (list->is_empty()) { 72 _monitors = NULL; 73 } else { 74 75 // Allocate monitor chunk 76 _monitors = new MonitorChunk(list->length()); 77 vf->thread()->add_monitor_chunk(_monitors); 78 79 // Migrate the BasicLocks from the stack to the monitor chunk 80 for (index = 0; index < list->length(); index++) { 81 MonitorInfo* monitor = list->at(index); 82 assert(!monitor->owner_is_scalar_replaced(), "object should be reallocated already"); 83 assert(monitor->owner() == NULL || (!monitor->owner()->is_unlocked() && !monitor->owner()->has_bias_pattern()), "object must be null or locked, and unbiased"); 84 BasicObjectLock* dest = _monitors->at(index); 85 dest->set_obj(monitor->owner()); 86 monitor->lock()->move_to(monitor->owner(), dest->lock()); 87 } 88 } 89 90 // Convert the vframe locals and expressions to off stack 91 // values. Because we will not gc all oops can be converted to 92 // intptr_t (i.e. a stack slot) and we are fine. This is 93 // good since we are inside a HandleMark and the oops in our 94 // collection would go away between packing them here and 95 // unpacking them in unpack_on_stack. 96 97 // First the locals go off-stack 98 99 // FIXME this seems silly it creates a StackValueCollection 100 // in order to get the size to then copy them and 101 // convert the types to intptr_t size slots. Seems like it 102 // could do it in place... Still uses less memory than the 103 // old way though 104 105 StackValueCollection *locs = vf->locals(); 106 _locals = new StackValueCollection(locs->size()); 107 for(index = 0; index < locs->size(); index++) { 108 StackValue* value = locs->at(index); 109 switch(value->type()) { 110 case T_OBJECT: 111 assert(!value->obj_is_scalar_replaced(), "object should be reallocated already"); 112 // preserve object type 113 _locals->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT )); 114 break; 115 case T_CONFLICT: 116 // A dead local. Will be initialized to null/zero. 117 _locals->add( new StackValue()); 118 break; 119 case T_INT: 120 _locals->add( new StackValue(value->get_int())); 121 break; 122 default: 123 ShouldNotReachHere(); 124 } 125 } 126 127 // Now the expressions off-stack 128 // Same silliness as above 129 130 StackValueCollection *exprs = vf->expressions(); 131 _expressions = new StackValueCollection(exprs->size()); 132 for(index = 0; index < exprs->size(); index++) { 133 StackValue* value = exprs->at(index); 134 switch(value->type()) { 135 case T_OBJECT: 136 assert(!value->obj_is_scalar_replaced(), "object should be reallocated already"); 137 // preserve object type 138 _expressions->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT )); 139 break; 140 case T_CONFLICT: 141 // A dead stack element. Will be initialized to null/zero. 142 // This can occur when the compiler emits a state in which stack 143 // elements are known to be dead (because of an imminent exception). 144 _expressions->add( new StackValue()); 145 break; 146 case T_INT: 147 _expressions->add( new StackValue(value->get_int())); 148 break; 149 default: 150 ShouldNotReachHere(); 151 } 152 } 153 } 154 155 int unpack_counter = 0; 156 157 void vframeArrayElement::unpack_on_stack(int callee_parameters, 158 int callee_locals, 159 frame* caller, 160 bool is_top_frame, 161 int exec_mode) { 162 JavaThread* thread = (JavaThread*) Thread::current(); 163 164 // Look at bci and decide on bcp and continuation pc 165 address bcp; 166 // C++ interpreter doesn't need a pc since it will figure out what to do when it 167 // begins execution 168 address pc; 169 bool use_next_mdp = false; // true if we should use the mdp associated with the next bci 170 // rather than the one associated with bcp 171 if (raw_bci() == SynchronizationEntryBCI) { 172 // We are deoptimizing while hanging in prologue code for synchronized method 173 bcp = method()->bcp_from(0); // first byte code 174 pc = Interpreter::deopt_entry(vtos, 0); // step = 0 since we don't skip current bytecode 175 } else if (should_reexecute()) { //reexecute this bytecode 176 assert(is_top_frame, "reexecute allowed only for the top frame"); 177 bcp = method()->bcp_from(bci()); 178 pc = Interpreter::deopt_reexecute_entry(method(), bcp); 179 } else { 180 bcp = method()->bcp_from(bci()); 181 pc = Interpreter::deopt_continue_after_entry(method(), bcp, callee_parameters, is_top_frame); 182 use_next_mdp = true; 183 } 184 assert(Bytecodes::is_defined(*bcp), "must be a valid bytecode"); 185 186 // Monitorenter and pending exceptions: 187 // 188 // For Compiler2, there should be no pending exception when deoptimizing at monitorenter 189 // because there is no safepoint at the null pointer check (it is either handled explicitly 190 // or prior to the monitorenter) and asynchronous exceptions are not made "pending" by the 191 // runtime interface for the slow case (see JRT_ENTRY_FOR_MONITORENTER). If an asynchronous 192 // exception was processed, the bytecode pointer would have to be extended one bytecode beyond 193 // the monitorenter to place it in the proper exception range. 194 // 195 // For Compiler1, deoptimization can occur while throwing a NullPointerException at monitorenter, 196 // in which case bcp should point to the monitorenter since it is within the exception's range. 197 198 assert(*bcp != Bytecodes::_monitorenter || is_top_frame, "a _monitorenter must be a top frame"); 199 // TIERED Must know the compiler of the deoptee QQQ 200 COMPILER2_PRESENT(guarantee(*bcp != Bytecodes::_monitorenter || exec_mode != Deoptimization::Unpack_exception, 201 "shouldn't get exception during monitorenter");) 202 203 int popframe_preserved_args_size_in_bytes = 0; 204 int popframe_preserved_args_size_in_words = 0; 205 if (is_top_frame) { 206 JvmtiThreadState *state = thread->jvmti_thread_state(); 207 if (JvmtiExport::can_pop_frame() && 208 (thread->has_pending_popframe() || thread->popframe_forcing_deopt_reexecution())) { 209 if (thread->has_pending_popframe()) { 210 // Pop top frame after deoptimization 211 #ifndef CC_INTERP 212 pc = Interpreter::remove_activation_preserving_args_entry(); 213 #else 214 // Do an uncommon trap type entry. c++ interpreter will know 215 // to pop frame and preserve the args 216 pc = Interpreter::deopt_entry(vtos, 0); 217 use_next_mdp = false; 218 #endif 219 } else { 220 // Reexecute invoke in top frame 221 pc = Interpreter::deopt_entry(vtos, 0); 222 use_next_mdp = false; 223 popframe_preserved_args_size_in_bytes = in_bytes(thread->popframe_preserved_args_size()); 224 // Note: the PopFrame-related extension of the expression stack size is done in 225 // Deoptimization::fetch_unroll_info_helper 226 popframe_preserved_args_size_in_words = in_words(thread->popframe_preserved_args_size_in_words()); 227 } 228 } else if (JvmtiExport::can_force_early_return() && state != NULL && state->is_earlyret_pending()) { 229 // Force early return from top frame after deoptimization 230 #ifndef CC_INTERP 231 pc = Interpreter::remove_activation_early_entry(state->earlyret_tos()); 232 #else 233 // TBD: Need to implement ForceEarlyReturn for CC_INTERP (ia64) 234 #endif 235 } else { 236 // Possibly override the previous pc computation of the top (youngest) frame 237 switch (exec_mode) { 238 case Deoptimization::Unpack_deopt: 239 // use what we've got 240 break; 241 case Deoptimization::Unpack_exception: 242 // exception is pending 243 pc = SharedRuntime::raw_exception_handler_for_return_address(thread, pc); 244 // [phh] We're going to end up in some handler or other, so it doesn't 245 // matter what mdp we point to. See exception_handler_for_exception() 246 // in interpreterRuntime.cpp. 247 break; 248 case Deoptimization::Unpack_uncommon_trap: 249 case Deoptimization::Unpack_reexecute: 250 // redo last byte code 251 pc = Interpreter::deopt_entry(vtos, 0); 252 use_next_mdp = false; 253 break; 254 default: 255 ShouldNotReachHere(); 256 } 257 } 258 } 259 260 // Setup the interpreter frame 261 262 assert(method() != NULL, "method must exist"); 263 int temps = expressions()->size(); 264 265 int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors(); 266 267 Interpreter::layout_activation(method(), 268 temps + callee_parameters, 269 popframe_preserved_args_size_in_words, 270 locks, 271 callee_parameters, 272 callee_locals, 273 caller, 274 iframe(), 275 is_top_frame); 276 277 // Update the pc in the frame object and overwrite the temporary pc 278 // we placed in the skeletal frame now that we finally know the 279 // exact interpreter address we should use. 280 281 _frame.patch_pc(thread, pc); 282 283 assert (!method()->is_synchronized() || locks > 0, "synchronized methods must have monitors"); 284 285 BasicObjectLock* top = iframe()->interpreter_frame_monitor_begin(); 286 for (int index = 0; index < locks; index++) { 287 top = iframe()->previous_monitor_in_interpreter_frame(top); 288 BasicObjectLock* src = _monitors->at(index); 289 top->set_obj(src->obj()); 290 src->lock()->move_to(src->obj(), top->lock()); 291 } 292 if (ProfileInterpreter) { 293 iframe()->interpreter_frame_set_mdx(0); // clear out the mdp. 294 } 295 iframe()->interpreter_frame_set_bcx((intptr_t)bcp); // cannot use bcp because frame is not initialized yet 296 if (ProfileInterpreter) { 297 methodDataOop mdo = method()->method_data(); 298 if (mdo != NULL) { 299 int bci = iframe()->interpreter_frame_bci(); 300 if (use_next_mdp) ++bci; 301 address mdp = mdo->bci_to_dp(bci); 302 iframe()->interpreter_frame_set_mdp(mdp); 303 } 304 } 305 306 // Unpack expression stack 307 // If this is an intermediate frame (i.e. not top frame) then this 308 // only unpacks the part of the expression stack not used by callee 309 // as parameters. The callee parameters are unpacked as part of the 310 // callee locals. 311 int i; 312 for(i = 0; i < expressions()->size(); i++) { 313 StackValue *value = expressions()->at(i); 314 intptr_t* addr = iframe()->interpreter_frame_expression_stack_at(i); 315 switch(value->type()) { 316 case T_INT: 317 *addr = value->get_int(); 318 break; 319 case T_OBJECT: 320 *addr = value->get_int(T_OBJECT); 321 break; 322 case T_CONFLICT: 323 // A dead stack slot. Initialize to null in case it is an oop. 324 *addr = NULL_WORD; 325 break; 326 default: 327 ShouldNotReachHere(); 328 } 329 } 330 331 332 // Unpack the locals 333 for(i = 0; i < locals()->size(); i++) { 334 StackValue *value = locals()->at(i); 335 intptr_t* addr = iframe()->interpreter_frame_local_at(i); 336 switch(value->type()) { 337 case T_INT: 338 *addr = value->get_int(); 339 break; 340 case T_OBJECT: 341 *addr = value->get_int(T_OBJECT); 342 break; 343 case T_CONFLICT: 344 // A dead location. If it is an oop then we need a NULL to prevent GC from following it 345 *addr = NULL_WORD; 346 break; 347 default: 348 ShouldNotReachHere(); 349 } 350 } 351 352 if (is_top_frame && JvmtiExport::can_pop_frame() && thread->popframe_forcing_deopt_reexecution()) { 353 // An interpreted frame was popped but it returns to a deoptimized 354 // frame. The incoming arguments to the interpreted activation 355 // were preserved in thread-local storage by the 356 // remove_activation_preserving_args_entry in the interpreter; now 357 // we put them back into the just-unpacked interpreter frame. 358 // Note that this assumes that the locals arena grows toward lower 359 // addresses. 360 if (popframe_preserved_args_size_in_words != 0) { 361 void* saved_args = thread->popframe_preserved_args(); 362 assert(saved_args != NULL, "must have been saved by interpreter"); 363 #ifdef ASSERT 364 assert(popframe_preserved_args_size_in_words <= 365 iframe()->interpreter_frame_expression_stack_size()*Interpreter::stackElementWords, 366 "expression stack size should have been extended"); 367 #endif // ASSERT 368 int top_element = iframe()->interpreter_frame_expression_stack_size()-1; 369 intptr_t* base; 370 if (frame::interpreter_frame_expression_stack_direction() < 0) { 371 base = iframe()->interpreter_frame_expression_stack_at(top_element); 372 } else { 373 base = iframe()->interpreter_frame_expression_stack(); 374 } 375 Copy::conjoint_jbytes(saved_args, 376 base, 377 popframe_preserved_args_size_in_bytes); 378 thread->popframe_free_preserved_args(); 379 } 380 } 381 382 #ifndef PRODUCT 383 if (TraceDeoptimization && Verbose) { 384 ttyLocker ttyl; 385 tty->print_cr("[%d Interpreted Frame]", ++unpack_counter); 386 iframe()->print_on(tty); 387 RegisterMap map(thread); 388 vframe* f = vframe::new_vframe(iframe(), &map, thread); 389 f->print(); 390 391 tty->print_cr("locals size %d", locals()->size()); 392 tty->print_cr("expression size %d", expressions()->size()); 393 394 method()->print_value(); 395 tty->cr(); 396 // method()->print_codes(); 397 } else if (TraceDeoptimization) { 398 tty->print(" "); 399 method()->print_value(); 400 Bytecodes::Code code = Bytecodes::java_code_at(bcp); 401 int bci = method()->bci_from(bcp); 402 tty->print(" - %s", Bytecodes::name(code)); 403 tty->print(" @ bci %d ", bci); 404 tty->print_cr("sp = " PTR_FORMAT, iframe()->sp()); 405 } 406 #endif // PRODUCT 407 408 // The expression stack and locals are in the resource area don't leave 409 // a dangling pointer in the vframeArray we leave around for debug 410 // purposes 411 412 _locals = _expressions = NULL; 413 414 } 415 416 int vframeArrayElement::on_stack_size(int callee_parameters, 417 int callee_locals, 418 bool is_top_frame, 419 int popframe_extra_stack_expression_els) const { 420 assert(method()->max_locals() == locals()->size(), "just checking"); 421 int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors(); 422 int temps = expressions()->size(); 423 return Interpreter::size_activation(method(), 424 temps + callee_parameters, 425 popframe_extra_stack_expression_els, 426 locks, 427 callee_parameters, 428 callee_locals, 429 is_top_frame); 430 } 431 432 433 434 vframeArray* vframeArray::allocate(JavaThread* thread, int frame_size, GrowableArray<compiledVFrame*>* chunk, 435 RegisterMap *reg_map, frame sender, frame caller, frame self) { 436 437 // Allocate the vframeArray 438 vframeArray * result = (vframeArray*) AllocateHeap(sizeof(vframeArray) + // fixed part 439 sizeof(vframeArrayElement) * (chunk->length() - 1), // variable part 440 "vframeArray::allocate"); 441 result->_frames = chunk->length(); 442 result->_owner_thread = thread; 443 result->_sender = sender; 444 result->_caller = caller; 445 result->_original = self; 446 result->set_unroll_block(NULL); // initialize it 447 result->fill_in(thread, frame_size, chunk, reg_map); 448 return result; 449 } 450 451 void vframeArray::fill_in(JavaThread* thread, 452 int frame_size, 453 GrowableArray<compiledVFrame*>* chunk, 454 const RegisterMap *reg_map) { 455 // Set owner first, it is used when adding monitor chunks 456 457 _frame_size = frame_size; 458 for(int i = 0; i < chunk->length(); i++) { 459 element(i)->fill_in(chunk->at(i)); 460 } 461 462 // Copy registers for callee-saved registers 463 if (reg_map != NULL) { 464 for(int i = 0; i < RegisterMap::reg_count; i++) { 465 #ifdef AMD64 466 // The register map has one entry for every int (32-bit value), so 467 // 64-bit physical registers have two entries in the map, one for 468 // each half. Ignore the high halves of 64-bit registers, just like 469 // frame::oopmapreg_to_location does. 470 // 471 // [phh] FIXME: this is a temporary hack! This code *should* work 472 // correctly w/o this hack, possibly by changing RegisterMap::pd_location 473 // in frame_amd64.cpp and the values of the phantom high half registers 474 // in amd64.ad. 475 // if (VMReg::Name(i) < SharedInfo::stack0 && is_even(i)) { 476 intptr_t* src = (intptr_t*) reg_map->location(VMRegImpl::as_VMReg(i)); 477 _callee_registers[i] = src != NULL ? *src : NULL_WORD; 478 // } else { 479 // jint* src = (jint*) reg_map->location(VMReg::Name(i)); 480 // _callee_registers[i] = src != NULL ? *src : NULL_WORD; 481 // } 482 #else 483 jint* src = (jint*) reg_map->location(VMRegImpl::as_VMReg(i)); 484 _callee_registers[i] = src != NULL ? *src : NULL_WORD; 485 #endif 486 if (src == NULL) { 487 set_location_valid(i, false); 488 } else { 489 set_location_valid(i, true); 490 jint* dst = (jint*) register_location(i); 491 *dst = *src; 492 } 493 } 494 } 495 } 496 497 void vframeArray::unpack_to_stack(frame &unpack_frame, int exec_mode) { 498 // stack picture 499 // unpack_frame 500 // [new interpreter frames ] (frames are skeletal but walkable) 501 // caller_frame 502 // 503 // This routine fills in the missing data for the skeletal interpreter frames 504 // in the above picture. 505 506 // Find the skeletal interpreter frames to unpack into 507 RegisterMap map(JavaThread::current(), false); 508 // Get the youngest frame we will unpack (last to be unpacked) 509 frame me = unpack_frame.sender(&map); 510 int index; 511 for (index = 0; index < frames(); index++ ) { 512 *element(index)->iframe() = me; 513 // Get the caller frame (possibly skeletal) 514 me = me.sender(&map); 515 } 516 517 frame caller_frame = me; 518 519 // Do the unpacking of interpreter frames; the frame at index 0 represents the top activation, so it has no callee 520 521 // Unpack the frames from the oldest (frames() -1) to the youngest (0) 522 523 for (index = frames() - 1; index >= 0 ; index--) { 524 int callee_parameters = index == 0 ? 0 : element(index-1)->method()->size_of_parameters(); 525 int callee_locals = index == 0 ? 0 : element(index-1)->method()->max_locals(); 526 element(index)->unpack_on_stack(callee_parameters, 527 callee_locals, 528 &caller_frame, 529 index == 0, 530 exec_mode); 531 if (index == frames() - 1) { 532 Deoptimization::unwind_callee_save_values(element(index)->iframe(), this); 533 } 534 caller_frame = *element(index)->iframe(); 535 } 536 537 538 deallocate_monitor_chunks(); 539 } 540 541 void vframeArray::deallocate_monitor_chunks() { 542 JavaThread* jt = JavaThread::current(); 543 for (int index = 0; index < frames(); index++ ) { 544 element(index)->free_monitors(jt); 545 } 546 } 547 548 #ifndef PRODUCT 549 550 bool vframeArray::structural_compare(JavaThread* thread, GrowableArray<compiledVFrame*>* chunk) { 551 if (owner_thread() != thread) return false; 552 int index = 0; 553 #if 0 // FIXME can't do this comparison 554 555 // Compare only within vframe array. 556 for (deoptimizedVFrame* vf = deoptimizedVFrame::cast(vframe_at(first_index())); vf; vf = vf->deoptimized_sender_or_null()) { 557 if (index >= chunk->length() || !vf->structural_compare(chunk->at(index))) return false; 558 index++; 559 } 560 if (index != chunk->length()) return false; 561 #endif 562 563 return true; 564 } 565 566 #endif 567 568 address vframeArray::register_location(int i) const { 569 assert(0 <= i && i < RegisterMap::reg_count, "index out of bounds"); 570 return (address) & _callee_registers[i]; 571 } 572 573 574 #ifndef PRODUCT 575 576 // Printing 577 578 // Note: we cannot have print_on as const, as we allocate inside the method 579 void vframeArray::print_on_2(outputStream* st) { 580 st->print_cr(" - sp: " INTPTR_FORMAT, sp()); 581 st->print(" - thread: "); 582 Thread::current()->print(); 583 st->print_cr(" - frame size: %d", frame_size()); 584 for (int index = 0; index < frames() ; index++ ) { 585 element(index)->print(st); 586 } 587 } 588 589 void vframeArrayElement::print(outputStream* st) { 590 st->print_cr(" - interpreter_frame -> sp: " INTPTR_FORMAT, iframe()->sp()); 591 } 592 593 void vframeArray::print_value_on(outputStream* st) const { 594 st->print_cr("vframeArray [%d] ", frames()); 595 } 596 597 598 #endif