1 /*
   2  * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20  * CA 95054 USA or visit www.sun.com if you need additional information or
  21  * have any questions.
  22  *
  23  */
  24 
  25 # include "incls/_precompiled.incl"
  26 # include "incls/_vframeArray.cpp.incl"
  27 
  28 
  29 int vframeArrayElement:: bci(void) const { return (_bci == SynchronizationEntryBCI ? 0 : _bci); }
  30 
  31 void vframeArrayElement::free_monitors(JavaThread* jt) {
  32   if (_monitors != NULL) {
  33      MonitorChunk* chunk = _monitors;
  34      _monitors = NULL;
  35      jt->remove_monitor_chunk(chunk);
  36      delete chunk;
  37   }
  38 }
  39 
  40 void vframeArrayElement::fill_in(compiledVFrame* vf) {
  41 
  42 // Copy the information from the compiled vframe to the
  43 // interpreter frame we will be creating to replace vf
  44 
  45   _method = vf->method();
  46   _bci    = vf->raw_bci();
  47 
  48   int index;
  49 
  50   // Get the monitors off-stack
  51 
  52   GrowableArray<MonitorInfo*>* list = vf->monitors();
  53   if (list->is_empty()) {
  54     _monitors = NULL;
  55   } else {
  56 
  57     // Allocate monitor chunk
  58     _monitors = new MonitorChunk(list->length());
  59     vf->thread()->add_monitor_chunk(_monitors);
  60 
  61     // Migrate the BasicLocks from the stack to the monitor chunk
  62     for (index = 0; index < list->length(); index++) {
  63       MonitorInfo* monitor = list->at(index);
  64       assert(!monitor->owner_is_scalar_replaced(), "object should be reallocated already");
  65       assert(monitor->owner() == NULL || (!monitor->owner()->is_unlocked() && !monitor->owner()->has_bias_pattern()), "object must be null or locked, and unbiased");
  66       BasicObjectLock* dest = _monitors->at(index);
  67       dest->set_obj(monitor->owner());
  68       monitor->lock()->move_to(monitor->owner(), dest->lock());
  69     }
  70   }
  71 
  72   // Convert the vframe locals and expressions to off stack
  73   // values. Because we will not gc all oops can be converted to
  74   // intptr_t (i.e. a stack slot) and we are fine. This is
  75   // good since we are inside a HandleMark and the oops in our
  76   // collection would go away between packing them here and
  77   // unpacking them in unpack_on_stack.
  78 
  79   // First the locals go off-stack
  80 
  81   // FIXME this seems silly it creates a StackValueCollection
  82   // in order to get the size to then copy them and
  83   // convert the types to intptr_t size slots. Seems like it
  84   // could do it in place... Still uses less memory than the
  85   // old way though
  86 
  87   StackValueCollection *locs = vf->locals();
  88   _locals = new StackValueCollection(locs->size());
  89   for(index = 0; index < locs->size(); index++) {
  90     StackValue* value = locs->at(index);
  91     switch(value->type()) {
  92       case T_OBJECT:
  93         assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
  94         // preserve object type
  95         _locals->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
  96         break;
  97       case T_CONFLICT:
  98         // A dead local.  Will be initialized to null/zero.
  99         _locals->add( new StackValue());
 100         break;
 101       case T_INT:
 102         _locals->add( new StackValue(value->get_int()));
 103         break;
 104       default:
 105         ShouldNotReachHere();
 106     }
 107   }
 108 
 109   // Now the expressions off-stack
 110   // Same silliness as above
 111 
 112   StackValueCollection *exprs = vf->expressions();
 113   _expressions = new StackValueCollection(exprs->size());
 114   for(index = 0; index < exprs->size(); index++) {
 115     StackValue* value = exprs->at(index);
 116     switch(value->type()) {
 117       case T_OBJECT:
 118         assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
 119         // preserve object type
 120         _expressions->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
 121         break;
 122       case T_CONFLICT:
 123         // A dead stack element.  Will be initialized to null/zero.
 124         // This can occur when the compiler emits a state in which stack
 125         // elements are known to be dead (because of an imminent exception).
 126         _expressions->add( new StackValue());
 127         break;
 128       case T_INT:
 129         _expressions->add( new StackValue(value->get_int()));
 130         break;
 131       default:
 132         ShouldNotReachHere();
 133     }
 134   }
 135 }
 136 
 137 int unpack_counter = 0;
 138 
 139 void vframeArrayElement::unpack_on_stack(int callee_parameters,
 140                                          int callee_locals,
 141                                          frame* caller,
 142                                          bool is_top_frame,
 143                                          int exec_mode) {
 144   JavaThread* thread = (JavaThread*) Thread::current();
 145 
 146   // Look at bci and decide on bcp and continuation pc
 147   address bcp;
 148   // C++ interpreter doesn't need a pc since it will figure out what to do when it
 149   // begins execution
 150   address pc;
 151   bool use_next_mdp; // true if we should use the mdp associated with the next bci
 152                      // rather than the one associated with bcp
 153   if (raw_bci() == SynchronizationEntryBCI) {
 154     // We are deoptimizing while hanging in prologue code for synchronized method
 155     bcp = method()->bcp_from(0); // first byte code
 156     pc  = Interpreter::deopt_entry(vtos, 0); // step = 0 since we don't skip current bytecode
 157     use_next_mdp = false;
 158   } else {
 159     bcp = method()->bcp_from(bci());
 160     pc  = Interpreter::continuation_for(method(), bcp, callee_parameters, is_top_frame, use_next_mdp);
 161   }
 162   assert(Bytecodes::is_defined(*bcp), "must be a valid bytecode");
 163 
 164   // Monitorenter and pending exceptions:
 165   //
 166   // For Compiler2, there should be no pending exception when deoptimizing at monitorenter
 167   // because there is no safepoint at the null pointer check (it is either handled explicitly
 168   // or prior to the monitorenter) and asynchronous exceptions are not made "pending" by the
 169   // runtime interface for the slow case (see JRT_ENTRY_FOR_MONITORENTER).  If an asynchronous
 170   // exception was processed, the bytecode pointer would have to be extended one bytecode beyond
 171   // the monitorenter to place it in the proper exception range.
 172   //
 173   // For Compiler1, deoptimization can occur while throwing a NullPointerException at monitorenter,
 174   // in which case bcp should point to the monitorenter since it is within the exception's range.
 175 
 176   assert(*bcp != Bytecodes::_monitorenter || is_top_frame, "a _monitorenter must be a top frame");
 177   // TIERED Must know the compiler of the deoptee QQQ
 178   COMPILER2_PRESENT(guarantee(*bcp != Bytecodes::_monitorenter || exec_mode != Deoptimization::Unpack_exception,
 179                               "shouldn't get exception during monitorenter");)
 180 
 181   int popframe_preserved_args_size_in_bytes = 0;
 182   int popframe_preserved_args_size_in_words = 0;
 183   if (is_top_frame) {
 184   JvmtiThreadState *state = thread->jvmti_thread_state();
 185     if (JvmtiExport::can_pop_frame() &&
 186         (thread->has_pending_popframe() || thread->popframe_forcing_deopt_reexecution())) {
 187       if (thread->has_pending_popframe()) {
 188         // Pop top frame after deoptimization
 189 #ifndef CC_INTERP
 190         pc = Interpreter::remove_activation_preserving_args_entry();
 191 #else
 192         // Do an uncommon trap type entry. c++ interpreter will know
 193         // to pop frame and preserve the args
 194         pc = Interpreter::deopt_entry(vtos, 0);
 195         use_next_mdp = false;
 196 #endif
 197       } else {
 198         // Reexecute invoke in top frame
 199         pc = Interpreter::deopt_entry(vtos, 0);
 200         use_next_mdp = false;
 201         popframe_preserved_args_size_in_bytes = in_bytes(thread->popframe_preserved_args_size());
 202         // Note: the PopFrame-related extension of the expression stack size is done in
 203         // Deoptimization::fetch_unroll_info_helper
 204         popframe_preserved_args_size_in_words = in_words(thread->popframe_preserved_args_size_in_words());
 205       }
 206     } else if (JvmtiExport::can_force_early_return() && state != NULL && state->is_earlyret_pending()) {
 207       // Force early return from top frame after deoptimization
 208 #ifndef CC_INTERP
 209       pc = Interpreter::remove_activation_early_entry(state->earlyret_tos());
 210 #else
 211      // TBD: Need to implement ForceEarlyReturn for CC_INTERP (ia64)
 212 #endif
 213     } else {
 214       // Possibly override the previous pc computation of the top (youngest) frame
 215       switch (exec_mode) {
 216       case Deoptimization::Unpack_deopt:
 217         // use what we've got
 218         break;
 219       case Deoptimization::Unpack_exception:
 220         // exception is pending
 221         pc = SharedRuntime::raw_exception_handler_for_return_address(pc);
 222         // [phh] We're going to end up in some handler or other, so it doesn't
 223         // matter what mdp we point to.  See exception_handler_for_exception()
 224         // in interpreterRuntime.cpp.
 225         break;
 226       case Deoptimization::Unpack_uncommon_trap:
 227       case Deoptimization::Unpack_reexecute:
 228         // redo last byte code
 229         pc  = Interpreter::deopt_entry(vtos, 0);
 230         use_next_mdp = false;
 231         break;
 232       default:
 233         ShouldNotReachHere();
 234       }
 235     }
 236   }
 237 
 238   // Setup the interpreter frame
 239 
 240   assert(method() != NULL, "method must exist");
 241   int temps = expressions()->size();
 242 
 243   int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
 244 
 245   Interpreter::layout_activation(method(),
 246                                  temps + callee_parameters,
 247                                  popframe_preserved_args_size_in_words,
 248                                  locks,
 249                                  callee_parameters,
 250                                  callee_locals,
 251                                  caller,
 252                                  iframe(),
 253                                  is_top_frame);
 254 
 255   // Update the pc in the frame object and overwrite the temporary pc
 256   // we placed in the skeletal frame now that we finally know the
 257   // exact interpreter address we should use.
 258 
 259   _frame.patch_pc(thread, pc);
 260 
 261   assert (!method()->is_synchronized() || locks > 0, "synchronized methods must have monitors");
 262 
 263   BasicObjectLock* top = iframe()->interpreter_frame_monitor_begin();
 264   for (int index = 0; index < locks; index++) {
 265     top = iframe()->previous_monitor_in_interpreter_frame(top);
 266     BasicObjectLock* src = _monitors->at(index);
 267     top->set_obj(src->obj());
 268     src->lock()->move_to(src->obj(), top->lock());
 269   }
 270   if (ProfileInterpreter) {
 271     iframe()->interpreter_frame_set_mdx(0); // clear out the mdp.
 272   }
 273   iframe()->interpreter_frame_set_bcx((intptr_t)bcp); // cannot use bcp because frame is not initialized yet
 274   if (ProfileInterpreter) {
 275     methodDataOop mdo = method()->method_data();
 276     if (mdo != NULL) {
 277       int bci = iframe()->interpreter_frame_bci();
 278       if (use_next_mdp) ++bci;
 279       address mdp = mdo->bci_to_dp(bci);
 280       iframe()->interpreter_frame_set_mdp(mdp);
 281     }
 282   }
 283 
 284   // Unpack expression stack
 285   // If this is an intermediate frame (i.e. not top frame) then this
 286   // only unpacks the part of the expression stack not used by callee
 287   // as parameters. The callee parameters are unpacked as part of the
 288   // callee locals.
 289   int i;
 290   for(i = 0; i < expressions()->size(); i++) {
 291     StackValue *value = expressions()->at(i);
 292     intptr_t*   addr  = iframe()->interpreter_frame_expression_stack_at(i);
 293     switch(value->type()) {
 294       case T_INT:
 295         *addr = value->get_int();
 296         break;
 297       case T_OBJECT:
 298         *addr = value->get_int(T_OBJECT);
 299         break;
 300       case T_CONFLICT:
 301         // A dead stack slot.  Initialize to null in case it is an oop.
 302         *addr = NULL_WORD;
 303         break;
 304       default:
 305         ShouldNotReachHere();
 306     }
 307     if (TaggedStackInterpreter) {
 308       // Write tag to the stack
 309       iframe()->interpreter_frame_set_expression_stack_tag(i,
 310                                   frame::tag_for_basic_type(value->type()));
 311     }
 312   }
 313 
 314 
 315   // Unpack the locals
 316   for(i = 0; i < locals()->size(); i++) {
 317     StackValue *value = locals()->at(i);
 318     intptr_t* addr  = iframe()->interpreter_frame_local_at(i);
 319     switch(value->type()) {
 320       case T_INT:
 321         *addr = value->get_int();
 322         break;
 323       case T_OBJECT:
 324         *addr = value->get_int(T_OBJECT);
 325         break;
 326       case T_CONFLICT:
 327         // A dead location. If it is an oop then we need a NULL to prevent GC from following it
 328         *addr = NULL_WORD;
 329         break;
 330       default:
 331         ShouldNotReachHere();
 332     }
 333     if (TaggedStackInterpreter) {
 334       // Write tag to stack
 335       iframe()->interpreter_frame_set_local_tag(i,
 336                                   frame::tag_for_basic_type(value->type()));
 337     }
 338   }
 339 
 340   if (is_top_frame && JvmtiExport::can_pop_frame() && thread->popframe_forcing_deopt_reexecution()) {
 341     // An interpreted frame was popped but it returns to a deoptimized
 342     // frame. The incoming arguments to the interpreted activation
 343     // were preserved in thread-local storage by the
 344     // remove_activation_preserving_args_entry in the interpreter; now
 345     // we put them back into the just-unpacked interpreter frame.
 346     // Note that this assumes that the locals arena grows toward lower
 347     // addresses.
 348     if (popframe_preserved_args_size_in_words != 0) {
 349       void* saved_args = thread->popframe_preserved_args();
 350       assert(saved_args != NULL, "must have been saved by interpreter");
 351 #ifdef ASSERT
 352       int stack_words = Interpreter::stackElementWords();
 353       assert(popframe_preserved_args_size_in_words <=
 354              iframe()->interpreter_frame_expression_stack_size()*stack_words,
 355              "expression stack size should have been extended");
 356 #endif // ASSERT
 357       int top_element = iframe()->interpreter_frame_expression_stack_size()-1;
 358       intptr_t* base;
 359       if (frame::interpreter_frame_expression_stack_direction() < 0) {
 360         base = iframe()->interpreter_frame_expression_stack_at(top_element);
 361       } else {
 362         base = iframe()->interpreter_frame_expression_stack();
 363       }
 364       Copy::conjoint_bytes(saved_args,
 365                            base,
 366                            popframe_preserved_args_size_in_bytes);
 367       thread->popframe_free_preserved_args();
 368     }
 369   }
 370 
 371 #ifndef PRODUCT
 372   if (TraceDeoptimization && Verbose) {
 373     ttyLocker ttyl;
 374     tty->print_cr("[%d Interpreted Frame]", ++unpack_counter);
 375     iframe()->print_on(tty);
 376     RegisterMap map(thread);
 377     vframe* f = vframe::new_vframe(iframe(), &map, thread);
 378     f->print();
 379     iframe()->interpreter_frame_print_on(tty);
 380 
 381     tty->print_cr("locals size     %d", locals()->size());
 382     tty->print_cr("expression size %d", expressions()->size());
 383 
 384     method()->print_value();
 385     tty->cr();
 386     // method()->print_codes();
 387   } else if (TraceDeoptimization) {
 388     tty->print("     ");
 389     method()->print_value();
 390     Bytecodes::Code code = Bytecodes::java_code_at(bcp);
 391     int bci = method()->bci_from(bcp);
 392     tty->print(" - %s", Bytecodes::name(code));
 393     tty->print(" @ bci %d ", bci);
 394     tty->print_cr("sp = " PTR_FORMAT, iframe()->sp());
 395   }
 396 #endif // PRODUCT
 397 
 398   // The expression stack and locals are in the resource area don't leave
 399   // a dangling pointer in the vframeArray we leave around for debug
 400   // purposes
 401 
 402   _locals = _expressions = NULL;
 403 
 404 }
 405 
 406 int vframeArrayElement::on_stack_size(int callee_parameters,
 407                                       int callee_locals,
 408                                       bool is_top_frame,
 409                                       int popframe_extra_stack_expression_els) const {
 410   assert(method()->max_locals() == locals()->size(), "just checking");
 411   int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
 412   int temps = expressions()->size();
 413   return Interpreter::size_activation(method(),
 414                                       temps + callee_parameters,
 415                                       popframe_extra_stack_expression_els,
 416                                       locks,
 417                                       callee_parameters,
 418                                       callee_locals,
 419                                       is_top_frame);
 420 }
 421 
 422 
 423 
 424 vframeArray* vframeArray::allocate(JavaThread* thread, int frame_size, GrowableArray<compiledVFrame*>* chunk,
 425                                    RegisterMap *reg_map, frame sender, frame caller, frame self) {
 426 
 427   // Allocate the vframeArray
 428   vframeArray * result = (vframeArray*) AllocateHeap(sizeof(vframeArray) + // fixed part
 429                                                      sizeof(vframeArrayElement) * (chunk->length() - 1), // variable part
 430                                                      "vframeArray::allocate");
 431   result->_frames = chunk->length();
 432   result->_owner_thread = thread;
 433   result->_sender = sender;
 434   result->_caller = caller;
 435   result->_original = self;
 436   result->set_unroll_block(NULL); // initialize it
 437   result->fill_in(thread, frame_size, chunk, reg_map);
 438   return result;
 439 }
 440 
 441 void vframeArray::fill_in(JavaThread* thread,
 442                           int frame_size,
 443                           GrowableArray<compiledVFrame*>* chunk,
 444                           const RegisterMap *reg_map) {
 445   // Set owner first, it is used when adding monitor chunks
 446 
 447   _frame_size = frame_size;
 448   for(int i = 0; i < chunk->length(); i++) {
 449     element(i)->fill_in(chunk->at(i));
 450   }
 451 
 452   // Copy registers for callee-saved registers
 453   if (reg_map != NULL) {
 454     for(int i = 0; i < RegisterMap::reg_count; i++) {
 455 #ifdef AMD64
 456       // The register map has one entry for every int (32-bit value), so
 457       // 64-bit physical registers have two entries in the map, one for
 458       // each half.  Ignore the high halves of 64-bit registers, just like
 459       // frame::oopmapreg_to_location does.
 460       //
 461       // [phh] FIXME: this is a temporary hack!  This code *should* work
 462       // correctly w/o this hack, possibly by changing RegisterMap::pd_location
 463       // in frame_amd64.cpp and the values of the phantom high half registers
 464       // in amd64.ad.
 465       //      if (VMReg::Name(i) < SharedInfo::stack0 && is_even(i)) {
 466         intptr_t* src = (intptr_t*) reg_map->location(VMRegImpl::as_VMReg(i));
 467         _callee_registers[i] = src != NULL ? *src : NULL_WORD;
 468         //      } else {
 469         //      jint* src = (jint*) reg_map->location(VMReg::Name(i));
 470         //      _callee_registers[i] = src != NULL ? *src : NULL_WORD;
 471         //      }
 472 #else
 473       jint* src = (jint*) reg_map->location(VMRegImpl::as_VMReg(i));
 474       _callee_registers[i] = src != NULL ? *src : NULL_WORD;
 475 #endif
 476       if (src == NULL) {
 477         set_location_valid(i, false);
 478       } else {
 479         set_location_valid(i, true);
 480         jint* dst = (jint*) register_location(i);
 481         *dst = *src;
 482       }
 483     }
 484   }
 485 }
 486 
 487 void vframeArray::unpack_to_stack(frame &unpack_frame, int exec_mode) {
 488   // stack picture
 489   //   unpack_frame
 490   //   [new interpreter frames ] (frames are skeletal but walkable)
 491   //   caller_frame
 492   //
 493   //  This routine fills in the missing data for the skeletal interpreter frames
 494   //  in the above picture.
 495 
 496   // Find the skeletal interpreter frames to unpack into
 497   RegisterMap map(JavaThread::current(), false);
 498   // Get the youngest frame we will unpack (last to be unpacked)
 499   frame me = unpack_frame.sender(&map);
 500   int index;
 501   for (index = 0; index < frames(); index++ ) {
 502     *element(index)->iframe() = me;
 503     // Get the caller frame (possibly skeletal)
 504     me = me.sender(&map);
 505   }
 506 
 507   frame caller_frame = me;
 508 
 509   // Do the unpacking of interpreter frames; the frame at index 0 represents the top activation, so it has no callee
 510 
 511   // Unpack the frames from the oldest (frames() -1) to the youngest (0)
 512 
 513   for (index = frames() - 1; index >= 0 ; index--) {
 514     int callee_parameters = index == 0 ? 0 : element(index-1)->method()->size_of_parameters();
 515     int callee_locals     = index == 0 ? 0 : element(index-1)->method()->max_locals();
 516     element(index)->unpack_on_stack(callee_parameters,
 517                                     callee_locals,
 518                                     &caller_frame,
 519                                     index == 0,
 520                                     exec_mode);
 521     if (index == frames() - 1) {
 522       Deoptimization::unwind_callee_save_values(element(index)->iframe(), this);
 523     }
 524     caller_frame = *element(index)->iframe();
 525   }
 526 
 527 
 528   deallocate_monitor_chunks();
 529 }
 530 
 531 void vframeArray::deallocate_monitor_chunks() {
 532   JavaThread* jt = JavaThread::current();
 533   for (int index = 0; index < frames(); index++ ) {
 534      element(index)->free_monitors(jt);
 535   }
 536 }
 537 
 538 #ifndef PRODUCT
 539 
 540 bool vframeArray::structural_compare(JavaThread* thread, GrowableArray<compiledVFrame*>* chunk) {
 541   if (owner_thread() != thread) return false;
 542   int index = 0;
 543 #if 0 // FIXME can't do this comparison
 544 
 545   // Compare only within vframe array.
 546   for (deoptimizedVFrame* vf = deoptimizedVFrame::cast(vframe_at(first_index())); vf; vf = vf->deoptimized_sender_or_null()) {
 547     if (index >= chunk->length() || !vf->structural_compare(chunk->at(index))) return false;
 548     index++;
 549   }
 550   if (index != chunk->length()) return false;
 551 #endif
 552 
 553   return true;
 554 }
 555 
 556 #endif
 557 
 558 address vframeArray::register_location(int i) const {
 559   assert(0 <= i && i < RegisterMap::reg_count, "index out of bounds");
 560   return (address) & _callee_registers[i];
 561 }
 562 
 563 
 564 #ifndef PRODUCT
 565 
 566 // Printing
 567 
 568 // Note: we cannot have print_on as const, as we allocate inside the method
 569 void vframeArray::print_on_2(outputStream* st)  {
 570   st->print_cr(" - sp: " INTPTR_FORMAT, sp());
 571   st->print(" - thread: ");
 572   Thread::current()->print();
 573   st->print_cr(" - frame size: %d", frame_size());
 574   for (int index = 0; index < frames() ; index++ ) {
 575     element(index)->print(st);
 576   }
 577 }
 578 
 579 void vframeArrayElement::print(outputStream* st) {
 580   st->print_cr(" - interpreter_frame -> sp: ", INTPTR_FORMAT, iframe()->sp());
 581 }
 582 
 583 void vframeArray::print_value_on(outputStream* st) const {
 584   st->print_cr("vframeArray [%d] ", frames());
 585 }
 586 
 587 
 588 #endif