rev 1081 : imported patch indy-cleanup-6893081.patch

--- old/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+++ new/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
[ 630 lines elided ]
 631  631    // we may do an i2c -> c2i transition if we lose a race where compiled
 632  632    // code goes non-entrant while we get the args ready.
 633  633    // In addition we use r13 to locate all the interpreter args, because
 634  634    // we must align the stack to 16 bytes on an i2c entry; otherwise we
 635  635    // lose the alignment we expect in all compiled code, and the register
 636  636    // save code can segv when fxsave instructions find an improperly
 637  637    // aligned stack pointer.
 638  638  
 639  639    __ movptr(rax, Address(rsp, 0));
 640  640  
      641 +  // Must preserve original SP for loading incoming arguments because
      642 +  // we need to align the outgoing SP for compiled code.
      643 +  __ movptr(r11, rsp);
      644 +
 641  645    // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
 642  646    // in registers, we will occasionally have no stack args.
 643  647    int comp_words_on_stack = 0;
 644  648    if (comp_args_on_stack) {
 645  649      // Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
 646  650      // registers are below.  By subtracting stack0, we either get a negative
 647  651      // number (all values in registers) or the maximum stack slot accessed.
 648  652  
 649  653      // Convert 4-byte c2 stack slots to words.
 650  654      comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
[ 3 lines elided ]
 654  658    }
 655  659  
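As a concrete check of the slot-to-word conversion above (numbers are illustrative): 5 four-byte c2 stack slots are 20 bytes, round_to pads that to 24, and 24 >> LogBytesPerWord gives 3 words. A minimal sketch with stand-in constants:

    #include <cstdio>

    // Stand-ins for the HotSpot constants used above (LP64 values assumed).
    const int stack_slot_size = 4;   // VMRegImpl::stack_slot_size
    const int wordSize        = 8;
    const int LogBytesPerWord = 3;

    // round_to: round x up to a multiple of s (s must be a power of two).
    int round_to(int x, int s) { return (x + s - 1) & -s; }

    int main() {
      for (int slots = 0; slots <= 6; slots++) {
        int words = round_to(slots * stack_slot_size, wordSize) >> LogBytesPerWord;
        printf("%d c2 stack slots -> %d words\n", slots, words);
      }
      return 0;
    }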
 656  660  
 657  661    // Ensure compiled code always sees stack at proper alignment
 658  662    __ andptr(rsp, -16);
 659  663  
 660  664    // push the return address and misalign the stack so that the youngest frame
 661  665    // always sees the placement a call instruction would have produced
 662  666    __ push(rax);
 663  667  
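A quick standalone illustration of the alignment dance above (plain C++; the starting SP value is made up): andptr(rsp, -16) rounds the stack down to a 16-byte boundary, and the push(rax) that follows re-creates the 8-mod-16 state a real call instruction would leave, which is what the youngest compiled frame expects to see.

    #include <cstdint>
    #include <cassert>

    int main() {
      // Hypothetical incoming SP on an i2c entry (value is made up).
      uintptr_t rsp = 0x7fffffffe398;

      // andptr(rsp, -16): round down to a 16-byte boundary so compiled code
      // (and the fxsave-based register save code) sees an aligned stack.
      rsp &= ~uintptr_t(15);
      assert(rsp % 16 == 0);

      // push(rax): pushing the return address subtracts one 8-byte word,
      // re-creating the misalignment a real call instruction leaves behind.
      rsp -= sizeof(uintptr_t);
      assert(rsp % 16 == 8);
      return 0;
    }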
      668 +  // Put saved SP in another register; r11 is needed below for the jump target
      669 +  const Register saved_sp = rax;
      670 +  __ movptr(saved_sp, r11);
      671 +
 664  672    // Will jump to the compiled code just as if compiled code was doing it.
 665  673    // Pre-load the register-jump target early, to schedule it better.
 666  674    __ movptr(r11, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));
 667  675  
 668  676    // Now generate the shuffle code.  Pick up all register args and move the
 669  677    // rest through the floating point stack top.
 670  678    for (int i = 0; i < total_args_passed; i++) {
 671  679      if (sig_bt[i] == T_VOID) {
 672  680        // Longs and doubles are passed in native word order, but misaligned
 673  681        // in the 32-bit build.
 674  682        assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 675  683        continue;
 676  684      }
 677  685  
 678  686      // Pick up 0, 1 or 2 words from SP+offset.
 679  687  
 680  688      assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 681  689              "scrambled load targets?");
 682  690      // Load in argument order going down.
 683      -    // int ld_off = (total_args_passed + comp_words_on_stack -i)*wordSize;
 684      -    // base ld_off on r13 (sender_sp) as the stack alignment makes offsets from rsp
 685      -    // unpredictable
 686      -    int ld_off = ((total_args_passed - 1) - i)*Interpreter::stackElementSize();
 687      -
      691 +    int ld_off = (total_args_passed - i)*Interpreter::stackElementSize() + Interpreter::value_offset_in_bytes();
 688  692      // Point to interpreter value (vs. tag)
 689  693      int next_off = ld_off - Interpreter::stackElementSize();
 690  694      //
 691  695      //
 692  696      //
 693  697      VMReg r_1 = regs[i].first();
 694  698      VMReg r_2 = regs[i].second();
 695  699      if (!r_1->is_valid()) {
 696  700        assert(!r_2->is_valid(), "");
 697  701        continue;
 698  702      }
 699  703      if (r_1->is_stack()) {
 700  704        // Convert stack slot to an SP offset (+ wordSize to account for the return address)
 701  705        int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
      706 +
      707 +      // We can use r13 as a temp here because compiled code doesn't need r13 as an input,
      708 +      // and if we end up going through a c2i because of a miss, a reasonable value of r13
      709 +      // will be generated.
 702  710        if (!r_2->is_valid()) {
 703  711          // sign extend???
 704      -        __ movl(rax, Address(r13, ld_off));
 705      -        __ movptr(Address(rsp, st_off), rax);
      712 +        __ movl(r13, Address(saved_sp, ld_off));
      713 +        __ movptr(Address(rsp, st_off), r13);
 706  714        } else {
 707  715          //
 708  716          // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE;
 709  717          // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
 710  718          // so we must adjust where to pick up the data to match the interpreter.
 711  719          //
 712  720          // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
 713  721          // are accessed with negative offsets, so the LSW is at the LOW address (see the offset sketch after this loop)
 714  722  
 715  723          // ld_off is MSW so get LSW
 716  724          const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 717  725                             next_off : ld_off;
 718      -        __ movq(rax, Address(r13, offset));
      726 +        __ movq(r13, Address(saved_sp, offset));
 719  727          // st_off is LSW (i.e. reg.first())
 720      -        __ movq(Address(rsp, st_off), rax);
      728 +        __ movq(Address(rsp, st_off), r13);
 721  729        }
 722  730      } else if (r_1->is_Register()) {  // Register argument
 723  731        Register r = r_1->as_Register();
 724  732        assert(r != rax, "must be different");
 725  733        if (r_2->is_valid()) {
 726  734          //
 727  735          // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE;
 728  736          // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
 729  737          // so we must adjust where to pick up the data to match the interpreter.
 730  738  
 731  739          const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 732  740                             next_off : ld_off;
 733  741  
 734  742          // this can be a misaligned move
 735      -        __ movq(r, Address(r13, offset));
      743 +        __ movq(r, Address(saved_sp, offset));
 736  744        } else {
 737  745          // sign extend and use a full word?
 738      -        __ movl(r, Address(r13, ld_off));
      746 +        __ movl(r, Address(saved_sp, ld_off));
 739  747        }
 740  748      } else {
 741  749        if (!r_2->is_valid()) {
 742      -        __ movflt(r_1->as_XMMRegister(), Address(r13, ld_off));
      750 +        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
 743  751        } else {
 744      -        __ movdbl(r_1->as_XMMRegister(), Address(r13, next_off));
      752 +        __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
 745  753        }
 746  754      }
 747  755    }
 748  756  
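To make the offset arithmetic in the shuffle loop concrete: ld_off indexes the interpreter's argument area downward from saved_sp, and for a T_LONG or T_DOUBLE the loop loads from next_off, the lower-addressed LSW slot. The constants below are assumptions for illustration (the real values come from Interpreter::stackElementSize() and Interpreter::value_offset_in_bytes(), and change when tagged stack values are enabled), and the signature is hypothetical:

    #include <cstdio>

    const int stackElementSize      = 8;  // assumed; Interpreter::stackElementSize()
    const int value_offset_in_bytes = 0;  // assumed; Interpreter::value_offset_in_bytes()

    int main() {
      // Hypothetical signature (int, long): sig_bt = { T_INT, T_LONG, T_VOID },
      // where the trailing T_VOID is the second half of the long.
      const char* sig[] = { "T_INT", "T_LONG", "T_VOID" };
      const int total_args_passed = 3;
      for (int i = 0; i < total_args_passed; i++) {
        int ld_off   = (total_args_passed - i) * stackElementSize + value_offset_in_bytes;
        int next_off = ld_off - stackElementSize;
        printf("%-6s ld_off=%2d next_off=%2d\n", sig[i], ld_off, next_off);
      }
      return 0;
    }

For the T_LONG above the load would use next_off (= 8), matching the comment that the interpreter keeps the value in the lower-addressed of its two slots.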
 749  757    // 6243940 We might end up in handle_wrong_method if
 750  758    // the callee is deoptimized as we race through here. If that
 751  759    // happens we don't want to take a safepoint because the
 752  760    // caller frame will look interpreted and arguments are now
 753  761    // "compiled" so it is much better to make this transition
 754  762    // invisible to the stack walking code. Unfortunately if
[ 2557 lines elided ]
3312 3320    // convention will save/restore it in prolog/epilog). Other than that
3313 3321    // there are no callee save registers now that adapter frames are gone.
3314 3322  
3315 3323    __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
3316 3324  
3317 3325    __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
3318 3326    __ pop(rdx);                  // No need for exception pc anymore
3319 3327  
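The << LogBytesPerInt shifts above convert SimpleRuntimeFrame slot indices, which are counted in 32-bit ints, into byte offsets from RSP. A hedged sketch with made-up slot values (the real enum is defined earlier in this file):

    #include <cstdio>

    const int LogBytesPerInt = 2;   // one frame slot = 4 bytes

    int main() {
      // Hypothetical slot indices in the style of SimpleRuntimeFrame; the
      // actual values come from the enum in sharedRuntime_x86_64.cpp.
      const int rbp_off    = 0;
      const int return_off = 2;
      printf("rbp reloaded from [rsp + %d]\n", rbp_off << LogBytesPerInt);
      printf("epilog adds %d to rsp\n", return_off << LogBytesPerInt);
      return 0;
    }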
3320 3328    // rax: exception handler
3321 3329  
     3330 +  // Restore SP from BP if the exception PC is a MethodHandle call.
     3331 +  __ cmpl(Address(r15_thread, JavaThread::is_method_handle_exception_offset()), 0);
     3332 +  __ cmovptr(Assembler::notEqual, rsp, rbp);
     3333 +
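The cmpl/cmovptr pair above is a branch-free conditional restore: when the thread's is_method_handle_exception flag is non-zero, RSP is replaced by RBP, which the MethodHandle calling convention uses to carry the caller's SP. A sketch of what the two instructions compute (the struct is a stand-in, not the real JavaThread layout):

    #include <cstdint>

    // Stand-in for the flag tested via
    // JavaThread::is_method_handle_exception_offset(); not the real layout.
    struct ThreadStandIn {
      int is_method_handle_exception;
    };

    // cmpl(..., 0) + cmovptr(notEqual, rsp, rbp): rsp is overwritten with
    // rbp only when the flag is set, and no branch is ever taken.
    uintptr_t restore_sp(const ThreadStandIn* t, uintptr_t rsp, uintptr_t rbp) {
      return (t->is_method_handle_exception != 0) ? rbp : rsp;
    }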
3322 3334    // We have a handler in rax (could be deopt blob).
3323 3335    __ mov(r8, rax);
3324 3336  
3325 3337    // Get the exception oop
3326 3338    __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
3327 3339    // Get the exception pc in case we are deoptimized
3328 3340    __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
3329 3341  #ifdef ASSERT
3330 3342    __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD);
3331 3343    __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
[ 18 lines elided ]