
src/cpu/x86/vm/sharedRuntime_x86_32.cpp

rev 10530 : fix x86 32 bit build (broken by change to calling convention)


 540     __ addptr(rsp, 2*wordSize);
 541   }
 542   if (UseSSE >= 2) {
 543     __ movdbl(xmm0, Address(rsp, 0));
 544     __ movdbl(xmm1, Address(rsp, 2*wordSize));
 545     __ addptr(rsp, 4*wordSize);
 546   }
 547 
 548   __ popf();
 549   __ popa();
 550   __ bind(L);
 551 }
 552 
 553 
 554 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
 555   int next_off = st_off - Interpreter::stackElementSize;
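      // (Illustrative note, not from the original: with the 32-bit
      // stackElementSize of 4, next_off is the lower-addressed half of the
      // interpreter's two-slot pair for a double.)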
 556   __ movdbl(Address(rsp, next_off), r);
 557 }
 558 
 559 static void gen_c2i_adapter(MacroAssembler *masm,
 560                             int total_args_passed,
 561                             int comp_args_on_stack,
 562                             const BasicType *sig_bt,
 563                             const VMRegPair *regs,
 564                             Label& skip_fixup) {




 565   // Before we get into the guts of the C2I adapter, see if we should be here
 566   // at all.  We've come from compiled code and are attempting to jump to the
 567   // interpreter, which means the caller made a static call to get here
 568   // (vcalls always get a compiled target if there is one).  Check for a
 569   // compiled target.  If there is one, we need to patch the caller's call.
 570   patch_callers_callsite(masm);
 571 
 572   __ bind(skip_fixup);
 573 
 574 #ifdef COMPILER2
 575   // C2 may leave the stack dirty if not in SSE2+ mode
 576   if (UseSSE >= 2) {
 577     __ verify_FPU(0, "c2i transition should have clean FPU stack");
 578   } else {
 579     __ empty_FPU_stack();
 580   }
 581 #endif /* COMPILER2 */
 582 
 583   // Since all args are passed on the stack,
 584   // total_args_passed * Interpreter::stackElementSize is the
 585   // space we need.
 586   int extraspace = total_args_passed * Interpreter::stackElementSize;
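      // Illustrative arithmetic (hypothetical signature, assuming the 32-bit
      // Interpreter::stackElementSize of 4 bytes): a (long, int) signature
      // passes total_args_passed == 3 entries (T_LONG, T_VOID, T_INT), so
      // extraspace == 3 * 4 == 12 bytes.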
 587 
 588   // Get return address
 589   __ pop(rax);
 590 
 591   // set senderSP value
 592   __ movptr(rsi, rsp);
 593 
 594   __ subptr(rsp, extraspace);
 595 
 596   // Now write the args into the outgoing interpreter space
 597   for (int i = 0; i < total_args_passed; i++) {
 598     if (sig_bt[i] == T_VOID) {
 599       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 600       continue;
 601     }
 602 
 603     // st_off points to lowest address on stack.
 604     int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
 605     int next_off = st_off - Interpreter::stackElementSize;
 606 
 607     // Say 4 args:
 608     // i   st_off
 609     // 0   12 T_LONG
 610     // 1    8 T_VOID
 611     // 2    4 T_OBJECT
 612     // 3    0 T_BOOL
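      // (Illustrative: for the T_LONG at i == 0, next_off == 8, so the
      // long/double paths below store the payload at next_off, filling the
      // slot pair at offsets 8 and 12.)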
 613     VMReg r_1 = regs[i].first();
 614     VMReg r_2 = regs[i].second();
 615     if (!r_1->is_valid()) {
 616       assert(!r_2->is_valid(), "");
 617       continue;
 618     }
 619 
 620     if (r_1->is_stack()) {
 621       // Memory to memory: use the FPU stack top.
 622       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 623 
 624       if (!r_2->is_valid()) {


 634 #ifndef _LP64
 635         __ movptr(rdi, Address(rsp, ld_off + wordSize));
 636         __ movptr(Address(rsp, st_off), rdi);
 637 #else
 638 #ifdef ASSERT
 639         // Overwrite the unused slot with known junk
 640         __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
 641         __ movptr(Address(rsp, st_off), rax);
 642 #endif /* ASSERT */
 643 #endif // _LP64
 644       }
 645     } else if (r_1->is_Register()) {
 646       Register r = r_1->as_Register();
 647       if (!r_2->is_valid()) {
 648         __ movl(Address(rsp, st_off), r);
 649       } else {
 650         // long/double in gpr
 651         NOT_LP64(ShouldNotReachHere());
 652         // Two VMRegs can hold a T_OBJECT, T_ADDRESS, T_DOUBLE or T_LONG;
 653         // T_DOUBLE and T_LONG use two slots in the interpreter.
 654         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
 655           // long/double in gpr
 656 #ifdef ASSERT
 657           // Overwrite the unused slot with known junk
 658           LP64_ONLY(__ mov64(rax, CONST64(0xdeadffffdeadaaab)));
 659           __ movptr(Address(rsp, st_off), rax);
 660 #endif /* ASSERT */
 661           __ movptr(Address(rsp, next_off), r);
 662         } else {
 663           __ movptr(Address(rsp, st_off), r);
 664         }
 665       }
 666     } else {
 667       assert(r_1->is_XMMRegister(), "");
 668       if (!r_2->is_valid()) {
 669         __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
 670       } else {
 671         assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type");
 672         move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
 673       }
 674     }
 675   }
 676 
 677   // Schedule the branch target address early.
 678   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 679   // And repush original return address
 680   __ push(rax);
 681   __ jmp(rcx);
 682 }
 683 
 684 
 685 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
 686   int next_val_off = ld_off - Interpreter::stackElementSize;
 687   __ movdbl(r, Address(saved_sp, next_val_off));
 688 }
 689 
 690 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 691                         address code_start, address code_end,
 692                         Label& L_ok) {
 693   Label L_fail;
 694   __ lea(temp_reg, ExternalAddress(code_start));
 695   __ cmpptr(pc_reg, temp_reg);
 696   __ jcc(Assembler::belowEqual, L_fail);
 697   __ lea(temp_reg, ExternalAddress(code_end));
 698   __ cmpptr(pc_reg, temp_reg);
 699   __ jcc(Assembler::below, L_ok);
 700   __ bind(L_fail);
 701 }
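 // Usage sketch for range_check (illustrative; the register choices and the
 // stop message are assumptions, not taken from this file):
 //   Label L_ok;
 //   range_check(masm, rax, rdi, code_start, code_end, L_ok);
 //   __ stop("expected pc to be in range");  // fall-through means failure
 //   __ bind(L_ok);
 // Control reaches L_ok only when code_start < pc_reg < code_end.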
 702 
 703 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 704                                     int total_args_passed,
 705                                     int comp_args_on_stack,
 706                                     const BasicType *sig_bt,
 707                                     const VMRegPair *regs) {

 708   // Note: rsi contains the senderSP on entry. We must preserve it since
 709   // we may do an i2c -> c2i transition if we lose a race where compiled
 710   // code goes non-entrant while we get args ready.
 711 
 712   // Adapters can be frameless because they do not require the caller
 713   // to perform additional cleanup work, such as correcting the stack pointer.
 714   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 715   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 716   // even if a callee has modified the stack pointer.
 717   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 718   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 719   // up via the senderSP register).
 720   // In other words, if *either* the caller or callee is interpreted, we can
 721   // get the stack pointer repaired after a call.
 722   // This is why c2i and i2c adapters cannot be indefinitely composed.
 723   // In particular, if a c2i adapter were to somehow call an i2c adapter,
 724   // both caller and callee would be compiled methods, and neither would
 725   // clean up the stack pointer changes performed by the two adapters.
 726   // If this happens, control eventually transfers back to the compiled
 727   // caller, but with an uncorrected stack, causing delayed havoc.
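 // Illustrative call chains (restating the rules above):
 //   interpreted -> i2c -> compiled : the interpreted caller repairs its own
 //     SP from interpreter_frame_last_sp on return.
 //   compiled -> c2i -> interpreted : the interpreted callee repairs the
 //     caller's SP from sender_sp, seeded from the senderSP register.
 //   compiled -> c2i -> i2c -> compiled : nobody would repair SP, hence the
 //     rule that adapters must not compose.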


 776   }
 777 
 778   // Align the outgoing SP
 779   __ andptr(rsp, -(StackAlignmentInBytes));
 780 
 781   // push the return address on the stack (note that pushing, rather
 782   // than storing it, yields the correct frame alignment for the callee)
 783   __ push(rax);
 784 
 785   // Put saved SP in another register
 786   const Register saved_sp = rax;
 787   __ movptr(saved_sp, rdi);
 788 
 789 
 790   // Will jump to the compiled code just as if compiled code were doing it.
 791   // Pre-load the register-jump target early, to schedule it better.
 792   __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
 793 
 794   // Now generate the shuffle code.  Pick up all register args and move the
 795   // rest through the floating point stack top.
 796   for (int i = 0; i < total_args_passed; i++) {
 797     if (sig_bt[i] == T_VOID) {
 798       // Longs and doubles are passed in native word order, but misaligned
 799       // in the 32-bit build.
 800       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 801       continue;
 802     }
 803 
 804     // Pick up 0, 1 or 2 words from SP+offset.
 805 
 806     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 807             "scrambled load targets?");
 808     // Load in argument order going down.
 809     int ld_off = (total_args_passed - i) * Interpreter::stackElementSize;
 810     // Point to interpreter value (vs. tag)
 811     int next_off = ld_off - Interpreter::stackElementSize;
 812     //
 813     //
 814     //
 815     VMReg r_1 = regs[i].first();
 816     VMReg r_2 = regs[i].second();
 817     if (!r_1->is_valid()) {
 818       assert(!r_2->is_valid(), "");
 819       continue;
 820     }
 821     if (r_1->is_stack()) {
 822       // Convert stack slot to an SP offset (+ wordSize to account for the return address)
 823       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 824 
 825       // We can use rsi as a temp here because compiled code doesn't need rsi as an input,
 826       // and if we end up going through a c2i because of a miss, a reasonable value of rsi
 827       // will be generated.
 828       if (!r_2->is_valid()) {
 829         // __ fld_s(Address(saved_sp, ld_off));
 830         // __ fstp_s(Address(rsp, st_off));
 831         __ movl(rsi, Address(saved_sp, ld_off));
 832         __ movptr(Address(rsp, st_off), rsi);
 833       } else {
 834         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
 835         // are accessed at negative offsets, so the LSW is at the LOW address.
 836 
 837         // ld_off is the MSW, so get the LSW
 838         // st_off is the LSW (i.e. reg.first())
 839         // __ fld_d(Address(saved_sp, next_off));
 840         // __ fstp_d(Address(rsp, st_off));
 841         //
 842         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
 843         // The interpreter allocates two slots but only uses one in the T_LONG or T_DOUBLE case,
 844         // so we must adjust where to pick up the data to match the interpreter.
 845         //
 846         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
 847         // are accessed at negative offsets, so the LSW is at the LOW address.
 848 
 849         // ld_off is the MSW, so get the LSW
 850         const int offset = (NOT_LP64(true ||) sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 851                            next_off : ld_off;
 852         __ movptr(rsi, Address(saved_sp, offset));
 853         __ movptr(Address(rsp, st_off), rsi);
 854 #ifndef _LP64
 855         __ movptr(rsi, Address(saved_sp, ld_off));
 856         __ movptr(Address(rsp, st_off + wordSize), rsi);
 857 #endif // _LP64
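        // Worked example (illustrative, 32-bit, stackElementSize == 4): with
        // ld_off == 16, next_off == 12 holds the LSW; the LSW is copied to
        // st_off and the MSW (from ld_off) to st_off + wordSize.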
 858       }
 859     } else if (r_1->is_Register()) {  // Register argument
 860       Register r = r_1->as_Register();
 861       assert(r != rax, "must be different");
 862       if (r_2->is_valid()) {
 863         //
 864         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
 865         // The interpreter allocates two slots but only uses one in the T_LONG or T_DOUBLE case,
 866         // so we must adjust where to pick up the data to match the interpreter.
 867 
 868         const int offset = (NOT_LP64(true ||) sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 869                            next_off : ld_off;
 870 
 871         // this can be a misaligned move
 872         __ movptr(r, Address(saved_sp, offset));
 873 #ifndef _LP64
 874         assert(r_2->as_Register() != rax, "need another temporary register");
 875         // Remember r_1 is low address (and LSB on x86)
 876         // So r_2 gets loaded from high address regardless of the platform
 877         __ movptr(r_2->as_Register(), Address(saved_sp, ld_off));
 878 #endif // _LP64
 879       } else {
 880         __ movl(r, Address(saved_sp, ld_off));
 881       }
 882     } else {
 883       assert(r_1->is_XMMRegister(), "");
 884       if (!r_2->is_valid()) {
 885         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
 886       } else {
 887         move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off);
 888       }


 896   // "compiled" so it is much better to make this transition
 897   // invisible to the stack walking code. Unfortunately if
 898   // we try to find the callee by normal means, a safepoint
 899   // is possible. So we stash the desired callee in the thread,
 900   // and the VM will find it there should this case occur.
 901 
 902   __ get_thread(rax);
 903   __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
 904 
 905   // Move Method* to rax in case we end up in a c2i adapter.
 906   // The c2i adapters expect Method* in rax (c2) because c2's
 907   // resolve stubs return the result (the method) in rax.
 908   // I'd love to fix this.
 909   __ mov(rax, rbx);
 910 
 911   __ jmp(rdi);
 912 }
 913 
 914 // ---------------------------------------------------------------
 915 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 916                                                             int total_args_passed,
 917                                                             int comp_args_on_stack,
 918                                                             const BasicType *sig_bt,
 919                                                             const VMRegPair *regs,
 920                                                             AdapterFingerPrint* fingerprint) {

 921   address i2c_entry = __ pc();
 922 
 923   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 924 
 925   // -------------------------------------------------------------------------
 926   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
 927   // to the interpreter.  The args start out packed in the compiled layout.  They
 928   // need to be unpacked into the interpreter layout.  This will almost always
 929   // require some stack space.  We grow the current (compiled) stack, then repack
 930   // the args.  We finally end in a jump to the generic interpreter entry point.
 931   // On exit from the interpreter, the interpreter will restore our SP (lest the
 932   // compiled code, which relies solely on SP and not EBP, get sick).
 933 
 934   address c2i_unverified_entry = __ pc();
 935   Label skip_fixup;
 936 
 937   Register holder = rax;
 938   Register receiver = rcx;
 939   Register temp = rbx;
 940 
 941   {
 942 
 943     Label missed;
 944     __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
 945     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
 946     __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset()));
 947     __ jcc(Assembler::notEqual, missed);
 948     // Method might have been compiled since the call site was patched to
 949     // interpreted; if that is the case, treat it as a miss so we can get
 950     // the call site corrected.
 951     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 952     __ jcc(Assembler::equal, skip_fixup);
 953 
 954     __ bind(missed);
 955     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 956   }
 957 
 958   address c2i_entry = __ pc();
 959 
 960   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);



 961 
 962   __ flush();

 963   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
 964 }
 965 
 966 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 967                                          VMRegPair *regs,
 968                                          VMRegPair *regs2,
 969                                          int total_args_passed) {
 970   assert(regs2 == NULL, "not needed on x86");
 971 // We return the number of VMRegImpl stack slots we need to reserve for all
 972 // the arguments NOT counting out_preserve_stack_slots.
 973 
 974   uint    stack = 0;        // All arguments on stack
 975 
 976   for( int i = 0; i < total_args_passed; i++) {
 977     // From the type and the argument number (count) compute the location
 978     switch( sig_bt[i] ) {
 979     case T_BOOLEAN:
 980     case T_CHAR:
 981     case T_FLOAT:
 982     case T_BYTE:




 540     __ addptr(rsp, 2*wordSize);
 541   }
 542   if (UseSSE >= 2) {
 543     __ movdbl(xmm0, Address(rsp, 0));
 544     __ movdbl(xmm1, Address(rsp, 2*wordSize));
 545     __ addptr(rsp, 4*wordSize);
 546   }
 547 
 548   __ popf();
 549   __ popa();
 550   __ bind(L);
 551 }
 552 
 553 
 554 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
 555   int next_off = st_off - Interpreter::stackElementSize;
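      // (Illustrative note, not from the original: with the 32-bit
      // stackElementSize of 4, next_off is the lower-addressed half of the
      // interpreter's two-slot pair for a double.)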
 556   __ movdbl(Address(rsp, next_off), r);
 557 }
 558 
 559 static void gen_c2i_adapter(MacroAssembler *masm,
 560                             const GrowableArray<SigEntry>& sig_extended,


 561                             const VMRegPair *regs,
 562                             Label& skip_fixup,
 563                             address start,
 564                             OopMapSet*& oop_maps,
 565                             int& frame_complete,
 566                             int& frame_size_in_words) {
 567   // Before we get into the guts of the C2I adapter, see if we should be here
 568   // at all.  We've come from compiled code and are attempting to jump to the
 569   // interpreter, which means the caller made a static call to get here
 570   // (vcalls always get a compiled target if there is one).  Check for a
 571   // compiled target.  If there is one, we need to patch the caller's call.
 572   patch_callers_callsite(masm);
 573 
 574   __ bind(skip_fixup);
 575 
 576 #ifdef COMPILER2
 577   // C2 may leave the stack dirty if not in SSE2+ mode
 578   if (UseSSE >= 2) {
 579     __ verify_FPU(0, "c2i transition should have clean FPU stack");
 580   } else {
 581     __ empty_FPU_stack();
 582   }
 583 #endif /* COMPILER2 */
 584 
 585   // Since all args are passed on the stack,
 586   // sig_extended.length() * Interpreter::stackElementSize is the
 587   // space we need.
 588   int extraspace = sig_extended.length() * Interpreter::stackElementSize;
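      // Illustrative arithmetic (hypothetical signature, assuming the 32-bit
      // Interpreter::stackElementSize of 4 bytes): for (long, int),
      // sig_extended holds 3 entries (T_LONG, T_VOID, T_INT), so
      // extraspace == 3 * 4 == 12 bytes.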
 589 
 590   // Get return address
 591   __ pop(rax);
 592 
 593   // set senderSP value
 594   __ movptr(rsi, rsp);
 595 
 596   __ subptr(rsp, extraspace);
 597 
 598   // Now write the args into the outgoing interpreter space
 599   for (int i = 0; i < sig_extended.length(); i++) {
 600     if (sig_extended.at(i)._bt == T_VOID) {
 601       assert(i > 0 && (sig_extended.at(i-1)._bt == T_LONG || sig_extended.at(i-1)._bt == T_DOUBLE), "missing half");
 602       continue;
 603     }
 604 
 605     // st_off points to lowest address on stack.
 606     int st_off = ((sig_extended.length() - 1) - i) * Interpreter::stackElementSize;
 607     int next_off = st_off - Interpreter::stackElementSize;
 608 
 609     // Say 4 args:
 610     // i   st_off
 611     // 0   12 T_LONG
 612     // 1    8 T_VOID
 613     // 2    4 T_OBJECT
 614     // 3    0 T_BOOL
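      // (Illustrative: for the T_LONG at i == 0, next_off == 8, so the
      // long/double paths below store the payload at next_off, filling the
      // slot pair at offsets 8 and 12.)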
 615     VMReg r_1 = regs[i].first();
 616     VMReg r_2 = regs[i].second();
 617     if (!r_1->is_valid()) {
 618       assert(!r_2->is_valid(), "");
 619       continue;
 620     }
 621 
 622     if (r_1->is_stack()) {
 623       // Memory to memory: use the FPU stack top.
 624       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 625 
 626       if (!r_2->is_valid()) {


 636 #ifndef _LP64
 637         __ movptr(rdi, Address(rsp, ld_off + wordSize));
 638         __ movptr(Address(rsp, st_off), rdi);
 639 #else
 640 #ifdef ASSERT
 641         // Overwrite the unused slot with known junk
 642         __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
 643         __ movptr(Address(rsp, st_off), rax);
 644 #endif /* ASSERT */
 645 #endif // _LP64
 646       }
 647     } else if (r_1->is_Register()) {
 648       Register r = r_1->as_Register();
 649       if (!r_2->is_valid()) {
 650         __ movl(Address(rsp, st_off), r);
 651       } else {
 652         // long/double in gpr
 653         NOT_LP64(ShouldNotReachHere());
 654         // Two VMRegs can hold a T_OBJECT, T_ADDRESS, T_DOUBLE or T_LONG;
 655         // T_DOUBLE and T_LONG use two slots in the interpreter.
 656         if ( sig_extended.at(i)._bt == T_LONG || sig_extended.at(i)._bt == T_DOUBLE) {
 657           // long/double in gpr
 658 #ifdef ASSERT
 659           // Overwrite the unused slot with known junk
 660           LP64_ONLY(__ mov64(rax, CONST64(0xdeadffffdeadaaab)));
 661           __ movptr(Address(rsp, st_off), rax);
 662 #endif /* ASSERT */
 663           __ movptr(Address(rsp, next_off), r);
 664         } else {
 665           __ movptr(Address(rsp, st_off), r);
 666         }
 667       }
 668     } else {
 669       assert(r_1->is_XMMRegister(), "");
 670       if (!r_2->is_valid()) {
 671         __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
 672       } else {
 673         assert(sig_extended.at(i)._bt == T_DOUBLE || sig_extended.at(i)._bt == T_LONG, "wrong type");
 674         move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
 675       }
 676     }
 677   }
 678 
 679   // Schedule the branch target address early.
 680   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 681   // And repush original return address
 682   __ push(rax);
 683   __ jmp(rcx);
 684 }
 685 
 686 
 687 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
 688   int next_val_off = ld_off - Interpreter::stackElementSize;
 689   __ movdbl(r, Address(saved_sp, next_val_off));
 690 }
 691 
 692 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 693                         address code_start, address code_end,
 694                         Label& L_ok) {
 695   Label L_fail;
 696   __ lea(temp_reg, ExternalAddress(code_start));
 697   __ cmpptr(pc_reg, temp_reg);
 698   __ jcc(Assembler::belowEqual, L_fail);
 699   __ lea(temp_reg, ExternalAddress(code_end));
 700   __ cmpptr(pc_reg, temp_reg);
 701   __ jcc(Assembler::below, L_ok);
 702   __ bind(L_fail);
 703 }
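 // Usage sketch for range_check (illustrative; the register choices and the
 // stop message are assumptions, not taken from this file):
 //   Label L_ok;
 //   range_check(masm, rax, rdi, code_start, code_end, L_ok);
 //   __ stop("expected pc to be in range");  // fall-through means failure
 //   __ bind(L_ok);
 // Control reaches L_ok only when code_start < pc_reg < code_end.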
 704 
 705 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,

 706                                     int comp_args_on_stack,
 707                                     const GrowableArray<SigEntry>& sig_extended,
 708                                     const VMRegPair *regs) {
 709 
 710   // Note: rsi contains the senderSP on entry. We must preserve it since
 711   // we may do an i2c -> c2i transition if we lose a race where compiled
 712   // code goes non-entrant while we get args ready.
 713 
 714   // Adapters can be frameless because they do not require the caller
 715   // to perform additional cleanup work, such as correcting the stack pointer.
 716   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 717   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 718   // even if a callee has modified the stack pointer.
 719   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 720   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 721   // up via the senderSP register).
 722   // In other words, if *either* the caller or callee is interpreted, we can
 723   // get the stack pointer repaired after a call.
 724   // This is why c2i and i2c adapters cannot be indefinitely composed.
 725   // In particular, if a c2i adapter were to somehow call an i2c adapter,
 726   // both caller and callee would be compiled methods, and neither would
 727   // clean up the stack pointer changes performed by the two adapters.
 728   // If this happens, control eventually transfers back to the compiled
 729   // caller, but with an uncorrected stack, causing delayed havoc.
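 // Illustrative call chains (restating the rules above):
 //   interpreted -> i2c -> compiled : the interpreted caller repairs its own
 //     SP from interpreter_frame_last_sp on return.
 //   compiled -> c2i -> interpreted : the interpreted callee repairs the
 //     caller's SP from sender_sp, seeded from the senderSP register.
 //   compiled -> c2i -> i2c -> compiled : nobody would repair SP, hence the
 //     rule that adapters must not compose.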


 778   }
 779 
 780   // Align the outgoing SP
 781   __ andptr(rsp, -(StackAlignmentInBytes));
 782 
 783   // push the return address on the stack (note that pushing, rather
 784   // than storing it, yields the correct frame alignment for the callee)
 785   __ push(rax);
 786 
 787   // Put saved SP in another register
 788   const Register saved_sp = rax;
 789   __ movptr(saved_sp, rdi);
 790 
 791 
 792   // Will jump to the compiled code just as if compiled code were doing it.
 793   // Pre-load the register-jump target early, to schedule it better.
 794   __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
 795 
 796   // Now generate the shuffle code.  Pick up all register args and move the
 797   // rest through the floating point stack top.
 798   for (int i = 0; i < sig_extended.length(); i++) {
 799     if (sig_extended.at(i)._bt == T_VOID) {
 800       // Longs and doubles are passed in native word order, but misaligned
 801       // in the 32-bit build.
 802       assert(i > 0 && (sig_extended.at(i-1)._bt == T_LONG || sig_extended.at(i-1)._bt == T_DOUBLE), "missing half");
 803       continue;
 804     }
 805 
 806     // Pick up 0, 1 or 2 words from SP+offset.
 807 
 808     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 809             "scrambled load targets?");
 810     // Load in argument order going down.
 811     int ld_off = (sig_extended.length() - i) * Interpreter::stackElementSize;
 812     // Point to interpreter value (vs. tag)
 813     int next_off = ld_off - Interpreter::stackElementSize;
 814     //
 815     //
 816     //
 817     VMReg r_1 = regs[i].first();
 818     VMReg r_2 = regs[i].second();
 819     if (!r_1->is_valid()) {
 820       assert(!r_2->is_valid(), "");
 821       continue;
 822     }
 823     if (r_1->is_stack()) {
 824       // Convert stack slot to an SP offset (+ wordSize to account for the return address)
 825       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 826 
 827       // We can use rsi as a temp here because compiled code doesn't need rsi as an input,
 828       // and if we end up going through a c2i because of a miss, a reasonable value of rsi
 829       // will be generated.
 830       if (!r_2->is_valid()) {
 831         // __ fld_s(Address(saved_sp, ld_off));
 832         // __ fstp_s(Address(rsp, st_off));
 833         __ movl(rsi, Address(saved_sp, ld_off));
 834         __ movptr(Address(rsp, st_off), rsi);
 835       } else {
 836         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
 837         // are accessed at negative offsets, so the LSW is at the LOW address.
 838 
 839         // ld_off is the MSW, so get the LSW
 840         // st_off is the LSW (i.e. reg.first())
 841         // __ fld_d(Address(saved_sp, next_off));
 842         // __ fstp_d(Address(rsp, st_off));
 843         //
 844         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
 845         // The interpreter allocates two slots but only uses one in the T_LONG or T_DOUBLE case,
 846         // so we must adjust where to pick up the data to match the interpreter.
 847         //
 848         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
 849         // are accessed at negative offsets, so the LSW is at the LOW address.
 850 
 851         // ld_off is the MSW, so get the LSW
 852         const int offset = (NOT_LP64(true ||) sig_extended.at(i)._bt==T_LONG||sig_extended.at(i)._bt==T_DOUBLE)?
 853                            next_off : ld_off;
 854         __ movptr(rsi, Address(saved_sp, offset));
 855         __ movptr(Address(rsp, st_off), rsi);
 856 #ifndef _LP64
 857         __ movptr(rsi, Address(saved_sp, ld_off));
 858         __ movptr(Address(rsp, st_off + wordSize), rsi);
 859 #endif // _LP64
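        // Worked example (illustrative, 32-bit, stackElementSize == 4): with
        // ld_off == 16, next_off == 12 holds the LSW; the LSW is copied to
        // st_off and the MSW (from ld_off) to st_off + wordSize.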
 860       }
 861     } else if (r_1->is_Register()) {  // Register argument
 862       Register r = r_1->as_Register();
 863       assert(r != rax, "must be different");
 864       if (r_2->is_valid()) {
 865         //
 866         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
 867         // The interpreter allocates two slots but only uses one in the T_LONG or T_DOUBLE case,
 868         // so we must adjust where to pick up the data to match the interpreter.
 869 
 870         const int offset = (NOT_LP64(true ||) sig_extended.at(i)._bt==T_LONG||sig_extended.at(i)._bt==T_DOUBLE)?
 871                            next_off : ld_off;
 872 
 873         // this can be a misaligned move
 874         __ movptr(r, Address(saved_sp, offset));
 875 #ifndef _LP64
 876         assert(r_2->as_Register() != rax, "need another temporary register");
 877         // Remember r_1 is low address (and LSB on x86)
 878         // So r_2 gets loaded from high address regardless of the platform
 879         __ movptr(r_2->as_Register(), Address(saved_sp, ld_off));
 880 #endif // _LP64
 881       } else {
 882         __ movl(r, Address(saved_sp, ld_off));
 883       }
 884     } else {
 885       assert(r_1->is_XMMRegister(), "");
 886       if (!r_2->is_valid()) {
 887         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
 888       } else {
 889         move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off);
 890       }


 898   // "compiled" so it is much better to make this transition
 899   // invisible to the stack walking code. Unfortunately if
 900   // we try to find the callee by normal means, a safepoint
 901   // is possible. So we stash the desired callee in the thread,
 902   // and the VM will find it there should this case occur.
 903 
 904   __ get_thread(rax);
 905   __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
 906 
 907   // Move Method* to rax in case we end up in a c2i adapter.
 908   // The c2i adapters expect Method* in rax (c2) because c2's
 909   // resolve stubs return the result (the method) in rax.
 910   // I'd love to fix this.
 911   __ mov(rax, rbx);
 912 
 913   __ jmp(rdi);
 914 }
 915 
 916 // ---------------------------------------------------------------
 917 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,

 918                                                             int comp_args_on_stack,
 919                                                             const GrowableArray<SigEntry>& sig_extended,
 920                                                             const VMRegPair *regs,
 921                                                             AdapterFingerPrint* fingerprint,
 922                                                             AdapterBlob*& new_adapter) {
 923   address i2c_entry = __ pc();
 924 
 925   gen_i2c_adapter(masm, comp_args_on_stack, sig_extended, regs);
 926 
 927   // -------------------------------------------------------------------------
 928   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
 929   // to the interpreter.  The args start out packed in the compiled layout.  They
 930   // need to be unpacked into the interpreter layout.  This will almost always
 931   // require some stack space.  We grow the current (compiled) stack, then repack
 932   // the args.  We finally end in a jump to the generic interpreter entry point.
 933   // On exit from the interpreter, the interpreter will restore our SP (lest the
 934   // compiled code, which relies solely on SP and not EBP, get sick).
 935 
 936   address c2i_unverified_entry = __ pc();
 937   Label skip_fixup;
 938 
 939   Register holder = rax;
 940   Register receiver = rcx;
 941   Register temp = rbx;
 942 
 943   {
 944 
 945     Label missed;
 946     __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
 947     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
 948     __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset()));
 949     __ jcc(Assembler::notEqual, missed);
 950     // Method might have been compiled since the call site was patched to
 951     // interpreted; if that is the case, treat it as a miss so we can get
 952     // the call site corrected.
 953     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 954     __ jcc(Assembler::equal, skip_fixup);
 955 
 956     __ bind(missed);
 957     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 958   }
 959 
 960   address c2i_entry = __ pc();
 961 
 962   OopMapSet* oop_maps = NULL;
 963   int frame_complete = CodeOffsets::frame_never_safe;
 964   int frame_size_in_words = 0;
 965   gen_c2i_adapter(masm, sig_extended, regs, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words);
 966 
 967   __ flush();
 968   new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps);
 969   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
 970 }
 971 
 972 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 973                                          VMRegPair *regs,
 974                                          VMRegPair *regs2,
 975                                          int total_args_passed) {
 976   assert(regs2 == NULL, "not needed on x86");
 977 // We return the number of VMRegImpl stack slots we need to reserve for all
 978 // the arguments NOT counting out_preserve_stack_slots.
 979 
 980   uint    stack = 0;        // All arguments on stack
 981 
 982   for( int i = 0; i < total_args_passed; i++) {
 983     // From the type and the argument number (count) compute the location
 984     switch( sig_bt[i] ) {
 985     case T_BOOLEAN:
 986     case T_CHAR:
 987     case T_FLOAT:
 988     case T_BYTE:

