< prev index next >

src/cpu/x86/vm/sharedRuntime_x86_64.cpp

Print this page
rev 10511 : value type calling convention
rev 10512 : more
rev 10513 : reviews
rev 10514 : more reviews


 528 
 529   // Allocate argument register save area
 530   if (frame::arg_reg_save_area_bytes != 0) {
 531     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 532   }
 533   __ mov(c_rarg0, rbx);
 534   __ mov(c_rarg1, rax);
 535   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
 536 
 537   // De-allocate argument register save area
 538   if (frame::arg_reg_save_area_bytes != 0) {
 539     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 540   }
 541 
 542   __ pop_CPU_state();
 543   // restore sp
 544   __ mov(rsp, r13);
 545   __ bind(L);
 546 }
 547 










































 548 
// Generates a compiled-to-interpreter (c2i) adapter: unpacks the arguments
// from the compiled layout (registers / caller stack slots, described by
// regs/sig_bt) into the interpreter's all-on-stack layout, then jumps to the
// method's interpreter entry point. rbx is expected to hold the Method*.
static void gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {
  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need. Plus 1 because
  // we also account for the return address location since
  // we store it first rather than hold it in rax across all the shuffling

  int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);

  // Get return address
  __ pop(rax);

  // set senderSP value
  __ mov(r13, rsp);

  __ subptr(rsp, extraspace);

  // Store the return address in the expected location
  __ movptr(Address(rsp, 0), rax);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // T_VOID marks the unused second slot of a preceding long/double.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // offset to start parameters
    int st_off   = (total_args_passed - i) * Interpreter::stackElementSize;
    int next_off = st_off - Interpreter::stackElementSize;

    // Say 4 args:
    // i   st_off
    // 0   32 T_LONG
    // 1   24 T_VOID
    // 2   16 T_OBJECT
    // 3    8 T_BOOL
    // -    0 return address
    //
    // However, to make things extra confusing: because we can fit a long/double in
    // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
    // leaves one slot empty and only stores to a single slot. In this case the
    // slot that is occupied is the T_VOID slot. See, I said it was confusing.

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // memory to memory use rax as scratch
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      if (!r_2->is_valid()) {
        // sign extend??
        __ movl(rax, Address(rsp, ld_off));
        __ movptr(Address(rsp, st_off), rax);

      } else {

        __ movq(rax, Address(rsp, ld_off));

        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
        // T_DOUBLE and T_LONG use two slots in the interpreter
        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // ld_off == LSW, ld_off+wordSize == MSW
          // st_off == MSW, next_off == LSW
          __ movq(Address(rsp, next_off), rax);
#ifdef ASSERT
          // Overwrite the unused slot with known junk
          __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
          __ movptr(Address(rsp, st_off), rax);
#endif /* ASSERT */
        } else {
          __ movq(Address(rsp, st_off), rax);
        }
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        // must be only an int (or less) so move only 32 bits to the slot
        // why not sign extend??
        __ movl(Address(rsp, st_off), r);
      } else {
        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
        // T_DOUBLE and T_LONG use two slots in the interpreter
        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // long/double in gpr
#ifdef ASSERT
          // Overwrite the unused slot with known junk
          __ mov64(rax, CONST64(0xdeadffffdeadaaab));
          __ movptr(Address(rsp, st_off), rax);
#endif /* ASSERT */
          __ movq(Address(rsp, next_off), r);
        } else {
          __ movptr(Address(rsp, st_off), r);
        }
      }
    } else {
      assert(r_1->is_XMMRegister(), "");
      if (!r_2->is_valid()) {
        // only a float, use just part of the slot
        __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
      } else {
#ifdef ASSERT
        // Overwrite the unused slot with known junk
        __ mov64(rax, CONST64(0xdeadffffdeadaaac));
        __ movptr(Address(rsp, st_off), rax);
#endif /* ASSERT */
        __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
      }
    }
  }

  // Schedule the branch target address early.
  __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
  __ jmp(rcx);
}
 683 
// Branches to L_ok iff pc_reg lies strictly inside (code_start, code_end)
// (unsigned comparison); otherwise falls through via the local L_fail label.
// Clobbers temp_reg.
static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
                        address code_start, address code_end,
                        Label& L_ok) {
  Label L_fail;
  __ lea(temp_reg, ExternalAddress(code_start));
  __ cmpptr(pc_reg, temp_reg);
  // pc <= code_start: out of range
  __ jcc(Assembler::belowEqual, L_fail);
  __ lea(temp_reg, ExternalAddress(code_end));
  __ cmpptr(pc_reg, temp_reg);
  // code_start < pc < code_end: in range
  __ jcc(Assembler::below, L_ok);
  __ bind(L_fail);
}
 696 










































































 697 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 698                                     int total_args_passed,
 699                                     int comp_args_on_stack,
 700                                     const BasicType *sig_bt,
 701                                     const VMRegPair *regs) {
 702 
 703   // Note: r13 contains the senderSP on entry. We must preserve it since
 704   // we may do a i2c -> c2i transition if we lose a race where compiled
 705   // code goes non-entrant while we get args ready.
 706   // In addition we use r13 to locate all the interpreter args as
 707   // we must align the stack to 16 bytes on an i2c entry else we
 708   // lose alignment we expect in all compiled code and register
 709   // save code can segv when fxsave instructions find improperly
 710   // aligned stack pointer.
 711 
 712   // Adapters can be frameless because they do not require the caller
 713   // to perform additional cleanup work, such as correcting the stack pointer.
 714   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 715   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 716   // even if a callee has modified the stack pointer.
 717   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 718   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 719   // up via the senderSP register).
 720   // In other words, if *either* the caller or callee is interpreted, we can


 786   // Put saved SP in another register
 787   const Register saved_sp = rax;
 788   __ movptr(saved_sp, r11);
 789 
 790   // Will jump to the compiled code just as if compiled code was doing it.
 791   // Pre-load the register-jump target early, to schedule it better.
 792   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
 793 
 794 #if INCLUDE_JVMCI
 795   if (EnableJVMCI) {
 796     // check if this call should be routed towards a specific entry point
 797     __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
 798     Label no_alternative_target;
 799     __ jcc(Assembler::equal, no_alternative_target);
 800     __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
 801     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
 802     __ bind(no_alternative_target);
 803   }
 804 #endif // INCLUDE_JVMCI
 805 

 806   // Now generate the shuffle code.  Pick up all register args and move the
 807   // rest through the floating point stack top.
 808   for (int i = 0; i < total_args_passed; i++) {
 809     if (sig_bt[i] == T_VOID) {
 810       // Longs and doubles are passed in native word order, but misaligned
 811       // in the 32-bit build.
 812       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 813       continue;
 814     }
 815 
 816     // Pick up 0, 1 or 2 words from SP+offset.
 817 
 818     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 819             "scrambled load targets?");











 820     // Load in argument order going down.
 821     int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
 822     // Point to interpreter value (vs. tag)
 823     int next_off = ld_off - Interpreter::stackElementSize;
 824     //
 825     //
 826     //
 827     VMReg r_1 = regs[i].first();
 828     VMReg r_2 = regs[i].second();
 829     if (!r_1->is_valid()) {
 830       assert(!r_2->is_valid(), "");
 831       continue;
 832     }
 833     if (r_1->is_stack()) {
 834       // Convert stack slot to an SP offset (+ wordSize to account for return address )
 835       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 836 
 837       // We can use r13 as a temp here because compiled code doesn't need r13 as an input
 838       // and if we end up going thru a c2i because of a miss a reasonable value of r13
 839       // will be generated.
 840       if (!r_2->is_valid()) {
 841         // sign extend???
 842         __ movl(r13, Address(saved_sp, ld_off));
 843         __ movptr(Address(rsp, st_off), r13);
 844       } else {
 845         //
 846         // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 847         // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case
 848         // So we must adjust where to pick up the data to match the interpreter.
 849         //
 850         // Interpreter local[n] == MSW, local[n+1] == LSW however locals
 851         // are accessed as negative so LSW is at LOW address
 852 
 853         // ld_off is MSW so get LSW
 854         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 855                            next_off : ld_off;
 856         __ movq(r13, Address(saved_sp, offset));
 857         // st_off is LSW (i.e. reg.first())
 858         __ movq(Address(rsp, st_off), r13);
 859       }
 860     } else if (r_1->is_Register()) {  // Register argument
 861       Register r = r_1->as_Register();
 862       assert(r != rax, "must be different");
 863       if (r_2->is_valid()) {
 864         //
 865         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 866         // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case
 867         // So we must adjust where to pick up the data to match the interpreter.
 868 
 869         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 870                            next_off : ld_off;
 871 
 872         // this can be a misaligned move
 873         __ movq(r, Address(saved_sp, offset));
 874       } else {
 875         // sign extend and use a full word?
 876         __ movl(r, Address(saved_sp, ld_off));
 877       }
 878     } else {
 879       if (!r_2->is_valid()) {
 880         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));





















 881       } else {
 882         __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));


 883       }

 884     }
 885   }
 886 
 887   // 6243940 We might end up in handle_wrong_method if
 888   // the callee is deoptimized as we race thru here. If that
 889   // happens we don't want to take a safepoint because the
 890   // caller frame will look interpreted and arguments are now
 891   // "compiled" so it is much better to make this transition
 892   // invisible to the stack walking code. Unfortunately if
 893   // we try and find the callee by normal means a safepoint
 894   // is possible. So we stash the desired callee in the thread
 895   // and the vm will find there should this case occur.
 896 
 897   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
 898 
 899   // put Method* where a c2i would expect should we end up there
 900   // only needed becaus eof c2 resolve stubs return Method* as a result in
 901   // rax
 902   __ mov(rax, rbx);
 903   __ jmp(r11);
 904 }
 905 
// ---------------------------------------------------------------
// Generates the full adapter set for one signature fingerprint: an i2c
// adapter, an inline-cache-checked c2i entry (c2i_unverified_entry), and the
// plain c2i entry, then wraps them in a new AdapterHandlerEntry.
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
  // to the interpreter.  The args start out packed in the compiled layout.  They
  // need to be unpacked into the interpreter layout.  This will almost always
  // require some stack space.  We grow the current (compiled) stack, then repack
  // the args.  We  finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not RBP, get sick).

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;
  Label ok;

  Register holder = rax;
  Register receiver = j_rarg0;
  Register temp = rbx;

  {
    // Inline-cache check: the receiver's klass must match the klass cached in
    // the CompiledICHolder; on mismatch go to the ic-miss stub.
    __ load_klass(temp, receiver);
    __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
    __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset()));
    __ jcc(Assembler::equal, ok);
    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

    __ bind(ok);
    // Method might have been compiled since the call site was patched to
    // interpreted; if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
    __ jcc(Assembler::equal, skip_fixup);
    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  }

  address c2i_entry = __ pc();

  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}
 957 
 958 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 959                                          VMRegPair *regs,
 960                                          VMRegPair *regs2,
 961                                          int total_args_passed) {
 962   assert(regs2 == NULL, "not needed on x86");
 963 // We return the amount of VMRegImpl stack slots we need to reserve for all
 964 // the arguments NOT counting out_preserve_stack_slots.
 965 
 966 // NOTE: These arrays will have to change when c1 is ported
 967 #ifdef _WIN64
 968     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
 969       c_rarg0, c_rarg1, c_rarg2, c_rarg3
 970     };
 971     static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
 972       c_farg0, c_farg1, c_farg2, c_farg3
 973     };
 974 #else




 528 
 529   // Allocate argument register save area
 530   if (frame::arg_reg_save_area_bytes != 0) {
 531     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 532   }
 533   __ mov(c_rarg0, rbx);
 534   __ mov(c_rarg1, rax);
 535   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
 536 
 537   // De-allocate argument register save area
 538   if (frame::arg_reg_save_area_bytes != 0) {
 539     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 540   }
 541 
 542   __ pop_CPU_state();
 543   // restore sp
 544   __ mov(rsp, r13);
 545   __ bind(L);
 546 }
 547 
 548 // For each value type argument, sig includes the list of fields of
 549 // the value type. This utility function computes the number of
 550 // arguments for the call if value types are passed by reference (the
 551 // calling convention the interpreter expects).
 552 static int compute_total_args_passed_int(const GrowableArray<SigEntry>& sig_extended) {
 553   int total_args_passed = 0;
 554   if (ValueTypePassFieldsAsArgs) {
 555     for (int i = 0; i < sig_extended.length(); i++) {
 556       BasicType bt = sig_extended.at(i)._bt;
 557       if (bt == T_VALUETYPE) {
 558         // In sig_extended, a value type argument starts with:
 559         // T_VALUETYPE, followed by the types of the fields of the
 560         // value type and T_VOID to mark the end of the value
 561         // type. Value types are flattened so, for instance, in the
 562         // case of a value type with an int field and a value type
 563         // field that itself has 2 fields, an int and a long:
 564         // T_VALUETYPE T_INT T_VALUETYPE T_INT T_LONG T_VOID (second
 565         // slot for the T_LONG) T_VOID (inner T_VALUETYPE) T_VOID
 566         // (outer T_VALUETYPE)
 567         total_args_passed++;
 568         int vt = 1;
 569         do {
 570           i++;
 571           BasicType bt = sig_extended.at(i)._bt;
 572           BasicType prev_bt = sig_extended.at(i-1)._bt;
 573           if (bt == T_VALUETYPE) {
 574             vt++;
 575           } else if (bt == T_VOID &&
 576                      prev_bt != T_LONG &&
 577                      prev_bt != T_DOUBLE) {
 578             vt--;
 579           }
 580         } while (vt != 0);
 581       } else {
 582         total_args_passed++;
 583       }
 584     }
 585   } else {
 586     total_args_passed = sig_extended.length();
 587   }
 588   return total_args_passed;
 589 }
 590 



































 591 
// Moves a single argument from its compiled-code location (register, XMM
// register, or caller stack slot, described by reg_pair) to 'to', the
// interpreter-layout destination, using rax as scratch for memory-to-memory
// moves. 'bt'/'prev_bt' are the argument's type and the preceding signature
// entry; a T_VOID (the second half of a long/double) needs no move.
// 'extraspace' is the amount rsp was dropped, used to rebase incoming stack
// slot offsets.
static void gen_c2i_adapter_helper(MacroAssembler *masm,
                                   BasicType bt,
                                   BasicType prev_bt,
                                   const VMRegPair& reg_pair,
                                   const Address& to,
                                   int extraspace) {
  assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here");
  if (bt == T_VOID) {
    assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
    return;
  }

  // Say 4 args:
  // i   st_off
  // 0   32 T_LONG
  // 1   24 T_VOID
  // 2   16 T_OBJECT
  // 3    8 T_BOOL
  // -    0 return address
  //
  // However, to make things extra confusing: because we can fit a long/double in
  // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
  // leaves one slot empty and only stores to a single slot. In this case the
  // slot that is occupied is the T_VOID slot. See, I said it was confusing.

  VMReg r_1 = reg_pair.first();
  VMReg r_2 = reg_pair.second();
  if (!r_1->is_valid()) {
    assert(!r_2->is_valid(), "");
    return;
  }
  if (r_1->is_stack()) {
    // memory to memory use rax as scratch
    int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
    if (!r_2->is_valid()) {
      // sign extend??
      __ movl(rax, Address(rsp, ld_off));
      __ movl(to, rax);

    } else {

      __ movq(rax, Address(rsp, ld_off));
      __ movq(to, rax);
    }
  } else if (r_1->is_Register()) {
    Register r = r_1->as_Register();
    if (!r_2->is_valid()) {
      // must be only an int (or less) so move only 32 bits to the slot
      // why not sign extend??
      __ movl(to, r);
    } else {
      __ movq(to, r);
    }
  } else {
    assert(r_1->is_XMMRegister(), "");
    if (!r_2->is_valid()) {
      // only a float, use just part of the slot
      __ movflt(to, r_1->as_XMMRegister());
    } else {
      __ movdbl(to, r_1->as_XMMRegister());
    }
  }
}
 655       
// C2I adapter for the value-type calling convention: unpacks arguments from
// the compiled layout into the interpreter layout. When value types are
// passed flattened (ValueTypePassFieldsAsArgs), their field values are
// written into heap buffers obtained via a runtime call
// (SharedRuntime::allocate_value_types), and each buffer reference is passed
// to the interpreter instead. Fills in oop_maps/frame_complete/
// frame_size_in_words when that runtime call (and its register-save frame)
// is emitted.
static void gen_c2i_adapter(MacroAssembler *masm,
                            const GrowableArray<SigEntry>& sig_extended,
                            const VMRegPair *regs,
                            Label& skip_fixup,
                            address start,
                            OopMapSet*& oop_maps,
                            int& frame_complete,
                            int& frame_size_in_words) {
  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  if (ValueTypePassFieldsAsArgs) {
    // Are there any value type arguments?
    int i = 0;
    for (; i < sig_extended.length() && sig_extended.at(i)._bt != T_VALUETYPE; i++);

    if (i < sig_extended.length()) {
      // There is at least one value type argument: we're coming from
      // compiled code so we have no buffers to back the value
      // types. Allocate the buffers here with a runtime call.
      oop_maps = new OopMapSet();
      OopMap* map = NULL;

      map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);

      frame_complete = __ offset();

      __ set_last_Java_frame(noreg, noreg, NULL);

      __ mov(c_rarg0, r15_thread);

      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_value_types)));

      oop_maps->add_gc_map((int)(__ pc() - start), map);
      __ reset_last_Java_frame(false, false);

      RegisterSaver::restore_live_registers(masm);

      // If the allocation raised an exception, clear vm_result and forward it.
      Label no_exception;
      __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::equal, no_exception);

      __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
      __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
      __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

      __ bind(no_exception);

      // We get an array of objects from the runtime call
      int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
      __ get_vm_result(r13, r15_thread);
      __ addptr(r13, offset_in_bytes);
      // r10 points at the first buffer element for the unpacking loop below.
      __ mov(r10, r13);
    }
  }


  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need. Plus 1 because
  // we also account for the return address location since
  // we store it first rather than hold it in rax across all the shuffling
  int total_args_passed = compute_total_args_passed_int(sig_extended);
  int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);

  // Get return address
  __ pop(rax);

  // set senderSP value
  __ mov(r13, rsp);

  __ subptr(rsp, extraspace);

  // Store the return address in the expected location
  __ movptr(Address(rsp, 0), rax);

  // Now write the args into the outgoing interpreter space

  // next_arg_comp is the next argument from the compiler point of
  // view (value type fields are passed in registers/on the stack). In
  // sig_extended, a value type argument starts with: T_VALUETYPE,
  // followed by the types of the fields of the value type and T_VOID
  // to mark the end of the value type. ignored counts the number of
  // T_VALUETYPE/T_VOID. next_vt_arg is the next value type argument:
  // used to get the buffer for that argument from the pool of buffers
  // we allocated above and want to pass to the
  // interpreter. next_arg_int is the next argument from the
  // interpreter point of view (value types are passed by reference).
  for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
       next_arg_comp < sig_extended.length(); next_arg_comp++) {
    assert((next_arg_comp == 0 && ignored == 0) || ignored < next_arg_comp, "shouldn't skip over more slot than there are arguments");
    assert(next_arg_int < total_args_passed, "more arguments for the interpreter than expected?");
    BasicType bt = sig_extended.at(next_arg_comp)._bt;
    int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
    if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) {
      int next_off = st_off - Interpreter::stackElementSize;
      // Longs/doubles occupy the lower (next_off) interpreter slot.
      const int offset = (bt==T_LONG||bt==T_DOUBLE) ? next_off : st_off;
      gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended.at(next_arg_comp-1)._bt : T_ILLEGAL,
                             regs[next_arg_comp-ignored], Address(rsp, offset), extraspace);
      next_arg_int++;
#ifdef ASSERT
      if (bt==T_LONG || bt==T_DOUBLE) {
        // Overwrite the unused slot with known junk
        __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
        __ movptr(Address(rsp, st_off), rax);
      }
#endif /* ASSERT */
    } else {
      ignored++;
      // get the buffer from the just allocated pool of buffers
      __ load_heap_oop(r11, Address(r10, next_vt_arg * type2aelembytes(T_VALUETYPE)));
      next_vt_arg++; next_arg_int++;
      int vt = 1;
      // write fields we get from compiled code in registers/stack
      // slots to the buffer: we know we are done with that value type
      // argument when we hit the T_VOID that acts as an end-of-value-
      // type delimiter for this value type. Value types are flattened
      // so we might encounter embedded value types. Each entry in
      // sig_extended contains a field offset in the buffer.
      do {
        next_arg_comp++;
        BasicType bt = sig_extended.at(next_arg_comp)._bt;
        BasicType prev_bt = sig_extended.at(next_arg_comp-1)._bt;
        if (bt == T_VALUETYPE) {
          vt++;
          ignored++;
        } else if (bt == T_VOID &&
                   prev_bt != T_LONG &&
                   prev_bt != T_DOUBLE) {
          vt--;
          ignored++;
        } else {
          int off = sig_extended.at(next_arg_comp)._offset;
          assert(off > 0, "offset in object should be positive");
          gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended.at(next_arg_comp-1)._bt : T_ILLEGAL,
                                 regs[next_arg_comp-ignored], Address(r11, off), extraspace);
        }
      } while (vt != 0);
      // pass the buffer to the interpreter
      __ movptr(Address(rsp, st_off), r11);
    }
  }

  // Schedule the branch target address early.
  __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
  __ jmp(rcx);
}
 811 
// Branches to L_ok iff pc_reg lies strictly inside (code_start, code_end)
// (unsigned comparison); otherwise falls through via the local L_fail label.
// Clobbers temp_reg.
static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
                        address code_start, address code_end,
                        Label& L_ok) {
  Label L_fail;
  __ lea(temp_reg, ExternalAddress(code_start));
  __ cmpptr(pc_reg, temp_reg);
  // pc <= code_start: out of range
  __ jcc(Assembler::belowEqual, L_fail);
  __ lea(temp_reg, ExternalAddress(code_end));
  __ cmpptr(pc_reg, temp_reg);
  // code_start < pc < code_end: in range
  __ jcc(Assembler::below, L_ok);
  __ bind(L_fail);
}
 824 
// Moves a single argument from 'from' (its interpreter-layout location) to
// its compiled-code location described by reg_pair (register, XMM register,
// or outgoing stack slot), using r13 as scratch for memory-to-memory moves.
// 'bt'/'prev_bt' are the argument's type and the preceding signature entry;
// a T_VOID (the second half of a long/double) needs no move of its own.
static void gen_i2c_adapter_helper(MacroAssembler *masm,
                                   BasicType bt,
                                   BasicType prev_bt,
                                   const VMRegPair& reg_pair,
                                   const Address& from) {
  assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here");
  if (bt == T_VOID) {
    // Longs and doubles are passed in native word order, but misaligned
    // in the 32-bit build.
    assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
    return;
  }
  // Pick up 0, 1 or 2 words from SP+offset.

  assert(!reg_pair.second()->is_valid() || reg_pair.first()->next() == reg_pair.second(),
         "scrambled load targets?");
  VMReg r_1 = reg_pair.first();
  VMReg r_2 = reg_pair.second();
  if (!r_1->is_valid()) {
    assert(!r_2->is_valid(), "");
    return;
  }
  if (r_1->is_stack()) {
    // Convert stack slot to an SP offset (+ wordSize to account for return address)
    int st_off = reg_pair.first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;

    // We can use r13 as a temp here because compiled code doesn't need r13 as an input
    // and if we end up going thru a c2i because of a miss a reasonable value of r13
    // will be generated.
    if (!r_2->is_valid()) {
      // sign extend???
      __ movl(r13, from);
      __ movptr(Address(rsp, st_off), r13);
    } else {
      //
      // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
      // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
      // So we must adjust where to pick up the data to match the interpreter.
      //
      // Interpreter local[n] == MSW, local[n+1] == LSW however locals
      // are accessed as negative so LSW is at LOW address

      // ld_off is MSW so get LSW
      __ movq(r13, from);
      // st_off is LSW (i.e. reg.first())
      __ movq(Address(rsp, st_off), r13);
    }
  } else if (r_1->is_Register()) {  // Register argument
    Register r = r_1->as_Register();
    assert(r != rax, "must be different");
    if (r_2->is_valid()) {
      //
      // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
      // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
      // So we must adjust where to pick up the data to match the interpreter.

      // this can be a misaligned move
      __ movq(r, from);
    } else {
      // sign extend and use a full word?
      __ movl(r, from);
    }
  } else {
    // XMM argument: a float uses one slot, a double two.
    if (!r_2->is_valid()) {
      __ movflt(r_1->as_XMMRegister(), from);
    } else {
      __ movdbl(r_1->as_XMMRegister(), from);
    }
  }
}
 898 
 899 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,

 900                                     int comp_args_on_stack,
 901                                     const GrowableArray<SigEntry>& sig_extended,
 902                                     const VMRegPair *regs) {
 903 
       // Interpreted-to-compiled adapter: pull the arguments out of the
       // interpreter's stack layout (one stackElement-sized slot per
       // argument, addressed off the saved sender SP) and shuffle them into
       // the compiled calling convention described by 'regs'. With
       // ValueTypePassFieldsAsArgs, a value type arrives from the
       // interpreter as a single buffer reference and is exploded here into
       // its individual field values.
 904   // Note: r13 contains the senderSP on entry. We must preserve it since
 905   // we may do a i2c -> c2i transition if we lose a race where compiled
 906   // code goes non-entrant while we get args ready.
 907   // In addition we use r13 to locate all the interpreter args as
 908   // we must align the stack to 16 bytes on an i2c entry else we
 909   // lose alignment we expect in all compiled code and register
 910   // save code can segv when fxsave instructions find improperly
 911   // aligned stack pointer.
 912 
 913   // Adapters can be frameless because they do not require the caller
 914   // to perform additional cleanup work, such as correcting the stack pointer.
 915   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 916   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 917   // even if a callee has modified the stack pointer.
 918   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 919   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 920   // up via the senderSP register).
 921   // In other words, if *either* the caller or callee is interpreted, we can


 987   // Put saved SP in another register
 988   const Register saved_sp = rax;
 989   __ movptr(saved_sp, r11);
 990 
 991   // Will jump to the compiled code just as if compiled code was doing it.
 992   // Pre-load the register-jump target early, to schedule it better.
 993   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
 994 
 995 #if INCLUDE_JVMCI
 996   if (EnableJVMCI) {
 997     // check if this call should be routed towards a specific entry point
 998     __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
 999     Label no_alternative_target;
1000     __ jcc(Assembler::equal, no_alternative_target);
1001     __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
1002     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
1003     __ bind(no_alternative_target);
1004   }
1005 #endif // INCLUDE_JVMCI
1006 
1007   int total_args_passed = compute_total_args_passed_int(sig_extended);
1008   // Now generate the shuffle code.  Pick up all register args and move the
1009   // rest through the floating point stack top.









1010 
1011   // next_arg_comp is the next argument from the compiler point of
1012   // view (value type fields are passed in registers/on the stack). In
1013   // sig_extended, a value type argument starts with: T_VALUETYPE,
1014   // followed by the types of the fields of the value type and T_VOID
1015   // to mark the end of the value type. ignored counts the number of
1016   // T_VALUETYPE/T_VOID. next_arg_int is the next argument from the
1017   // interpreter point of view (value types are passed by reference).
1018   for (int next_arg_comp = 0, ignored = 0, next_arg_int = 0; next_arg_comp < sig_extended.length(); next_arg_comp++) {
1019     assert((next_arg_comp == 0 && ignored == 0) || ignored < next_arg_comp, "shouldn't skip over more slot than there are arguments");
1020     assert(next_arg_int < total_args_passed, "more arguments from the interpreter than expected?");
1021     BasicType bt = sig_extended.at(next_arg_comp)._bt;
         // ld_off: offset (from saved_sp) of this argument's interpreter
         // stack slot; arguments are laid out top-down.
1022     int ld_off = (total_args_passed - next_arg_int)*Interpreter::stackElementSize;
1023     if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) {
1024       // Load in argument order going down.

1025       // Point to interpreter value (vs. tag)
1026       int next_off = ld_off - Interpreter::stackElementSize;
1027       const int offset = (bt==T_LONG||bt==T_DOUBLE) ? next_off : ld_off;
1028       gen_i2c_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended.at(next_arg_comp-1)._bt : T_ILLEGAL,
1029                              regs[next_arg_comp-ignored], Address(saved_sp, offset));
1030       next_arg_int++;


















































1031     } else {
1032       next_arg_int++;
1033       ignored++;
1034       // get the buffer for that value type
1035       __ movptr(r10, Address(saved_sp, ld_off));
1036       int vt = 1;
1037       // load fields to registers/stack slots from the buffer: we know
1038       // we are done with that value type argument when we hit the
1039       // T_VOID that acts as an end of value type delimiter for this
1040       // value type. Value types are flattened so we might encounter an
1041       // embedded value type. Each entry in sig_extended contains a
1042       // field offset in the buffer.
           // Note: a T_VOID that immediately follows a T_LONG/T_DOUBLE field
           // is the second half of that field, not a delimiter; only other
           // T_VOIDs close a (possibly nested) value type.
1043       do {
1044         next_arg_comp++;
1045         BasicType bt = sig_extended.at(next_arg_comp)._bt;
1046         BasicType prev_bt = sig_extended.at(next_arg_comp-1)._bt;
1047         if (bt == T_VALUETYPE) {
1048           vt++;
1049           ignored++;
1050         } else if (bt == T_VOID &&
1051                    prev_bt != T_LONG &&
1052                    prev_bt != T_DOUBLE) {
1053           vt--;
1054           ignored++;
1055         } else {
1056           int off = sig_extended.at(next_arg_comp)._offset;
1057           assert(off > 0, "offset in object should be positive");
1058           gen_i2c_adapter_helper(masm, bt, prev_bt, regs[next_arg_comp - ignored], Address(r10, off));
1059         }
1060       } while (vt != 0);
1061     }
1062   }
1063 
1064   // 6243940 We might end up in handle_wrong_method if
1065   // the callee is deoptimized as we race thru here. If that
1066   // happens we don't want to take a safepoint because the
1067   // caller frame will look interpreted and arguments are now
1068   // "compiled" so it is much better to make this transition
1069   // invisible to the stack walking code. Unfortunately if
1070   // we try and find the callee by normal means a safepoint
1071   // is possible. So we stash the desired callee in the thread
1072   // and the vm will find there should this case occur.
1073 
1074   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
1075 
1076   // put Method* where a c2i would expect should we end up there
1077   // only needed because of c2 resolve stubs return Method* as a result in
1078   // rax
1079   __ mov(rax, rbx);
1080   __ jmp(r11);
1081 }
1082 
1083 // ---------------------------------------------------------------
     // Generate the i2c and c2i adapters back to back into a single
     // AdapterBlob and register the resulting entry points. The unverified
     // c2i entry first performs an inline-cache check against the
     // CompiledICHolder passed in rax before falling into the real c2i code.
1084 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,

1085                                                             int comp_args_on_stack,
1086                                                             const GrowableArray<SigEntry>& sig_extended,
1087                                                             const VMRegPair *regs,
1088                                                             AdapterFingerPrint* fingerprint,
1089                                                             AdapterBlob*& new_adapter) {
1090   address i2c_entry = __ pc();
1091 
1092   gen_i2c_adapter(masm, comp_args_on_stack, sig_extended, regs);
1093 
1094   // -------------------------------------------------------------------------
1095   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
1096   // to the interpreter.  The args start out packed in the compiled layout.  They
1097   // need to be unpacked into the interpreter layout.  This will almost always
1098   // require some stack space.  We grow the current (compiled) stack, then repack
1099   // the args.  We  finally end in a jump to the generic interpreter entry point.
1100   // On exit from the interpreter, the interpreter will restore our SP (lest the
1101   // compiled code, which relys solely on SP and not RBP, get sick).
1102 
1103   address c2i_unverified_entry = __ pc();
1104   Label skip_fixup;
1105   Label ok;
1106 
1107   Register holder = rax;
1108   Register receiver = j_rarg0;
1109   Register temp = rbx;
1110 
1111   {
         // Inline-cache check: the receiver's klass must match the klass
         // cached in the CompiledICHolder; otherwise go to the ic-miss stub.
1112     __ load_klass(temp, receiver);
1113     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
         // Load the target Method* before the branch so rbx is already set
         // on the hit path (harmless if the check fails).
1114     __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset()));
1115     __ jcc(Assembler::equal, ok);
1116     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1117 
1118     __ bind(ok);
1119     // Method might have been compiled since the call site was patched to
1120     // interpreted if that is the case treat it as a miss so we can get
1121     // the call site corrected.
1122     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
1123     __ jcc(Assembler::equal, skip_fixup);
1124     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1125   }
1126 
1127   address c2i_entry = __ pc();
1128 
1129   OopMapSet* oop_maps = NULL;
1130   int frame_complete = CodeOffsets::frame_never_safe;
1131   int frame_size_in_words = 0;
1132   gen_c2i_adapter(masm, sig_extended, regs, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words);
1133 
       // Finalize the code buffer and wrap it in an AdapterBlob; the frame
       // metadata and oop maps come from gen_c2i_adapter above.
1134   __ flush();
1135   new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps);
1136   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
1137 }
1138 
1139 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1140                                          VMRegPair *regs,
1141                                          VMRegPair *regs2,
1142                                          int total_args_passed) {
1143   assert(regs2 == NULL, "not needed on x86");
1144 // We return the amount of VMRegImpl stack slots we need to reserve for all
1145 // the arguments NOT counting out_preserve_stack_slots.
1146 
1147 // NOTE: These arrays will have to change when c1 is ported
1148 #ifdef _WIN64
1149     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1150       c_rarg0, c_rarg1, c_rarg2, c_rarg3
1151     };
1152     static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1153       c_farg0, c_farg1, c_farg2, c_farg3
1154     };
1155 #else


< prev index next >