
src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp





 446       else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i;
 447       else // Else double is passed low on the stack to be aligned.
 448         stack += 2;
 449     } else if( sig_bt[i] == T_LONG ) {
 450       stack += 2;
 451     }
 452   }
 453   int dstack = 0;             // Separate counter for placing doubles
 454 
 455   // Now pick where all else goes.
 456   for( i = 0; i < total_args_passed; i++) {
 457     // From the type and the argument number (count) compute the location
 458     switch( sig_bt[i] ) {
 459     case T_SHORT:
 460     case T_CHAR:
 461     case T_BYTE:
 462     case T_BOOLEAN:
 463     case T_INT:
 464     case T_ARRAY:
 465     case T_OBJECT:

 466     case T_ADDRESS:
 467       if( reg_arg0 == 9999 )  {
 468         reg_arg0 = i;
 469         regs[i].set1(rcx->as_VMReg());
 470       } else if( reg_arg1 == 9999 )  {
 471         reg_arg1 = i;
 472         regs[i].set1(rdx->as_VMReg());
 473       } else {
 474         regs[i].set1(VMRegImpl::stack2reg(stack++));
 475       }
 476       break;
 477     case T_FLOAT:
 478       if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) {
 479         freg_arg0 = i;
 480         regs[i].set1(xmm0->as_VMReg());
 481       } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) {
 482         freg_arg1 = i;
 483         regs[i].set1(xmm1->as_VMReg());
 484       } else {
 485         regs[i].set1(VMRegImpl::stack2reg(stack++));


 496         regs[i].set2(xmm0->as_VMReg());
 497       } else if( freg_arg1 == (uint)i ) {
 498         regs[i].set2(xmm1->as_VMReg());
 499       } else {
 500         regs[i].set2(VMRegImpl::stack2reg(dstack));
 501         dstack += 2;
 502       }
 503       break;
 504     case T_VOID: regs[i].set_bad(); break;
 506     default:
 507       ShouldNotReachHere();
 508       break;
 509     }
 510   }
 511 
 512   // return value can be an odd number of VMRegImpl stack slots; make it a multiple of 2
 513   return align_up(stack, 2);
 514 }
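(For orientation, a minimal stand-alone sketch, not HotSpot code, of the register assignment the 32-bit Java calling convention above implements: the first two int/oop arguments go in rcx and rdx, the first two float arguments in xmm0 and xmm1, and longs and any remaining values go on the stack. All names below, such as is_float, are illustrative only.)

    // toy_java_calling_convention.cpp -- illustrative only, not part of the VM
    #include <cstdio>

    int main() {
      // toy signature: (int, Object, int, float, float)
      const bool is_float[] = { false, false, false, true, true };
      int int_used = 0, flt_used = 0, stack = 0;
      for (bool f : is_float) {
        if (f && flt_used < 2)       printf("arg -> xmm%d\n", flt_used++);            // xmm0, then xmm1
        else if (!f && int_used < 2) printf("arg -> %s\n", int_used++ ? "rdx" : "rcx");
        else                         printf("arg -> stack slot %d\n", stack++);
      }
      return 0;
    }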
 515 









 516 // Patch the caller's callsite with entry to compiled code if it exists.
 517 static void patch_callers_callsite(MacroAssembler *masm) {
 518   Label L;
 519   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 520   __ jcc(Assembler::equal, L);
 521   // Schedule the branch target address early.
 522   // Call into the VM to patch the caller, then jump to compiled callee
 523   // rax isn't live, so capture the return address while we easily can
 524   __ movptr(rax, Address(rsp, 0));
 525   __ pusha();
 526   __ pushf();
 527 
 528   if (UseSSE == 1) {
 529     __ subptr(rsp, 2*wordSize);
 530     __ movflt(Address(rsp, 0), xmm0);
 531     __ movflt(Address(rsp, wordSize), xmm1);
 532   }
 533   if (UseSSE >= 2) {
 534     __ subptr(rsp, 4*wordSize);
 535     __ movdbl(Address(rsp, 0), xmm0);


 557     __ addptr(rsp, 2*wordSize);
 558   }
 559   if (UseSSE >= 2) {
 560     __ movdbl(xmm0, Address(rsp, 0));
 561     __ movdbl(xmm1, Address(rsp, 2*wordSize));
 562     __ addptr(rsp, 4*wordSize);
 563   }
 564 
 565   __ popf();
 566   __ popa();
 567   __ bind(L);
 568 }
 569 
 570 
 571 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
 572   int next_off = st_off - Interpreter::stackElementSize;
 573   __ movdbl(Address(rsp, next_off), r);
 574 }
 575 
 576 static void gen_c2i_adapter(MacroAssembler *masm,
 577                             int total_args_passed,
 578                             int comp_args_on_stack,
 579                             const BasicType *sig_bt,
 580                             const VMRegPair *regs,
 581                             Label& skip_fixup) {




 582   // Before we get into the guts of the C2I adapter, see if we should be here
 583   // at all.  We've come from compiled code and are attempting to jump to the
 584   // interpreter, which means the caller made a static call to get here
 585   // (vcalls always get a compiled target if there is one).  Check for a
 586   // compiled target.  If there is one, we need to patch the caller's call.
 587   patch_callers_callsite(masm);
 588 
 589   __ bind(skip_fixup);
 590 
 591 #ifdef COMPILER2
 592   // C2 may leave the stack dirty if not in SSE2+ mode
 593   if (UseSSE >= 2) {
 594     __ verify_FPU(0, "c2i transition should have clean FPU stack");
 595   } else {
 596     __ empty_FPU_stack();
 597   }
 598 #endif /* COMPILER2 */
 599 
 600   // Since all args are passed on the stack,
 601   // total_args_passed * Interpreter::stackElementSize is the
 602   // space we need.
 603   int extraspace = total_args_passed * Interpreter::stackElementSize;
 604 
 605   // Get return address
 606   __ pop(rax);
 607 
 608   // set senderSP value
 609   __ movptr(rsi, rsp);
 610 
 611   __ subptr(rsp, extraspace);
 612 
 613   // Now write the args into the outgoing interpreter space
 614   for (int i = 0; i < total_args_passed; i++) {
 615     if (sig_bt[i] == T_VOID) {
 616       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 617       continue;
 618     }
 619 
 620     // st_off points to lowest address on stack.
 621     int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
 622     int next_off = st_off - Interpreter::stackElementSize;
 623 
 624     // Say 4 args:
 625     // i   st_off
 626     // 0   12 T_LONG
 627     // 1    8 T_VOID
 628     // 2    4 T_OBJECT
 629     // 3    0 T_BOOL
 630     VMReg r_1 = regs[i].first();
 631     VMReg r_2 = regs[i].second();
 632     if (!r_1->is_valid()) {
 633       assert(!r_2->is_valid(), "");
 634       continue;
 635     }
 636 
 637     if (r_1->is_stack()) {
 638       // memory to memory use fpu stack top
 639       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 640 
 641       if (!r_2->is_valid()) {


 651 #ifndef _LP64
 652         __ movptr(rdi, Address(rsp, ld_off + wordSize));
 653         __ movptr(Address(rsp, st_off), rdi);
 654 #else
 655 #ifdef ASSERT
 656         // Overwrite the unused slot with known junk
 657         __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
 658         __ movptr(Address(rsp, st_off), rax);
 659 #endif /* ASSERT */
 660 #endif // _LP64
 661       }
 662     } else if (r_1->is_Register()) {
 663       Register r = r_1->as_Register();
 664       if (!r_2->is_valid()) {
 665         __ movl(Address(rsp, st_off), r);
 666       } else {
 667         // long/double in gpr
 668         NOT_LP64(ShouldNotReachHere());
 669         // Two VMRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
 670         // T_DOUBLE and T_LONG use two slots in the interpreter
 671         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
 672           // long/double in gpr
 673 #ifdef ASSERT
 674           // Overwrite the unused slot with known junk
 675           LP64_ONLY(__ mov64(rax, CONST64(0xdeadffffdeadaaab)));
 676           __ movptr(Address(rsp, st_off), rax);
 677 #endif /* ASSERT */
 678           __ movptr(Address(rsp, next_off), r);
 679         } else {
 680           __ movptr(Address(rsp, st_off), r);
 681         }
 682       }
 683     } else {
 684       assert(r_1->is_XMMRegister(), "");
 685       if (!r_2->is_valid()) {
 686         __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
 687       } else {
 688         assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type");
 689         move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
 690       }
 691     }
 692   }
 693 
 694   // Schedule the branch target address early.
 695   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 696   // And repush original return address
 697   __ push(rax);
 698   __ jmp(rcx);
 699 }
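(A minimal stand-alone illustration, not HotSpot code, of the st_off computation in the argument-writing loop above, using the 4-arg example from the comment; kStackElementSize is a stand-in for Interpreter::stackElementSize, which is 4 bytes on x86_32.)

    #include <cstdio>

    int main() {
      const int kStackElementSize = 4;
      const char* sig[] = { "T_LONG", "T_VOID", "T_OBJECT", "T_BOOL" };
      const int total_args_passed = 4;
      for (int i = 0; i < total_args_passed; i++) {
        int st_off = ((total_args_passed - 1) - i) * kStackElementSize;
        printf("i=%d  st_off=%2d  %s\n", i, st_off, sig[i]);   // prints 12, 8, 4, 0
      }
      return 0;
    }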
 700 
 701 
 702 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
 703   int next_val_off = ld_off - Interpreter::stackElementSize;
 704   __ movdbl(r, Address(saved_sp, next_val_off));
 705 }
 706 
 707 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 708                         address code_start, address code_end,
 709                         Label& L_ok) {
 710   Label L_fail;
 711   __ lea(temp_reg, ExternalAddress(code_start));
 712   __ cmpptr(pc_reg, temp_reg);
 713   __ jcc(Assembler::belowEqual, L_fail);
 714   __ lea(temp_reg, ExternalAddress(code_end));
 715   __ cmpptr(pc_reg, temp_reg);
 716   __ jcc(Assembler::below, L_ok);
 717   __ bind(L_fail);
 718 }
 719 
 720 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 721                                     int total_args_passed,
 722                                     int comp_args_on_stack,
 723                                     const BasicType *sig_bt,
 724                                     const VMRegPair *regs) {

 725   // Note: rsi contains the senderSP on entry. We must preserve it since
 726   // we may do an i2c -> c2i transition if we lose a race where compiled
 727   // code goes non-entrant while we get args ready.
 728 
 729   // Adapters can be frameless because they do not require the caller
 730   // to perform additional cleanup work, such as correcting the stack pointer.
 731   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 732   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 733   // even if a callee has modified the stack pointer.
 734   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 735   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 736   // up via the senderSP register).
 737   // In other words, if *either* the caller or callee is interpreted, we can
 738   // get the stack pointer repaired after a call.
 739   // This is why c2i and i2c adapters cannot be indefinitely composed.
 740   // In particular, if a c2i adapter were to somehow call an i2c adapter,
 741   // both caller and callee would be compiled methods, and neither would
 742   // clean up the stack pointer changes performed by the two adapters.
 743   // If this happens, control eventually transfers back to the compiled
 744   // caller, but with an uncorrected stack, causing delayed havoc.


 793   }
 794 
 795   // Align the outgoing SP
 796   __ andptr(rsp, -(StackAlignmentInBytes));
 797 
 798   // push the return address on the stack (note that pushing, rather
 799   // than storing it, yields the correct frame alignment for the callee)
 800   __ push(rax);
 801 
 802   // Put saved SP in another register
 803   const Register saved_sp = rax;
 804   __ movptr(saved_sp, rdi);
 805 
 806 
 807   // Will jump to the compiled code just as if compiled code was doing it.
 808   // Pre-load the register-jump target early, to schedule it better.
 809   __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
 810 
 811   // Now generate the shuffle code.  Pick up all register args and move the
 812   // rest through the floating point stack top.
 813   for (int i = 0; i < total_args_passed; i++) {
 814     if (sig_bt[i] == T_VOID) {
 815       // Longs and doubles are passed in native word order, but misaligned
 816       // in the 32-bit build.
 817       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 818       continue;
 819     }
 820 
 821     // Pick up 0, 1 or 2 words from SP+offset.
 822 
 823     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 824             "scrambled load targets?");
 825     // Load in argument order going down.
 826     int ld_off = (total_args_passed - i) * Interpreter::stackElementSize;
 827     // Point to interpreter value (vs. tag)
 828     int next_off = ld_off - Interpreter::stackElementSize;
 829     //
 830     //
 831     //
 832     VMReg r_1 = regs[i].first();
 833     VMReg r_2 = regs[i].second();
 834     if (!r_1->is_valid()) {
 835       assert(!r_2->is_valid(), "");
 836       continue;
 837     }
 838     if (r_1->is_stack()) {
 839       // Convert stack slot to an SP offset (+ wordSize to account for return address )
 840       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 841 
 842       // We can use rsi as a temp here because compiled code doesn't need rsi as an input,
 843       // and if we end up going through a c2i because of a miss, a reasonable value of rsi
 844       // will be generated.
 845       if (!r_2->is_valid()) {
 846         // __ fld_s(Address(saved_sp, ld_off));
 847         // __ fstp_s(Address(rsp, st_off));
 848         __ movl(rsi, Address(saved_sp, ld_off));
 849         __ movptr(Address(rsp, st_off), rsi);
 850       } else {
 851         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
 852         // are accessed at negative offsets, so the LSW is at the LOW address
 853 
 854         // ld_off is MSW so get LSW
 855         // st_off is LSW (i.e. reg.first())
 856         // __ fld_d(Address(saved_sp, next_off));
 857         // __ fstp_d(Address(rsp, st_off));
 858         //
 859         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
 860         // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
 861         // so we must adjust where to pick up the data to match the interpreter.
 862         //
 863         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
 864         // are accessed at negative offsets, so the LSW is at the LOW address
 865 
 866         // ld_off is MSW so get LSW
 867         const int offset = (NOT_LP64(true ||) sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 868                            next_off : ld_off;
 869         __ movptr(rsi, Address(saved_sp, offset));
 870         __ movptr(Address(rsp, st_off), rsi);
 871 #ifndef _LP64
 872         __ movptr(rsi, Address(saved_sp, ld_off));
 873         __ movptr(Address(rsp, st_off + wordSize), rsi);
 874 #endif // _LP64
 875       }
 876     } else if (r_1->is_Register()) {  // Register argument
 877       Register r = r_1->as_Register();
 878       assert(r != rax, "must be different");
 879       if (r_2->is_valid()) {
 880         //
 881         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
 882         // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
 883         // so we must adjust where to pick up the data to match the interpreter.
 884 
 885         const int offset = (NOT_LP64(true ||) sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 886                            next_off : ld_off;
 887 
 888         // this can be a misaligned move
 889         __ movptr(r, Address(saved_sp, offset));
 890 #ifndef _LP64
 891         assert(r_2->as_Register() != rax, "need another temporary register");
 892         // Remember r_1 is low address (and LSB on x86)
 893         // So r_2 gets loaded from high address regardless of the platform
 894         __ movptr(r_2->as_Register(), Address(saved_sp, ld_off));
 895 #endif // _LP64
 896       } else {
 897         __ movl(r, Address(saved_sp, ld_off));
 898       }
 899     } else {
 900       assert(r_1->is_XMMRegister(), "");
 901       if (!r_2->is_valid()) {
 902         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
 903       } else {
 904         move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off);
 905       }


 913   // "compiled" so it is much better to make this transition
 914   // invisible to the stack walking code. Unfortunately if
 915   // we try to find the callee by normal means, a safepoint
 916   // is possible. So we stash the desired callee in the thread
 917   // and the VM will find it there should this case occur.
 918 
 919   __ get_thread(rax);
 920   __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
 921 
 922   // move Method* to rax in case we end up in a c2i adapter.
 923   // The c2i adapters expect Method* in rax (c2) because c2's
 924   // resolve stubs return the result (the method) in rax.
 925   // I'd love to fix this.
 926   __ mov(rax, rbx);
 927 
 928   __ jmp(rdi);
 929 }
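(A minimal stand-alone illustration, not HotSpot code, of the interpreter-side offsets used by the shuffle loop above: ld_off is the offset of argument i from the saved interpreter SP, and for two-slot values such as T_LONG/T_DOUBLE the value is picked up from next_off, one stack element below. kStackElementSize again stands in for Interpreter::stackElementSize.)

    #include <cstdio>

    int main() {
      const int kStackElementSize = 4;
      const int total_args_passed = 4;   // same T_LONG, T_VOID, T_OBJECT, T_BOOL example
      for (int i = 0; i < total_args_passed; i++) {
        int ld_off   = (total_args_passed - i) * kStackElementSize;
        int next_off = ld_off - kStackElementSize;
        printf("i=%d  ld_off=%2d  next_off=%2d\n", i, ld_off, next_off);
      }
      return 0;
    }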
 930 
 931 // ---------------------------------------------------------------
 932 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 933                                                             int total_args_passed,
 934                                                             int comp_args_on_stack,
 935                                                             const BasicType *sig_bt,
 936                                                             const VMRegPair *regs,
 937                                                             AdapterFingerPrint* fingerprint) {

 938   address i2c_entry = __ pc();
 939 
 940   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 941 
 942   // -------------------------------------------------------------------------
 943   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
 944   // to the interpreter.  The args start out packed in the compiled layout.  They
 945   // need to be unpacked into the interpreter layout.  This will almost always
 946   // require some stack space.  We grow the current (compiled) stack, then repack
 947   // the args.  We  finally end in a jump to the generic interpreter entry point.
 948   // On exit from the interpreter, the interpreter will restore our SP (lest the
 949   // compiled code, which relies solely on SP and not EBP, get sick).
 950 
 951   address c2i_unverified_entry = __ pc();
 952   Label skip_fixup;
 953 
 954   Register holder = rax;
 955   Register receiver = rcx;
 956   Register temp = rbx;
 957 
 958   {
 959 
 960     Label missed;
 961     __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
 962     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
 963     __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
 964     __ jcc(Assembler::notEqual, missed);
 965     // Method might have been compiled since the call site was patched to
 966     // interpreted; if that is the case, treat it as a miss so we can get
 967     // the call site corrected.
 968     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 969     __ jcc(Assembler::equal, skip_fixup);
 970 
 971     __ bind(missed);
 972     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 973   }
 974 
 975   address c2i_entry = __ pc();
 976 
 977   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);



 978 
 979   __ flush();

 980   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
 981 }
 982 
 983 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 984                                          VMRegPair *regs,
 985                                          VMRegPair *regs2,
 986                                          int total_args_passed) {
 987   assert(regs2 == NULL, "not needed on x86");
 988 // We return the amount of VMRegImpl stack slots we need to reserve for all
 989 // the arguments NOT counting out_preserve_stack_slots.
 990 
 991   uint    stack = 0;        // All arguments on stack
 992 
 993   for( int i = 0; i < total_args_passed; i++) {
 994     // From the type and the argument number (count) compute the location
 995     switch( sig_bt[i] ) {
 996     case T_BOOLEAN:
 997     case T_CHAR:
 998     case T_FLOAT:
 999     case T_BYTE:
1000     case T_SHORT:
1001     case T_INT:
1002     case T_OBJECT:

1003     case T_ARRAY:
1004     case T_ADDRESS:
1005     case T_METADATA:
1006       regs[i].set1(VMRegImpl::stack2reg(stack++));
1007       break;
1008     case T_LONG:
1009     case T_DOUBLE: // The stack numbering is reversed from Java
1010       // Since C arguments do not get reversed, the ordering for
1011       // doubles on the stack must be opposite the Java convention
1012       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
1013       regs[i].set2(VMRegImpl::stack2reg(stack));
1014       stack += 2;
1015       break;
1016     case T_VOID: regs[i].set_bad(); break;
1017     default:
1018       ShouldNotReachHere();
1019       break;
1020     }
1021   }
1022   return stack;
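(A minimal stand-alone illustration, not HotSpot code, of the slot counting c_calling_convention performs above: every argument is passed on the stack, 32-bit values take one VMRegImpl slot, and T_LONG/T_DOUBLE take two, with their trailing T_VOID placeholder contributing nothing.)

    #include <cstdio>

    int main() {
      // toy signature (jint, jlong, jdouble) flattened as INT, LONG, VOID, DOUBLE, VOID
      const int slots_per_entry[] = { 1, 2, 0, 2, 0 };
      int stack = 0;
      for (int s : slots_per_entry) stack += s;
      printf("VMRegImpl stack slots needed: %d\n", stack);   // 5
      return 0;
    }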


1264       switch (in_sig_bt[i]) {
1265         case T_ARRAY:
1266           if (map != NULL) {
1267             __ movptr(Address(rsp, offset), reg);
1268           } else {
1269             __ movptr(reg, Address(rsp, offset));
1270           }
1271           break;
1272         case T_BOOLEAN:
1273         case T_CHAR:
1274         case T_BYTE:
1275         case T_SHORT:
1276         case T_INT:
1277           if (map != NULL) {
1278             __ movl(Address(rsp, offset), reg);
1279           } else {
1280             __ movl(reg, Address(rsp, offset));
1281           }
1282           break;
1283         case T_OBJECT:

1284         default: ShouldNotReachHere();
1285       }
1286     } else if (in_regs[i].first()->is_XMMRegister()) {
1287       if (in_sig_bt[i] == T_FLOAT) {
1288         int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
1289         int offset = slot * VMRegImpl::stack_slot_size;
1290         assert(handle_index <= stack_slots, "overflow");
1291         if (map != NULL) {
1292           __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
1293         } else {
1294           __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
1295         }
1296       }
1297     } else if (in_regs[i].first()->is_stack()) {
1298       if (in_sig_bt[i] == T_ARRAY && map != NULL) {
1299         int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1300         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1301       }
1302     }
1303   }


1400   __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
1401                            arrayOopDesc::base_offset_in_bytes(in_elem_type)));
1402   simple_move32(masm, tmp, length_arg);
1403   __ jmpb(done);
1404   __ bind(is_null);
1405   // Pass zeros
1406   __ xorptr(tmp_reg, tmp_reg);
1407   simple_move32(masm, tmp, body_arg);
1408   simple_move32(masm, tmp, length_arg);
1409   __ bind(done);
1410 }
1411 
1412 static void verify_oop_args(MacroAssembler* masm,
1413                             const methodHandle& method,
1414                             const BasicType* sig_bt,
1415                             const VMRegPair* regs) {
1416   Register temp_reg = rbx;  // not part of any compiled calling seq
1417   if (VerifyOops) {
1418     for (int i = 0; i < method->size_of_parameters(); i++) {
1419       if (sig_bt[i] == T_OBJECT ||
1420           sig_bt[i] == T_ARRAY) {
1421         VMReg r = regs[i].first();
1422         assert(r->is_valid(), "bad oop arg");
1423         if (r->is_stack()) {
1424           __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1425           __ verify_oop(temp_reg);
1426         } else {
1427           __ verify_oop(r->as_Register());
1428         }
1429       }
1430     }
1431   }
1432 }
1433 
1434 static void gen_special_dispatch(MacroAssembler* masm,
1435                                  const methodHandle& method,
1436                                  const BasicType* sig_bt,
1437                                  const VMRegPair* regs) {
1438   verify_oop_args(masm, method, sig_bt, regs);
1439   vmIntrinsics::ID iid = method->intrinsic_id();
1440 


1873   // sure we can capture all the incoming oop args from the
1874   // caller.
1875   //
1876   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1877 
1878   // Mark location of rbp,
1879   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1880 
1881   // We know that we only have args in at most two integer registers (rcx, rdx), so rax and rbx
1882   // are free to use as temporaries if we have to do stack to stack moves.
1883   // All inbound args are referenced based on rbp, and all outbound args via rsp.
1884 
1885   for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1886     switch (in_sig_bt[i]) {
1887       case T_ARRAY:
1888         if (is_critical_native) {
1889           unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1890           c_arg++;
1891           break;
1892         }

1893       case T_OBJECT:
1894         assert(!is_critical_native, "no oop arguments");
1895         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1896                     ((i == 0) && (!is_static)),
1897                     &receiver_offset);
1898         break;
1899       case T_VOID:
1900         break;
1901 
1902       case T_FLOAT:
1903         float_move(masm, in_regs[i], out_regs[c_arg]);
 1904         break;
1905 
1906       case T_DOUBLE:
1907         assert( i + 1 < total_in_args &&
1908                 in_sig_bt[i + 1] == T_VOID &&
1909                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1910         double_move(masm, in_regs[i], out_regs[c_arg]);
1911         break;
1912 


2055   // Verify or restore cpu control state after JNI call
2056   __ restore_cpu_control_state_after_jni();
2057 
2058   // WARNING - on Windows Java Natives use pascal calling convention and pop the
2059   // arguments off of the stack. We could just re-adjust the stack pointer here
2060   // and continue to do SP relative addressing but we instead switch to FP
2061   // relative addressing.
2062 
2063   // Unpack native results.
2064   switch (ret_type) {
2065   case T_BOOLEAN: __ c2bool(rax);            break;
2066   case T_CHAR   : __ andptr(rax, 0xFFFF);    break;
2067   case T_BYTE   : __ sign_extend_byte (rax); break;
2068   case T_SHORT  : __ sign_extend_short(rax); break;
2069   case T_INT    : /* nothing to do */        break;
2070   case T_DOUBLE :
2071   case T_FLOAT  :
 2072     // Result is in st0; we'll save as needed
2073     break;
2074   case T_ARRAY:                 // Really a handle

2075   case T_OBJECT:                // Really a handle
2076       break; // can't de-handlize until after safepoint check
2077   case T_VOID: break;
2078   case T_LONG: break;
2079   default       : ShouldNotReachHere();
2080   }
2081 
2082   // Switch thread to "native transition" state before reading the synchronization state.
2083   // This additional state is necessary because reading and testing the synchronization
2084   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2085   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2086   //     VM thread changes sync state to synchronizing and suspends threads for GC.
2087   //     Thread A is resumed to finish this native method, but doesn't block here since it
 2088   //     didn't see any synchronization in progress, and escapes.
2089   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2090 
2091   // Force this write out before the read below
2092   __ membar(Assembler::Membar_mask_bits(
2093             Assembler::LoadLoad | Assembler::LoadStore |
2094             Assembler::StoreLoad | Assembler::StoreStore));


2201     __ bind(done);
2202 
2203   }
2204 
2205   {
2206     SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
2207     // Tell dtrace about this method exit
2208     save_native_result(masm, ret_type, stack_slots);
2209     __ mov_metadata(rax, method());
2210     __ call_VM_leaf(
2211          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2212          thread, rax);
2213     restore_native_result(masm, ret_type, stack_slots);
2214   }
2215 
2216   // We can finally stop using that last_Java_frame we setup ages ago
2217 
2218   __ reset_last_Java_frame(thread, false);
2219 
2220   // Unbox oop result, e.g. JNIHandles::resolve value.
2221   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2222     __ resolve_jobject(rax /* value */,
2223                        thread /* thread */,
2224                        rcx /* tmp */);
2225   }
2226 
2227   if (CheckJNICalls) {
2228     // clear_pending_jni_exception_check
2229     __ movptr(Address(thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD);
2230   }
2231 
2232   if (!is_critical_native) {
2233     // reset handle block
2234     __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
2235     __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
2236 
2237     // Any exception pending?
2238     __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
2239     __ jcc(Assembler::notEqual, exception_pending);
2240   }
2241 


3152   // Pending exception after the safepoint
3153 
3154   __ bind(pending);
3155 
3156   RegisterSaver::restore_live_registers(masm);
3157 
3158   // exception pending => remove activation and forward to exception handler
3159 
3160   __ get_thread(thread);
3161   __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
3162   __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
3163   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3164 
3165   // -------------
3166   // make sure all code is generated
3167   masm->flush();
3168 
3169   // return the  blob
3170   // frame_size_words or bytes??
3171   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);





3172 }


 446       else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i;
 447       else // Else double is passed low on the stack to be aligned.
 448         stack += 2;
 449     } else if( sig_bt[i] == T_LONG ) {
 450       stack += 2;
 451     }
 452   }
 453   int dstack = 0;             // Separate counter for placing doubles
 454 
 455   // Now pick where all else goes.
 456   for( i = 0; i < total_args_passed; i++) {
 457     // From the type and the argument number (count) compute the location
 458     switch( sig_bt[i] ) {
 459     case T_SHORT:
 460     case T_CHAR:
 461     case T_BYTE:
 462     case T_BOOLEAN:
 463     case T_INT:
 464     case T_ARRAY:
 465     case T_OBJECT:
 466     case T_VALUETYPE:
 467     case T_ADDRESS:
 468       if( reg_arg0 == 9999 )  {
 469         reg_arg0 = i;
 470         regs[i].set1(rcx->as_VMReg());
 471       } else if( reg_arg1 == 9999 )  {
 472         reg_arg1 = i;
 473         regs[i].set1(rdx->as_VMReg());
 474       } else {
 475         regs[i].set1(VMRegImpl::stack2reg(stack++));
 476       }
 477       break;
 478     case T_FLOAT:
 479       if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) {
 480         freg_arg0 = i;
 481         regs[i].set1(xmm0->as_VMReg());
 482       } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) {
 483         freg_arg1 = i;
 484         regs[i].set1(xmm1->as_VMReg());
 485       } else {
 486         regs[i].set1(VMRegImpl::stack2reg(stack++));


 497         regs[i].set2(xmm0->as_VMReg());
 498       } else if( freg_arg1 == (uint)i ) {
 499         regs[i].set2(xmm1->as_VMReg());
 500       } else {
 501         regs[i].set2(VMRegImpl::stack2reg(dstack));
 502         dstack += 2;
 503       }
 504       break;
 505     case T_VOID: regs[i].set_bad(); break;
 507     default:
 508       ShouldNotReachHere();
 509       break;
 510     }
 511   }
 512 
 513   // return value can be an odd number of VMRegImpl stack slots; make it a multiple of 2
 514   return align_up(stack, 2);
 515 }
 516 
 517 const uint SharedRuntime::java_return_convention_max_int = 1;
 518 const uint SharedRuntime::java_return_convention_max_float = 1;
 519 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
 520                                           VMRegPair *regs,
 521                                           int total_args_passed) {
 522   Unimplemented();
 523   return 0;
 524 }
 525 
 526 // Patch the caller's callsite with entry to compiled code if it exists.
 527 static void patch_callers_callsite(MacroAssembler *masm) {
 528   Label L;
 529   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 530   __ jcc(Assembler::equal, L);
 531   // Schedule the branch target address early.
 532   // Call into the VM to patch the caller, then jump to compiled callee
 533   // rax isn't live, so capture the return address while we easily can
 534   __ movptr(rax, Address(rsp, 0));
 535   __ pusha();
 536   __ pushf();
 537 
 538   if (UseSSE == 1) {
 539     __ subptr(rsp, 2*wordSize);
 540     __ movflt(Address(rsp, 0), xmm0);
 541     __ movflt(Address(rsp, wordSize), xmm1);
 542   }
 543   if (UseSSE >= 2) {
 544     __ subptr(rsp, 4*wordSize);
 545     __ movdbl(Address(rsp, 0), xmm0);


 567     __ addptr(rsp, 2*wordSize);
 568   }
 569   if (UseSSE >= 2) {
 570     __ movdbl(xmm0, Address(rsp, 0));
 571     __ movdbl(xmm1, Address(rsp, 2*wordSize));
 572     __ addptr(rsp, 4*wordSize);
 573   }
 574 
 575   __ popf();
 576   __ popa();
 577   __ bind(L);
 578 }
 579 
 580 
 581 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
 582   int next_off = st_off - Interpreter::stackElementSize;
 583   __ movdbl(Address(rsp, next_off), r);
 584 }
 585 
 586 static void gen_c2i_adapter(MacroAssembler *masm,
 587                             const GrowableArray<SigEntry>& sig_extended,


 588                             const VMRegPair *regs,
 589                             Label& skip_fixup,
 590                             address start,
 591                             OopMapSet*& oop_maps,
 592                             int& frame_complete,
 593                             int& frame_size_in_words) {
 594   // Before we get into the guts of the C2I adapter, see if we should be here
 595   // at all.  We've come from compiled code and are attempting to jump to the
 596   // interpreter, which means the caller made a static call to get here
 597   // (vcalls always get a compiled target if there is one).  Check for a
 598   // compiled target.  If there is one, we need to patch the caller's call.
 599   patch_callers_callsite(masm);
 600 
 601   __ bind(skip_fixup);
 602 
 603 #ifdef COMPILER2
 604   // C2 may leave the stack dirty if not in SSE2+ mode
 605   if (UseSSE >= 2) {
 606     __ verify_FPU(0, "c2i transition should have clean FPU stack");
 607   } else {
 608     __ empty_FPU_stack();
 609   }
 610 #endif /* COMPILER2 */
 611 
 612   // Since all args are passed on the stack,
 613   // sig_extended.length() * Interpreter::stackElementSize is the
 614   // space we need.
 615   int extraspace = sig_extended.length() * Interpreter::stackElementSize;
 616 
 617   // Get return address
 618   __ pop(rax);
 619 
 620   // set senderSP value
 621   __ movptr(rsi, rsp);
 622 
 623   __ subptr(rsp, extraspace);
 624 
 625   // Now write the args into the outgoing interpreter space
 626   for (int i = 0; i < sig_extended.length(); i++) {
 627     if (sig_extended.at(i)._bt == T_VOID) {
 628       assert(i > 0 && (sig_extended.at(i-1)._bt == T_LONG || sig_extended.at(i-1)._bt == T_DOUBLE), "missing half");
 629       continue;
 630     }
 631 
 632     // st_off points to lowest address on stack.
 633     int st_off = ((sig_extended.length() - 1) - i) * Interpreter::stackElementSize;
 634     int next_off = st_off - Interpreter::stackElementSize;
 635 
 636     // Say 4 args:
 637     // i   st_off
 638     // 0   12 T_LONG
 639     // 1    8 T_VOID
 640     // 2    4 T_OBJECT
 641     // 3    0 T_BOOL
 642     VMReg r_1 = regs[i].first();
 643     VMReg r_2 = regs[i].second();
 644     if (!r_1->is_valid()) {
 645       assert(!r_2->is_valid(), "");
 646       continue;
 647     }
 648 
 649     if (r_1->is_stack()) {
 650       // memory to memory use fpu stack top
 651       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 652 
 653       if (!r_2->is_valid()) {


 663 #ifndef _LP64
 664         __ movptr(rdi, Address(rsp, ld_off + wordSize));
 665         __ movptr(Address(rsp, st_off), rdi);
 666 #else
 667 #ifdef ASSERT
 668         // Overwrite the unused slot with known junk
 669         __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
 670         __ movptr(Address(rsp, st_off), rax);
 671 #endif /* ASSERT */
 672 #endif // _LP64
 673       }
 674     } else if (r_1->is_Register()) {
 675       Register r = r_1->as_Register();
 676       if (!r_2->is_valid()) {
 677         __ movl(Address(rsp, st_off), r);
 678       } else {
 679         // long/double in gpr
 680         NOT_LP64(ShouldNotReachHere());
 681         // Two VMRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
 682         // T_DOUBLE and T_LONG use two slots in the interpreter
 683         if (sig_extended.at(i)._bt == T_LONG || sig_extended.at(i)._bt == T_DOUBLE) {
 684           // long/double in gpr
 685 #ifdef ASSERT
 686           // Overwrite the unused slot with known junk
 687           LP64_ONLY(__ mov64(rax, CONST64(0xdeadffffdeadaaab)));
 688           __ movptr(Address(rsp, st_off), rax);
 689 #endif /* ASSERT */
 690           __ movptr(Address(rsp, next_off), r);
 691         } else {
 692           __ movptr(Address(rsp, st_off), r);
 693         }
 694       }
 695     } else {
 696       assert(r_1->is_XMMRegister(), "");
 697       if (!r_2->is_valid()) {
 698         __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
 699       } else {
 700         assert(sig_extended.at(i)._bt == T_DOUBLE || sig_extended.at(i)._bt == T_LONG, "wrong type");
 701         move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
 702       }
 703     }
 704   }
 705 
 706   // Schedule the branch target address early.
 707   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 708   // And repush original return address
 709   __ push(rax);
 710   __ jmp(rcx);
 711 }
 712 
 713 
 714 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
 715   int next_val_off = ld_off - Interpreter::stackElementSize;
 716   __ movdbl(r, Address(saved_sp, next_val_off));
 717 }
 718 
 719 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 720                         address code_start, address code_end,
 721                         Label& L_ok) {
 722   Label L_fail;
 723   __ lea(temp_reg, ExternalAddress(code_start));
 724   __ cmpptr(pc_reg, temp_reg);
 725   __ jcc(Assembler::belowEqual, L_fail);
 726   __ lea(temp_reg, ExternalAddress(code_end));
 727   __ cmpptr(pc_reg, temp_reg);
 728   __ jcc(Assembler::below, L_ok);
 729   __ bind(L_fail);
 730 }
 731 
 732 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,

 733                                     int comp_args_on_stack,
 734                                     const GrowableArray<SigEntry>& sig_extended,
 735                                     const VMRegPair *regs) {
 736 
 737   // Note: rsi contains the senderSP on entry. We must preserve it since
 738   // we may do an i2c -> c2i transition if we lose a race where compiled
 739   // code goes non-entrant while we get args ready.
 740 
 741   // Adapters can be frameless because they do not require the caller
 742   // to perform additional cleanup work, such as correcting the stack pointer.
 743   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 744   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 745   // even if a callee has modified the stack pointer.
 746   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 747   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 748   // up via the senderSP register).
 749   // In other words, if *either* the caller or callee is interpreted, we can
 750   // get the stack pointer repaired after a call.
 751   // This is why c2i and i2c adapters cannot be indefinitely composed.
 752   // In particular, if a c2i adapter were to somehow call an i2c adapter,
 753   // both caller and callee would be compiled methods, and neither would
 754   // clean up the stack pointer changes performed by the two adapters.
 755   // If this happens, control eventually transfers back to the compiled
 756   // caller, but with an uncorrected stack, causing delayed havoc.


 805   }
 806 
 807   // Align the outgoing SP
 808   __ andptr(rsp, -(StackAlignmentInBytes));
 809 
 810   // push the return address on the stack (note that pushing, rather
 811   // than storing it, yields the correct frame alignment for the callee)
 812   __ push(rax);
 813 
 814   // Put saved SP in another register
 815   const Register saved_sp = rax;
 816   __ movptr(saved_sp, rdi);
 817 
 818 
 819   // Will jump to the compiled code just as if compiled code was doing it.
 820   // Pre-load the register-jump target early, to schedule it better.
 821   __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
 822 
 823   // Now generate the shuffle code.  Pick up all register args and move the
 824   // rest through the floating point stack top.
 825   for (int i = 0; i < sig_extended.length(); i++) {
 826     if (sig_extended.at(i)._bt == T_VOID) {
 827       // Longs and doubles are passed in native word order, but misaligned
 828       // in the 32-bit build.
 829       assert(i > 0 && (sig_extended.at(i-1)._bt == T_LONG || sig_extended.at(i-1)._bt == T_DOUBLE), "missing half");
 830       continue;
 831     }
 832 
 833     // Pick up 0, 1 or 2 words from SP+offset.
 834 
 835     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 836             "scrambled load targets?");
 837     // Load in argument order going down.
 838     int ld_off = (sig_extended.length() - i) * Interpreter::stackElementSize;
 839     // Point to interpreter value (vs. tag)
 840     int next_off = ld_off - Interpreter::stackElementSize;
 841     //
 842     //
 843     //
 844     VMReg r_1 = regs[i].first();
 845     VMReg r_2 = regs[i].second();
 846     if (!r_1->is_valid()) {
 847       assert(!r_2->is_valid(), "");
 848       continue;
 849     }
 850     if (r_1->is_stack()) {
 851       // Convert stack slot to an SP offset (+ wordSize to account for return address )
 852       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 853 
 854       // We can use rsi as a temp here because compiled code doesn't need rsi as an input,
 855       // and if we end up going through a c2i because of a miss, a reasonable value of rsi
 856       // will be generated.
 857       if (!r_2->is_valid()) {
 858         // __ fld_s(Address(saved_sp, ld_off));
 859         // __ fstp_s(Address(rsp, st_off));
 860         __ movl(rsi, Address(saved_sp, ld_off));
 861         __ movptr(Address(rsp, st_off), rsi);
 862       } else {
 863         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
 864         // are accessed at negative offsets, so the LSW is at the LOW address
 865 
 866         // ld_off is MSW so get LSW
 867         // st_off is LSW (i.e. reg.first())
 868         // __ fld_d(Address(saved_sp, next_off));
 869         // __ fstp_d(Address(rsp, st_off));
 870         //
 871         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
 872         // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
 873         // so we must adjust where to pick up the data to match the interpreter.
 874         //
 875         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
 876         // are accessed at negative offsets, so the LSW is at the LOW address
 877 
 878         // ld_off is MSW so get LSW
 879         const int offset = (NOT_LP64(true ||) sig_extended.at(i)._bt==T_LONG||sig_extended.at(i)._bt==T_DOUBLE)?
 880                            next_off : ld_off;
 881         __ movptr(rsi, Address(saved_sp, offset));
 882         __ movptr(Address(rsp, st_off), rsi);
 883 #ifndef _LP64
 884         __ movptr(rsi, Address(saved_sp, ld_off));
 885         __ movptr(Address(rsp, st_off + wordSize), rsi);
 886 #endif // _LP64
 887       }
 888     } else if (r_1->is_Register()) {  // Register argument
 889       Register r = r_1->as_Register();
 890       assert(r != rax, "must be different");
 891       if (r_2->is_valid()) {
 892         //
 893         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
 894         // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
 895         // so we must adjust where to pick up the data to match the interpreter.
 896 
 897         const int offset = (NOT_LP64(true ||) sig_extended.at(i)._bt==T_LONG||sig_extended.at(i)._bt==T_DOUBLE)?
 898                            next_off : ld_off;
 899 
 900         // this can be a misaligned move
 901         __ movptr(r, Address(saved_sp, offset));
 902 #ifndef _LP64
 903         assert(r_2->as_Register() != rax, "need another temporary register");
 904         // Remember r_1 is low address (and LSB on x86)
 905         // So r_2 gets loaded from high address regardless of the platform
 906         __ movptr(r_2->as_Register(), Address(saved_sp, ld_off));
 907 #endif // _LP64
 908       } else {
 909         __ movl(r, Address(saved_sp, ld_off));
 910       }
 911     } else {
 912       assert(r_1->is_XMMRegister(), "");
 913       if (!r_2->is_valid()) {
 914         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
 915       } else {
 916         move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off);
 917       }


 925   // "compiled" so it is much better to make this transition
 926   // invisible to the stack walking code. Unfortunately if
 927   // we try to find the callee by normal means, a safepoint
 928   // is possible. So we stash the desired callee in the thread
 929   // and the VM will find it there should this case occur.
 930 
 931   __ get_thread(rax);
 932   __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
 933 
 934   // move Method* to rax in case we end up in a c2i adapter.
 935   // The c2i adapters expect Method* in rax (c2) because c2's
 936   // resolve stubs return the result (the method) in rax.
 937   // I'd love to fix this.
 938   __ mov(rax, rbx);
 939 
 940   __ jmp(rdi);
 941 }
 942 
 943 // ---------------------------------------------------------------
 944 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,

 945                                                             int comp_args_on_stack,
 946                                                             const GrowableArray<SigEntry>& sig_extended,
 947                                                             const VMRegPair *regs,
 948                                                             AdapterFingerPrint* fingerprint,
 949                                                             AdapterBlob*& new_adapter) {
 950   address i2c_entry = __ pc();
 951 
 952   gen_i2c_adapter(masm, comp_args_on_stack, sig_extended, regs);
 953 
 954   // -------------------------------------------------------------------------
 955   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
 956   // to the interpreter.  The args start out packed in the compiled layout.  They
 957   // need to be unpacked into the interpreter layout.  This will almost always
 958   // require some stack space.  We grow the current (compiled) stack, then repack
 959   // the args.  We  finally end in a jump to the generic interpreter entry point.
 960   // On exit from the interpreter, the interpreter will restore our SP (lest the
 961   // compiled code, which relies solely on SP and not EBP, get sick).
 962 
 963   address c2i_unverified_entry = __ pc();
 964   Label skip_fixup;
 965 
 966   Register holder = rax;
 967   Register receiver = rcx;
 968   Register temp = rbx;
 969 
 970   {
 971 
 972     Label missed;
 973     __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
 974     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
 975     __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
 976     __ jcc(Assembler::notEqual, missed);
 977     // Method might have been compiled since the call site was patched to
 978     // interpreted; if that is the case, treat it as a miss so we can get
 979     // the call site corrected.
 980     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 981     __ jcc(Assembler::equal, skip_fixup);
 982 
 983     __ bind(missed);
 984     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 985   }
 986 
 987   address c2i_entry = __ pc();
 988 
 989   OopMapSet* oop_maps = NULL;
 990   int frame_complete = CodeOffsets::frame_never_safe;
 991   int frame_size_in_words = 0;
 992   gen_c2i_adapter(masm, sig_extended, regs, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words);
 993 
 994   __ flush();
 995   new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps);
 996   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
 997 }
 998 
 999 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1000                                          VMRegPair *regs,
1001                                          VMRegPair *regs2,
1002                                          int total_args_passed) {
1003   assert(regs2 == NULL, "not needed on x86");
1004 // We return the amount of VMRegImpl stack slots we need to reserve for all
1005 // the arguments NOT counting out_preserve_stack_slots.
1006 
1007   uint    stack = 0;        // All arguments on stack
1008 
1009   for( int i = 0; i < total_args_passed; i++) {
1010     // From the type and the argument number (count) compute the location
1011     switch( sig_bt[i] ) {
1012     case T_BOOLEAN:
1013     case T_CHAR:
1014     case T_FLOAT:
1015     case T_BYTE:
1016     case T_SHORT:
1017     case T_INT:
1018     case T_OBJECT:
1019     case T_VALUETYPE:
1020     case T_ARRAY:
1021     case T_ADDRESS:
1022     case T_METADATA:
1023       regs[i].set1(VMRegImpl::stack2reg(stack++));
1024       break;
1025     case T_LONG:
1026     case T_DOUBLE: // The stack numbering is reversed from Java
1027       // Since C arguments do not get reversed, the ordering for
1028       // doubles on the stack must be opposite the Java convention
1029       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
1030       regs[i].set2(VMRegImpl::stack2reg(stack));
1031       stack += 2;
1032       break;
1033     case T_VOID: regs[i].set_bad(); break;
1034     default:
1035       ShouldNotReachHere();
1036       break;
1037     }
1038   }
1039   return stack;


1281       switch (in_sig_bt[i]) {
1282         case T_ARRAY:
1283           if (map != NULL) {
1284             __ movptr(Address(rsp, offset), reg);
1285           } else {
1286             __ movptr(reg, Address(rsp, offset));
1287           }
1288           break;
1289         case T_BOOLEAN:
1290         case T_CHAR:
1291         case T_BYTE:
1292         case T_SHORT:
1293         case T_INT:
1294           if (map != NULL) {
1295             __ movl(Address(rsp, offset), reg);
1296           } else {
1297             __ movl(reg, Address(rsp, offset));
1298           }
1299           break;
1300         case T_OBJECT:
1301         case T_VALUETYPE:
1302         default: ShouldNotReachHere();
1303       }
1304     } else if (in_regs[i].first()->is_XMMRegister()) {
1305       if (in_sig_bt[i] == T_FLOAT) {
1306         int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
1307         int offset = slot * VMRegImpl::stack_slot_size;
1308         assert(handle_index <= stack_slots, "overflow");
1309         if (map != NULL) {
1310           __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
1311         } else {
1312           __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
1313         }
1314       }
1315     } else if (in_regs[i].first()->is_stack()) {
1316       if (in_sig_bt[i] == T_ARRAY && map != NULL) {
1317         int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1318         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1319       }
1320     }
1321   }


1418   __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
1419                            arrayOopDesc::base_offset_in_bytes(in_elem_type)));
1420   simple_move32(masm, tmp, length_arg);
1421   __ jmpb(done);
1422   __ bind(is_null);
1423   // Pass zeros
1424   __ xorptr(tmp_reg, tmp_reg);
1425   simple_move32(masm, tmp, body_arg);
1426   simple_move32(masm, tmp, length_arg);
1427   __ bind(done);
1428 }
1429 
1430 static void verify_oop_args(MacroAssembler* masm,
1431                             const methodHandle& method,
1432                             const BasicType* sig_bt,
1433                             const VMRegPair* regs) {
1434   Register temp_reg = rbx;  // not part of any compiled calling seq
1435   if (VerifyOops) {
1436     for (int i = 0; i < method->size_of_parameters(); i++) {
1437       if (sig_bt[i] == T_OBJECT ||
1438           sig_bt[i] == T_ARRAY || sig_bt[i] == T_VALUETYPE) {
1439         VMReg r = regs[i].first();
1440         assert(r->is_valid(), "bad oop arg");
1441         if (r->is_stack()) {
1442           __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1443           __ verify_oop(temp_reg);
1444         } else {
1445           __ verify_oop(r->as_Register());
1446         }
1447       }
1448     }
1449   }
1450 }
1451 
1452 static void gen_special_dispatch(MacroAssembler* masm,
1453                                  const methodHandle& method,
1454                                  const BasicType* sig_bt,
1455                                  const VMRegPair* regs) {
1456   verify_oop_args(masm, method, sig_bt, regs);
1457   vmIntrinsics::ID iid = method->intrinsic_id();
1458 


1891   // sure we can capture all the incoming oop args from the
1892   // caller.
1893   //
1894   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1895 
1896   // Mark location of rbp,
1897   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1898 
1899   // We know that we only have args in at most two integer registers (rcx, rdx), so rax and rbx
1900   // are free to use as temporaries if we have to do stack to stack moves.
1901   // All inbound args are referenced based on rbp, and all outbound args via rsp.
1902 
1903   for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1904     switch (in_sig_bt[i]) {
1905       case T_ARRAY:
1906         if (is_critical_native) {
1907           unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1908           c_arg++;
1909           break;
1910         }
1911       case T_VALUETYPE:
1912       case T_OBJECT:
1913         assert(!is_critical_native, "no oop arguments");
1914         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1915                     ((i == 0) && (!is_static)),
1916                     &receiver_offset);
1917         break;
1918       case T_VOID:
1919         break;
1920 
1921       case T_FLOAT:
1922         float_move(masm, in_regs[i], out_regs[c_arg]);
 1923         break;
1924 
1925       case T_DOUBLE:
1926         assert( i + 1 < total_in_args &&
1927                 in_sig_bt[i + 1] == T_VOID &&
1928                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1929         double_move(masm, in_regs[i], out_regs[c_arg]);
1930         break;
1931 


2074   // Verify or restore cpu control state after JNI call
2075   __ restore_cpu_control_state_after_jni();
2076 
2077   // WARNING - on Windows Java Natives use pascal calling convention and pop the
2078   // arguments off of the stack. We could just re-adjust the stack pointer here
2079   // and continue to do SP relative addressing but we instead switch to FP
2080   // relative addressing.
2081 
2082   // Unpack native results.
2083   switch (ret_type) {
2084   case T_BOOLEAN: __ c2bool(rax);            break;
2085   case T_CHAR   : __ andptr(rax, 0xFFFF);    break;
2086   case T_BYTE   : __ sign_extend_byte (rax); break;
2087   case T_SHORT  : __ sign_extend_short(rax); break;
2088   case T_INT    : /* nothing to do */        break;
2089   case T_DOUBLE :
2090   case T_FLOAT  :
 2091     // Result is in st0; we'll save as needed
2092     break;
2093   case T_ARRAY:                 // Really a handle
2094   case T_VALUETYPE:             // Really a handle
2095   case T_OBJECT:                // Really a handle
2096       break; // can't de-handlize until after safepoint check
2097   case T_VOID: break;
2098   case T_LONG: break;
2099   default       : ShouldNotReachHere();
2100   }
2101 
2102   // Switch thread to "native transition" state before reading the synchronization state.
2103   // This additional state is necessary because reading and testing the synchronization
2104   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2105   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2106   //     VM thread changes sync state to synchronizing and suspends threads for GC.
2107   //     Thread A is resumed to finish this native method, but doesn't block here since it
 2108   //     didn't see any synchronization in progress, and escapes.
2109   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2110 
2111   // Force this write out before the read below
2112   __ membar(Assembler::Membar_mask_bits(
2113             Assembler::LoadLoad | Assembler::LoadStore |
2114             Assembler::StoreLoad | Assembler::StoreStore));


2221     __ bind(done);
2222 
2223   }
2224 
2225   {
2226     SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
2227     // Tell dtrace about this method exit
2228     save_native_result(masm, ret_type, stack_slots);
2229     __ mov_metadata(rax, method());
2230     __ call_VM_leaf(
2231          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2232          thread, rax);
2233     restore_native_result(masm, ret_type, stack_slots);
2234   }
2235 
2236   // We can finally stop using that last_Java_frame we setup ages ago
2237 
2238   __ reset_last_Java_frame(thread, false);
2239 
2240   // Unbox oop result, e.g. JNIHandles::resolve value.
2241   if (ret_type == T_OBJECT || ret_type == T_ARRAY || ret_type == T_VALUETYPE) {
2242     __ resolve_jobject(rax /* value */,
2243                        thread /* thread */,
2244                        rcx /* tmp */);
2245   }
2246 
2247   if (CheckJNICalls) {
2248     // clear_pending_jni_exception_check
2249     __ movptr(Address(thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD);
2250   }
2251 
2252   if (!is_critical_native) {
2253     // reset handle block
2254     __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
2255     __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
2256 
2257     // Any exception pending?
2258     __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
2259     __ jcc(Assembler::notEqual, exception_pending);
2260   }
2261 


3172   // Pending exception after the safepoint
3173 
3174   __ bind(pending);
3175 
3176   RegisterSaver::restore_live_registers(masm);
3177 
3178   // exception pending => remove activation and forward to exception handler
3179 
3180   __ get_thread(thread);
3181   __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
3182   __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
3183   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3184 
3185   // -------------
3186   // make sure all code is generated
3187   masm->flush();
3188 
3189   // return the  blob
3190   // frame_size_words or bytes??
3191   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
3192 }
3193 
3194 BufferedValueTypeBlob* SharedRuntime::generate_buffered_value_type_adapter(const ValueKlass* vk) {
3195   Unimplemented();
3196   return NULL;
3197 }