446 else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i;
447 else // Else double is passed low on the stack to be aligned.
448 stack += 2;
449 } else if( sig_bt[i] == T_LONG ) {
450 stack += 2;
451 }
452 }
453 int dstack = 0; // Separate counter for placing doubles
454
455 // Now pick where all else goes.
456 for( i = 0; i < total_args_passed; i++) {
457 // From the type and the argument number (count) compute the location
458 switch( sig_bt[i] ) {
459 case T_SHORT:
460 case T_CHAR:
461 case T_BYTE:
462 case T_BOOLEAN:
463 case T_INT:
464 case T_ARRAY:
465 case T_OBJECT:
466 case T_ADDRESS:
467 if( reg_arg0 == 9999 ) {
468 reg_arg0 = i;
469 regs[i].set1(rcx->as_VMReg());
470 } else if( reg_arg1 == 9999 ) {
471 reg_arg1 = i;
472 regs[i].set1(rdx->as_VMReg());
473 } else {
474 regs[i].set1(VMRegImpl::stack2reg(stack++));
475 }
476 break;
477 case T_FLOAT:
478 if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) {
479 freg_arg0 = i;
480 regs[i].set1(xmm0->as_VMReg());
481 } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) {
482 freg_arg1 = i;
483 regs[i].set1(xmm1->as_VMReg());
484 } else {
485 regs[i].set1(VMRegImpl::stack2reg(stack++));
496 regs[i].set2(xmm0->as_VMReg());
497 } else if( freg_arg1 == (uint)i ) {
498 regs[i].set2(xmm1->as_VMReg());
499 } else {
500 regs[i].set2(VMRegImpl::stack2reg(dstack));
501 dstack += 2;
502 }
503 break;
504 case T_VOID: regs[i].set_bad(); break;
506 default:
507 ShouldNotReachHere();
508 break;
509 }
510 }
511
512 // The return value can be an odd number of VMRegImpl stack slots; make it a multiple of 2.
513 return align_up(stack, 2);
514 }
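// Illustrative trace, assuming the elided T_LONG/T_DOUBLE arms give longs
// dstack slots and doubles the xmm register claimed in the pre-pass. For a
// Java signature (int, long, double, float), i.e. the extended signature
// [T_INT, T_LONG, T_VOID, T_DOUBLE, T_VOID, T_FLOAT]:
//   pre-pass:  T_LONG reserves 2 stack slots (stack = 2);
//              T_DOUBLE claims freg_arg0, so it will land in xmm0.
//   main pass: T_INT    -> rcx         (reg_arg0)
//              T_LONG   -> slots 0-1   (dstack, set2)
//              T_DOUBLE -> xmm0        (set2)
//              T_FLOAT  -> xmm1        (freg_arg1 still unclaimed)
//   return value: align_up(2, 2) == 2 outgoing stack slots.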
515
516 // Patch the caller's callsite with entry to compiled code if it exists.
517 static void patch_callers_callsite(MacroAssembler *masm) {
518 Label L;
519 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
520 __ jcc(Assembler::equal, L);
521 // Schedule the branch target address early.
522 // Call into the VM to patch the caller, then jump to compiled callee
523 // rax isn't live, so capture the return address while we easily can
524 __ movptr(rax, Address(rsp, 0));
525 __ pusha();
526 __ pushf();
527
528 if (UseSSE == 1) {
529 __ subptr(rsp, 2*wordSize);
530 __ movflt(Address(rsp, 0), xmm0);
531 __ movflt(Address(rsp, wordSize), xmm1);
532 }
533 if (UseSSE >= 2) {
534 __ subptr(rsp, 4*wordSize);
535 __ movdbl(Address(rsp, 0), xmm0);
557 __ addptr(rsp, 2*wordSize);
558 }
559 if (UseSSE >= 2) {
560 __ movdbl(xmm0, Address(rsp, 0));
561 __ movdbl(xmm1, Address(rsp, 2*wordSize));
562 __ addptr(rsp, 4*wordSize);
563 }
564
565 __ popf();
566 __ popa();
567 __ bind(L);
568 }
569
570
571 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
572 int next_off = st_off - Interpreter::stackElementSize;
573 __ movdbl(Address(rsp, next_off), r);
574 }
575
576 static void gen_c2i_adapter(MacroAssembler *masm,
577 int total_args_passed,
578 int comp_args_on_stack,
579 const BasicType *sig_bt,
580 const VMRegPair *regs,
581 Label& skip_fixup) {
582 // Before we get into the guts of the C2I adapter, see if we should be here
583 // at all. We've come from compiled code and are attempting to jump to the
584 // interpreter, which means the caller made a static call to get here
585 // (vcalls always get a compiled target if there is one). Check for a
586 // compiled target. If there is one, we need to patch the caller's call.
587 patch_callers_callsite(masm);
588
589 __ bind(skip_fixup);
590
591 #ifdef COMPILER2
592 // C2 may leave the stack dirty if not in SSE2+ mode
593 if (UseSSE >= 2) {
594 __ verify_FPU(0, "c2i transition should have clean FPU stack");
595 } else {
596 __ empty_FPU_stack();
597 }
598 #endif /* COMPILER2 */
599
600 // Since all args are passed on the stack, total_args_passed *
601 // Interpreter::stackElementSize is the space we need.
603 int extraspace = total_args_passed * Interpreter::stackElementSize;
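// For example, total_args_passed == 6 with the 32-bit
// Interpreter::stackElementSize of 4 bytes gives extraspace == 24 bytes.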
604
605 // Get return address
606 __ pop(rax);
607
608 // set senderSP value
609 __ movptr(rsi, rsp);
610
611 __ subptr(rsp, extraspace);
612
613 // Now write the args into the outgoing interpreter space
614 for (int i = 0; i < total_args_passed; i++) {
615 if (sig_bt[i] == T_VOID) {
616 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
617 continue;
618 }
619
620 // st_off points to lowest address on stack.
621 int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
622 int next_off = st_off - Interpreter::stackElementSize;
623
624 // Say 4 args:
625 // i st_off
626 // 0 12 T_LONG
627 // 1 8 T_VOID
628 // 2 4 T_OBJECT
629 // 3 0 T_BOOL
630 VMReg r_1 = regs[i].first();
631 VMReg r_2 = regs[i].second();
632 if (!r_1->is_valid()) {
633 assert(!r_2->is_valid(), "");
634 continue;
635 }
636
637 if (r_1->is_stack()) {
638 // memory to memory use fpu stack top
639 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
640
641 if (!r_2->is_valid()) {
651 #ifndef _LP64
652 __ movptr(rdi, Address(rsp, ld_off + wordSize));
653 __ movptr(Address(rsp, st_off), rdi);
654 #else
655 #ifdef ASSERT
656 // Overwrite the unused slot with known junk
657 __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
658 __ movptr(Address(rsp, st_off), rax);
659 #endif /* ASSERT */
660 #endif // _LP64
661 }
662 } else if (r_1->is_Register()) {
663 Register r = r_1->as_Register();
664 if (!r_2->is_valid()) {
665 __ movl(Address(rsp, st_off), r);
666 } else {
667 // long/double in gpr
668 NOT_LP64(ShouldNotReachHere());
669 // Two VMRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
670 // T_DOUBLE and T_LONG use two slots in the interpreter
671 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
672 // long/double in gpr
673 #ifdef ASSERT
674 // Overwrite the unused slot with known junk
675 LP64_ONLY(__ mov64(rax, CONST64(0xdeadffffdeadaaab)));
676 __ movptr(Address(rsp, st_off), rax);
677 #endif /* ASSERT */
678 __ movptr(Address(rsp, next_off), r);
679 } else {
680 __ movptr(Address(rsp, st_off), r);
681 }
682 }
683 } else {
684 assert(r_1->is_XMMRegister(), "");
685 if (!r_2->is_valid()) {
686 __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
687 } else {
688 assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type");
689 move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
690 }
691 }
692 }
693
694 // Schedule the branch target address early.
695 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
696 // And repush original return address
697 __ push(rax);
698 __ jmp(rcx);
699 }
700
701
702 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
703 int next_val_off = ld_off - Interpreter::stackElementSize;
704 __ movdbl(r, Address(saved_sp, next_val_off));
705 }
706
707 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
708 address code_start, address code_end,
709 Label& L_ok) {
710 Label L_fail;
711 __ lea(temp_reg, ExternalAddress(code_start));
712 __ cmpptr(pc_reg, temp_reg);
713 __ jcc(Assembler::belowEqual, L_fail);
714 __ lea(temp_reg, ExternalAddress(code_end));
715 __ cmpptr(pc_reg, temp_reg);
716 __ jcc(Assembler::below, L_ok);
717 __ bind(L_fail);
718 }
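// On return, execution falls through into the caller's failure path unless
// code_start < pc_reg < code_end (note belowEqual: code_start itself fails);
// success jumps to L_ok. A typical call, sketched against the interpreter's
// code bounds (illustrative, not tied to a specific caller here):
//   Label L_ok;
//   range_check(masm, rax, rdi,
//               Interpreter::code()->code_start(), Interpreter::code()->code_end(),
//               L_ok);
//   __ stop("return pc not inside the interpreter");
//   __ bind(L_ok);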
719
720 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
721 int total_args_passed,
722 int comp_args_on_stack,
723 const BasicType *sig_bt,
724 const VMRegPair *regs) {
725 // Note: rsi contains the senderSP on entry. We must preserve it since
726 // we may do an i2c -> c2i transition if we lose a race where compiled
727 // code goes non-entrant while we get args ready.
728
729 // Adapters can be frameless because they do not require the caller
730 // to perform additional cleanup work, such as correcting the stack pointer.
731 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
732 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
733 // even if a callee has modified the stack pointer.
734 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
735 // routinely repairs its caller's stack pointer (from sender_sp, which is set
736 // up via the senderSP register).
737 // In other words, if *either* the caller or callee is interpreted, we can
738 // get the stack pointer repaired after a call.
739 // This is why c2i and i2c adapters cannot be indefinitely composed.
740 // In particular, if a c2i adapter were to somehow call an i2c adapter,
741 // both caller and callee would be compiled methods, and neither would
742 // clean up the stack pointer changes performed by the two adapters.
743 // If this happens, control eventually transfers back to the compiled
744 // caller, but with an uncorrected stack, causing delayed havoc.
793 }
794
795 // Align the outgoing SP
796 __ andptr(rsp, -(StackAlignmentInBytes));
797
798 // push the return address on the stack (note that pushing, rather
799 // than storing it, yields the correct frame alignment for the callee)
800 __ push(rax);
801
802 // Put saved SP in another register
803 const Register saved_sp = rax;
804 __ movptr(saved_sp, rdi);
805
806
807 // Will jump to the compiled code just as if compiled code was doing it.
808 // Pre-load the register-jump target early, to schedule it better.
809 __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
810
811 // Now generate the shuffle code. Pick up all register args and move the
812 // rest through the floating point stack top.
813 for (int i = 0; i < total_args_passed; i++) {
814 if (sig_bt[i] == T_VOID) {
815 // Longs and doubles are passed in native word order, but misaligned
816 // in the 32-bit build.
817 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
818 continue;
819 }
820
821 // Pick up 0, 1 or 2 words from SP+offset.
822
823 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
824 "scrambled load targets?");
825 // Load in argument order going down.
826 int ld_off = (total_args_passed - i) * Interpreter::stackElementSize;
827 // Point to interpreter value (vs. tag)
828 int next_off = ld_off - Interpreter::stackElementSize;
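// Worked example: with 3 interpreter slots, e.g. [T_INT, T_LONG, T_VOID],
// and stackElementSize == 4:
//   i == 0 (T_INT):  ld_off == (3 - 0) * 4 == 12
//   i == 1 (T_LONG): ld_off == 8 and next_off == 4; on 32-bit the LSW is
//                    picked up at next_off (see the offset selection below).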
832 VMReg r_1 = regs[i].first();
833 VMReg r_2 = regs[i].second();
834 if (!r_1->is_valid()) {
835 assert(!r_2->is_valid(), "");
836 continue;
837 }
838 if (r_1->is_stack()) {
839 // Convert stack slot to an SP offset (+ wordSize to account for return address )
840 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
841
842 // We can use rsi as a temp here because compiled code doesn't need rsi as an input
843 // and if we end up going through a c2i because of a miss, a reasonable value of rsi
844 // will be generated.
845 if (!r_2->is_valid()) {
846 // __ fld_s(Address(saved_sp, ld_off));
847 // __ fstp_s(Address(rsp, st_off));
848 __ movl(rsi, Address(saved_sp, ld_off));
849 __ movptr(Address(rsp, st_off), rsi);
850 } else {
851 // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
852 // are accessed with negative offsets, so the LSW is at the LOW address.
853
854 // ld_off is the MSW, so get the LSW
855 // st_off is the LSW (i.e. reg.first())
856 // __ fld_d(Address(saved_sp, next_off));
857 // __ fstp_d(Address(rsp, st_off));
858 //
859 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
860 // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
861 // so we must adjust where to pick up the data to match the interpreter.
867 const int offset = (NOT_LP64(true ||) sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
868 next_off : ld_off;
869 __ movptr(rsi, Address(saved_sp, offset));
870 __ movptr(Address(rsp, st_off), rsi);
871 #ifndef _LP64
872 __ movptr(rsi, Address(saved_sp, ld_off));
873 __ movptr(Address(rsp, st_off + wordSize), rsi);
874 #endif // _LP64
875 }
876 } else if (r_1->is_Register()) { // Register argument
877 Register r = r_1->as_Register();
878 assert(r != rax, "must be different");
879 if (r_2->is_valid()) {
880 //
881 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
882 // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
883 // so we must adjust where to pick up the data to match the interpreter.
884
885 const int offset = (NOT_LP64(true ||) sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
886 next_off : ld_off;
887
888 // this can be a misaligned move
889 __ movptr(r, Address(saved_sp, offset));
890 #ifndef _LP64
891 assert(r_2->as_Register() != rax, "need another temporary register");
892 // Remember r_1 is low address (and LSB on x86)
893 // So r_2 gets loaded from high address regardless of the platform
894 __ movptr(r_2->as_Register(), Address(saved_sp, ld_off));
895 #endif // _LP64
896 } else {
897 __ movl(r, Address(saved_sp, ld_off));
898 }
899 } else {
900 assert(r_1->is_XMMRegister(), "");
901 if (!r_2->is_valid()) {
902 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
903 } else {
904 move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off);
905 }
913 // "compiled" so it is much better to make this transition
914 // invisible to the stack walking code. Unfortunately if
915 // we try and find the callee by normal means a safepoint
916 // is possible. So we stash the desired callee in the thread
917 // and the VM will find it there should this case occur.
918
919 __ get_thread(rax);
920 __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
921
922 // move Method* to rax in case we end up in a c2i adapter.
923 // the c2i adapters expect Method* in rax (c2) because c2's
924 // resolve stubs return the result (the method) in rax.
925 // I'd love to fix this.
926 __ mov(rax, rbx);
927
928 __ jmp(rdi);
929 }
930
931 // ---------------------------------------------------------------
932 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
933 int total_args_passed,
934 int comp_args_on_stack,
935 const BasicType *sig_bt,
936 const VMRegPair *regs,
937 AdapterFingerPrint* fingerprint) {
938 address i2c_entry = __ pc();
939
940 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
941
942 // -------------------------------------------------------------------------
943 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
944 // to the interpreter. The args start out packed in the compiled layout. They
945 // need to be unpacked into the interpreter layout. This will almost always
946 // require some stack space. We grow the current (compiled) stack, then repack
947 // the args. We finally end in a jump to the generic interpreter entry point.
948 // On exit from the interpreter, the interpreter will restore our SP (lest the
949 // compiled code, which relies solely on SP and not EBP, get sick).
950
951 address c2i_unverified_entry = __ pc();
952 Label skip_fixup;
953
954 Register holder = rax;
955 Register receiver = rcx;
956 Register temp = rbx;
957
958 {
959
960 Label missed;
961 __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
962 __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
963 __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
964 __ jcc(Assembler::notEqual, missed);
965 // Method might have been compiled since the call site was patched to
966 // interpreted; if that is the case, treat it as a miss so we can get
967 // the call site corrected.
968 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
969 __ jcc(Assembler::equal, skip_fixup);
970
971 __ bind(missed);
972 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
973 }
974
975 address c2i_entry = __ pc();
976
977 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
978
979 __ flush();
980 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
981 }
982
983 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
984 VMRegPair *regs,
985 VMRegPair *regs2,
986 int total_args_passed) {
987 assert(regs2 == NULL, "not needed on x86");
988 // We return the number of VMRegImpl stack slots we need to reserve for all
989 // the arguments NOT counting out_preserve_stack_slots.
990
991 uint stack = 0; // All arguments on stack
992
993 for( int i = 0; i < total_args_passed; i++) {
994 // From the type and the argument number (count) compute the location
995 switch( sig_bt[i] ) {
996 case T_BOOLEAN:
997 case T_CHAR:
998 case T_FLOAT:
999 case T_BYTE:
1000 case T_SHORT:
1001 case T_INT:
1002 case T_OBJECT:
1003 case T_ARRAY:
1004 case T_ADDRESS:
1005 case T_METADATA:
1006 regs[i].set1(VMRegImpl::stack2reg(stack++));
1007 break;
1008 case T_LONG:
1009 case T_DOUBLE: // The stack numbering is reversed from Java
1010 // Since C arguments do not get reversed, the ordering for
1011 // doubles on the stack must be opposite the Java convention
1012 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
1013 regs[i].set2(VMRegImpl::stack2reg(stack));
1014 stack += 2;
1015 break;
1016 case T_VOID: regs[i].set_bad(); break;
1017 default:
1018 ShouldNotReachHere();
1019 break;
1020 }
1021 }
1022 return stack;
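// Illustrative layout: a C signature (jint, jlong, jfloat), flattened to
// [T_INT, T_LONG, T_VOID, T_FLOAT], is placed entirely on the stack in order:
//   T_INT   -> slot 0
//   T_LONG  -> slots 1-2 (set2 records the low slot)
//   T_FLOAT -> slot 3
// and the function returns 4 slots.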
1264 switch (in_sig_bt[i]) {
1265 case T_ARRAY:
1266 if (map != NULL) {
1267 __ movptr(Address(rsp, offset), reg);
1268 } else {
1269 __ movptr(reg, Address(rsp, offset));
1270 }
1271 break;
1272 case T_BOOLEAN:
1273 case T_CHAR:
1274 case T_BYTE:
1275 case T_SHORT:
1276 case T_INT:
1277 if (map != NULL) {
1278 __ movl(Address(rsp, offset), reg);
1279 } else {
1280 __ movl(reg, Address(rsp, offset));
1281 }
1282 break;
1283 case T_OBJECT:
1284 default: ShouldNotReachHere();
1285 }
1286 } else if (in_regs[i].first()->is_XMMRegister()) {
1287 if (in_sig_bt[i] == T_FLOAT) {
1288 int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
1289 int offset = slot * VMRegImpl::stack_slot_size;
1290 assert(handle_index <= stack_slots, "overflow");
1291 if (map != NULL) {
1292 __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
1293 } else {
1294 __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
1295 }
1296 }
1297 } else if (in_regs[i].first()->is_stack()) {
1298 if (in_sig_bt[i] == T_ARRAY && map != NULL) {
1299 int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1300 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1301 }
1302 }
1303 }
1400 __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
1401 arrayOopDesc::base_offset_in_bytes(in_elem_type)));
1402 simple_move32(masm, tmp, length_arg);
1403 __ jmpb(done);
1404 __ bind(is_null);
1405 // Pass zeros
1406 __ xorptr(tmp_reg, tmp_reg);
1407 simple_move32(masm, tmp, body_arg);
1408 simple_move32(masm, tmp, length_arg);
1409 __ bind(done);
1410 }
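// Net effect: a critical-native array argument is passed to the native code
// as a (body pointer, length) pair; a NULL array is passed as (NULL, 0),
// which is what the zero-filling above produces for both halves.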
1411
1412 static void verify_oop_args(MacroAssembler* masm,
1413 const methodHandle& method,
1414 const BasicType* sig_bt,
1415 const VMRegPair* regs) {
1416 Register temp_reg = rbx; // not part of any compiled calling seq
1417 if (VerifyOops) {
1418 for (int i = 0; i < method->size_of_parameters(); i++) {
1419 if (sig_bt[i] == T_OBJECT ||
1420 sig_bt[i] == T_ARRAY) {
1421 VMReg r = regs[i].first();
1422 assert(r->is_valid(), "bad oop arg");
1423 if (r->is_stack()) {
1424 __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1425 __ verify_oop(temp_reg);
1426 } else {
1427 __ verify_oop(r->as_Register());
1428 }
1429 }
1430 }
1431 }
1432 }
1433
1434 static void gen_special_dispatch(MacroAssembler* masm,
1435 const methodHandle& method,
1436 const BasicType* sig_bt,
1437 const VMRegPair* regs) {
1438 verify_oop_args(masm, method, sig_bt, regs);
1439 vmIntrinsics::ID iid = method->intrinsic_id();
1440
1873 // sure we can capture all the incoming oop args from the
1874 // caller.
1875 //
1876 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1877
1878 // Mark location of rbp
1879 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1880
1881 // We know that we only have args in at most two integer registers (rcx, rdx), so rax and rbx
1882 // are free to use as temporaries if we have to do stack-to-stack moves.
1883 // All inbound args are referenced based on rbp, and all outbound args via rsp.
1884
1885 for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1886 switch (in_sig_bt[i]) {
1887 case T_ARRAY:
1888 if (is_critical_native) {
1889 unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1890 c_arg++;
1891 break;
1892 }
1893 case T_OBJECT:
1894 assert(!is_critical_native, "no oop arguments");
1895 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1896 ((i == 0) && (!is_static)),
1897 &receiver_offset);
1898 break;
1899 case T_VOID:
1900 break;
1901
1902 case T_FLOAT:
1903 float_move(masm, in_regs[i], out_regs[c_arg]);
1904 break;
1905
1906 case T_DOUBLE:
1907 assert( i + 1 < total_in_args &&
1908 in_sig_bt[i + 1] == T_VOID &&
1909 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1910 double_move(masm, in_regs[i], out_regs[c_arg]);
1911 break;
1912
2055 // Verify or restore cpu control state after JNI call
2056 __ restore_cpu_control_state_after_jni();
2057
2058 // WARNING - on Windows Java Natives use pascal calling convention and pop the
2059 // arguments off of the stack. We could just re-adjust the stack pointer here
2060 // and continue to do SP relative addressing but we instead switch to FP
2061 // relative addressing.
2062
2063 // Unpack native results.
2064 switch (ret_type) {
2065 case T_BOOLEAN: __ c2bool(rax); break;
2066 case T_CHAR : __ andptr(rax, 0xFFFF); break;
2067 case T_BYTE : __ sign_extend_byte (rax); break;
2068 case T_SHORT : __ sign_extend_short(rax); break;
2069 case T_INT : /* nothing to do */ break;
2070 case T_DOUBLE :
2071 case T_FLOAT :
2072 // Result is in st0; we'll save it as needed
2073 break;
2074 case T_ARRAY: // Really a handle
2075 case T_OBJECT: // Really a handle
2076 break; // can't de-handlize until after safepoint check
2077 case T_VOID: break;
2078 case T_LONG: break;
2079 default : ShouldNotReachHere();
2080 }
2081
2082 // Switch thread to "native transition" state before reading the synchronization state.
2083 // This additional state is necessary because reading and testing the synchronization
2084 // state is not atomic w.r.t. GC, as this scenario demonstrates:
2085 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2086 // VM thread changes sync state to synchronizing and suspends threads for GC.
2087 // Thread A is resumed to finish this native method, but doesn't block here since it
2088 // didn't see any synchronization in progress, and escapes.
2089 __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2090
2091 // Force this write out before the read below
2092 __ membar(Assembler::Membar_mask_bits(
2093 Assembler::LoadLoad | Assembler::LoadStore |
2094 Assembler::StoreLoad | Assembler::StoreStore));
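// Rationale: without the StoreLoad barrier, the thread-state store above could
// be reordered with the safepoint-state load that follows it, letting the VM
// thread start a safepoint without ever observing _thread_in_native_trans
// while this thread misses the safepoint request. Required ordering, sketched:
//   store(thread_state, _thread_in_native_trans)   // must reach memory first
//   StoreLoad barrier
//   load(safepoint state)                          // only then test it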
2201 __ bind(done);
2202
2203 }
2204
2205 {
2206 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
2207 // Tell dtrace about this method exit
2208 save_native_result(masm, ret_type, stack_slots);
2209 __ mov_metadata(rax, method());
2210 __ call_VM_leaf(
2211 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2212 thread, rax);
2213 restore_native_result(masm, ret_type, stack_slots);
2214 }
2215
2216 // We can finally stop using that last_Java_frame we set up ages ago
2217
2218 __ reset_last_Java_frame(thread, false);
2219
2220 // Unbox oop result, e.g. JNIHandles::resolve value.
2221 if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2222 __ resolve_jobject(rax /* value */,
2223 thread /* thread */,
2224 rcx /* tmp */);
2225 }
2226
2227 if (CheckJNICalls) {
2228 // clear_pending_jni_exception_check
2229 __ movptr(Address(thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD);
2230 }
2231
2232 if (!is_critical_native) {
2233 // reset handle block
2234 __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
2235 __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
2236
2237 // Any exception pending?
2238 __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
2239 __ jcc(Assembler::notEqual, exception_pending);
2240 }
2241
3152 // Pending exception after the safepoint
3153
3154 __ bind(pending);
3155
3156 RegisterSaver::restore_live_registers(masm);
3157
3158 // exception pending => remove activation and forward to exception handler
3159
3160 __ get_thread(thread);
3161 __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
3162 __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
3163 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3164
3165 // -------------
3166 // make sure all code is generated
3167 masm->flush();
3168
3169 // return the blob
3170 // frame_size_words or bytes??
3171 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
3172 }
|
446 else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i;
447 else // Else double is passed low on the stack to be aligned.
448 stack += 2;
449 } else if( sig_bt[i] == T_LONG ) {
450 stack += 2;
451 }
452 }
453 int dstack = 0; // Separate counter for placing doubles
454
455 // Now pick where all else goes.
456 for( i = 0; i < total_args_passed; i++) {
457 // From the type and the argument number (count) compute the location
458 switch( sig_bt[i] ) {
459 case T_SHORT:
460 case T_CHAR:
461 case T_BYTE:
462 case T_BOOLEAN:
463 case T_INT:
464 case T_ARRAY:
465 case T_OBJECT:
466 case T_VALUETYPE:
467 case T_ADDRESS:
468 if( reg_arg0 == 9999 ) {
469 reg_arg0 = i;
470 regs[i].set1(rcx->as_VMReg());
471 } else if( reg_arg1 == 9999 ) {
472 reg_arg1 = i;
473 regs[i].set1(rdx->as_VMReg());
474 } else {
475 regs[i].set1(VMRegImpl::stack2reg(stack++));
476 }
477 break;
478 case T_FLOAT:
479 if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) {
480 freg_arg0 = i;
481 regs[i].set1(xmm0->as_VMReg());
482 } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) {
483 freg_arg1 = i;
484 regs[i].set1(xmm1->as_VMReg());
485 } else {
486 regs[i].set1(VMRegImpl::stack2reg(stack++));
497 regs[i].set2(xmm0->as_VMReg());
498 } else if( freg_arg1 == (uint)i ) {
499 regs[i].set2(xmm1->as_VMReg());
500 } else {
501 regs[i].set2(VMRegImpl::stack2reg(dstack));
502 dstack += 2;
503 }
504 break;
505 case T_VOID: regs[i].set_bad(); break;
507 default:
508 ShouldNotReachHere();
509 break;
510 }
511 }
512
513 // The return value can be an odd number of VMRegImpl stack slots; make it a multiple of 2.
514 return align_up(stack, 2);
515 }
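// Note: T_VALUETYPE arguments share the T_OBJECT path above, so a (buffered)
// value type is passed exactly like any other oop. For an illustrative
// signature (MyValue v, int x), v lands in rcx and x in rdx.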
516
517 const uint SharedRuntime::java_return_convention_max_int = 1;
518 const uint SharedRuntime::java_return_convention_max_float = 1;
519 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
520 VMRegPair *regs,
521 int total_args_passed) {
522 Unimplemented();
523 return 0;
524 }
525
526 // Patch the caller's callsite with entry to compiled code if it exists.
527 static void patch_callers_callsite(MacroAssembler *masm) {
528 Label L;
529 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
530 __ jcc(Assembler::equal, L);
531 // Schedule the branch target address early.
532 // Call into the VM to patch the caller, then jump to compiled callee
533 // rax isn't live, so capture the return address while we easily can
534 __ movptr(rax, Address(rsp, 0));
535 __ pusha();
536 __ pushf();
537
538 if (UseSSE == 1) {
539 __ subptr(rsp, 2*wordSize);
540 __ movflt(Address(rsp, 0), xmm0);
541 __ movflt(Address(rsp, wordSize), xmm1);
542 }
543 if (UseSSE >= 2) {
544 __ subptr(rsp, 4*wordSize);
545 __ movdbl(Address(rsp, 0), xmm0);
567 __ addptr(rsp, 2*wordSize);
568 }
569 if (UseSSE >= 2) {
570 __ movdbl(xmm0, Address(rsp, 0));
571 __ movdbl(xmm1, Address(rsp, 2*wordSize));
572 __ addptr(rsp, 4*wordSize);
573 }
574
575 __ popf();
576 __ popa();
577 __ bind(L);
578 }
579
580
581 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
582 int next_off = st_off - Interpreter::stackElementSize;
583 __ movdbl(Address(rsp, next_off), r);
584 }
585
586 static void gen_c2i_adapter(MacroAssembler *masm,
587 const GrowableArray<SigEntry>& sig_extended,
588 const VMRegPair *regs,
589 Label& skip_fixup,
590 address start,
591 OopMapSet*& oop_maps,
592 int& frame_complete,
593 int& frame_size_in_words) {
594 // Before we get into the guts of the C2I adapter, see if we should be here
595 // at all. We've come from compiled code and are attempting to jump to the
596 // interpreter, which means the caller made a static call to get here
597 // (vcalls always get a compiled target if there is one). Check for a
598 // compiled target. If there is one, we need to patch the caller's call.
599 patch_callers_callsite(masm);
600
601 __ bind(skip_fixup);
602
603 #ifdef COMPILER2
604 // C2 may leave the stack dirty if not in SSE2+ mode
605 if (UseSSE >= 2) {
606 __ verify_FPU(0, "c2i transition should have clean FPU stack");
607 } else {
608 __ empty_FPU_stack();
609 }
610 #endif /* COMPILER2 */
611
612 // Since all args are passed on the stack, sig_extended.length() *
613 // Interpreter::stackElementSize is the space we need.
615 int extraspace = sig_extended.length() * Interpreter::stackElementSize;
616
617 // Get return address
618 __ pop(rax);
619
620 // set senderSP value
621 __ movptr(rsi, rsp);
622
623 __ subptr(rsp, extraspace);
624
625 // Now write the args into the outgoing interpreter space
626 for (int i = 0; i < sig_extended.length(); i++) {
627 if (sig_extended.at(i)._bt == T_VOID) {
628 assert(i > 0 && (sig_extended.at(i-1)._bt == T_LONG || sig_extended.at(i-1)._bt == T_DOUBLE), "missing half");
629 continue;
630 }
631
632 // st_off points to lowest address on stack.
633 int st_off = ((sig_extended.length() - 1) - i) * Interpreter::stackElementSize;
634 int next_off = st_off - Interpreter::stackElementSize;
635
636 // Say 4 args:
637 // i st_off
638 // 0 12 T_LONG
639 // 1 8 T_VOID
640 // 2 4 T_OBJECT
641 // 3 0 T_BOOL
642 VMReg r_1 = regs[i].first();
643 VMReg r_2 = regs[i].second();
644 if (!r_1->is_valid()) {
645 assert(!r_2->is_valid(), "");
646 continue;
647 }
648
649 if (r_1->is_stack()) {
650 // memory to memory use fpu stack top
651 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
652
653 if (!r_2->is_valid()) {
663 #ifndef _LP64
664 __ movptr(rdi, Address(rsp, ld_off + wordSize));
665 __ movptr(Address(rsp, st_off), rdi);
666 #else
667 #ifdef ASSERT
668 // Overwrite the unused slot with known junk
669 __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
670 __ movptr(Address(rsp, st_off), rax);
671 #endif /* ASSERT */
672 #endif // _LP64
673 }
674 } else if (r_1->is_Register()) {
675 Register r = r_1->as_Register();
676 if (!r_2->is_valid()) {
677 __ movl(Address(rsp, st_off), r);
678 } else {
679 // long/double in gpr
680 NOT_LP64(ShouldNotReachHere());
681 // Two VMRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
682 // T_DOUBLE and T_LONG use two slots in the interpreter
683 if (sig_extended.at(i)._bt == T_LONG || sig_extended.at(i)._bt == T_DOUBLE) {
684 // long/double in gpr
685 #ifdef ASSERT
686 // Overwrite the unused slot with known junk
687 LP64_ONLY(__ mov64(rax, CONST64(0xdeadffffdeadaaab)));
688 __ movptr(Address(rsp, st_off), rax);
689 #endif /* ASSERT */
690 __ movptr(Address(rsp, next_off), r);
691 } else {
692 __ movptr(Address(rsp, st_off), r);
693 }
694 }
695 } else {
696 assert(r_1->is_XMMRegister(), "");
697 if (!r_2->is_valid()) {
698 __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
699 } else {
700 assert(sig_extended.at(i)._bt == T_DOUBLE || sig_extended.at(i)._bt == T_LONG, "wrong type");
701 move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
702 }
703 }
704 }
705
706 // Schedule the branch target address early.
707 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
708 // And repush original return address
709 __ push(rax);
710 __ jmp(rcx);
711 }
712
713
714 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
715 int next_val_off = ld_off - Interpreter::stackElementSize;
716 __ movdbl(r, Address(saved_sp, next_val_off));
717 }
718
719 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
720 address code_start, address code_end,
721 Label& L_ok) {
722 Label L_fail;
723 __ lea(temp_reg, ExternalAddress(code_start));
724 __ cmpptr(pc_reg, temp_reg);
725 __ jcc(Assembler::belowEqual, L_fail);
726 __ lea(temp_reg, ExternalAddress(code_end));
727 __ cmpptr(pc_reg, temp_reg);
728 __ jcc(Assembler::below, L_ok);
729 __ bind(L_fail);
730 }
731
732 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
733 int comp_args_on_stack,
734 const GrowableArray<SigEntry>& sig_extended,
735 const VMRegPair *regs) {
736
737 // Note: rsi contains the senderSP on entry. We must preserve it since
738 // we may do an i2c -> c2i transition if we lose a race where compiled
739 // code goes non-entrant while we get args ready.
740
741 // Adapters can be frameless because they do not require the caller
742 // to perform additional cleanup work, such as correcting the stack pointer.
743 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
744 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
745 // even if a callee has modified the stack pointer.
746 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
747 // routinely repairs its caller's stack pointer (from sender_sp, which is set
748 // up via the senderSP register).
749 // In other words, if *either* the caller or callee is interpreted, we can
750 // get the stack pointer repaired after a call.
751 // This is why c2i and i2c adapters cannot be indefinitely composed.
752 // In particular, if a c2i adapter were to somehow call an i2c adapter,
753 // both caller and callee would be compiled methods, and neither would
754 // clean up the stack pointer changes performed by the two adapters.
755 // If this happens, control eventually transfers back to the compiled
756 // caller, but with an uncorrected stack, causing delayed havoc.
805 }
806
807 // Align the outgoing SP
808 __ andptr(rsp, -(StackAlignmentInBytes));
809
810 // push the return address on the stack (note that pushing, rather
811 // than storing it, yields the correct frame alignment for the callee)
812 __ push(rax);
813
814 // Put saved SP in another register
815 const Register saved_sp = rax;
816 __ movptr(saved_sp, rdi);
817
818
819 // Will jump to the compiled code just as if compiled code was doing it.
820 // Pre-load the register-jump target early, to schedule it better.
821 __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
822
823 // Now generate the shuffle code. Pick up all register args and move the
824 // rest through the floating point stack top.
825 for (int i = 0; i < sig_extended.length(); i++) {
826 if (sig_extended.at(i)._bt == T_VOID) {
827 // Longs and doubles are passed in native word order, but misaligned
828 // in the 32-bit build.
829 assert(i > 0 && (sig_extended.at(i-1)._bt == T_LONG || sig_extended.at(i-1)._bt == T_DOUBLE), "missing half");
830 continue;
831 }
832
833 // Pick up 0, 1 or 2 words from SP+offset.
834
835 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
836 "scrambled load targets?");
837 // Load in argument order going down.
838 int ld_off = (sig_extended.length() - i) * Interpreter::stackElementSize;
839 // Point to interpreter value (vs. tag)
840 int next_off = ld_off - Interpreter::stackElementSize;
844 VMReg r_1 = regs[i].first();
845 VMReg r_2 = regs[i].second();
846 if (!r_1->is_valid()) {
847 assert(!r_2->is_valid(), "");
848 continue;
849 }
850 if (r_1->is_stack()) {
851 // Convert stack slot to an SP offset (+ wordSize to account for return address )
852 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
853
854 // We can use rsi as a temp here because compiled code doesn't need rsi as an input
855 // and if we end up going through a c2i because of a miss, a reasonable value of rsi
856 // will be generated.
857 if (!r_2->is_valid()) {
858 // __ fld_s(Address(saved_sp, ld_off));
859 // __ fstp_s(Address(rsp, st_off));
860 __ movl(rsi, Address(saved_sp, ld_off));
861 __ movptr(Address(rsp, st_off), rsi);
862 } else {
863 // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
864 // are accessed with negative offsets, so the LSW is at the LOW address.
865
866 // ld_off is the MSW, so get the LSW
867 // st_off is the LSW (i.e. reg.first())
868 // __ fld_d(Address(saved_sp, next_off));
869 // __ fstp_d(Address(rsp, st_off));
870 //
871 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
872 // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
873 // so we must adjust where to pick up the data to match the interpreter.
879 const int offset = (NOT_LP64(true ||) sig_extended.at(i)._bt==T_LONG||sig_extended.at(i)._bt==T_DOUBLE)?
880 next_off : ld_off;
881 __ movptr(rsi, Address(saved_sp, offset));
882 __ movptr(Address(rsp, st_off), rsi);
883 #ifndef _LP64
884 __ movptr(rsi, Address(saved_sp, ld_off));
885 __ movptr(Address(rsp, st_off + wordSize), rsi);
886 #endif // _LP64
887 }
888 } else if (r_1->is_Register()) { // Register argument
889 Register r = r_1->as_Register();
890 assert(r != rax, "must be different");
891 if (r_2->is_valid()) {
892 //
893 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
894 // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
895 // so we must adjust where to pick up the data to match the interpreter.
896
897 const int offset = (NOT_LP64(true ||) sig_extended.at(i)._bt==T_LONG||sig_extended.at(i)._bt==T_DOUBLE)?
898 next_off : ld_off;
899
900 // this can be a misaligned move
901 __ movptr(r, Address(saved_sp, offset));
902 #ifndef _LP64
903 assert(r_2->as_Register() != rax, "need another temporary register");
904 // Remember r_1 is low address (and LSB on x86)
905 // So r_2 gets loaded from high address regardless of the platform
906 __ movptr(r_2->as_Register(), Address(saved_sp, ld_off));
907 #endif // _LP64
908 } else {
909 __ movl(r, Address(saved_sp, ld_off));
910 }
911 } else {
912 assert(r_1->is_XMMRegister(), "");
913 if (!r_2->is_valid()) {
914 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
915 } else {
916 move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off);
917 }
925 // "compiled" so it is much better to make this transition
926 // invisible to the stack walking code. Unfortunately if
927 // we try and find the callee by normal means a safepoint
928 // is possible. So we stash the desired callee in the thread
929 // and the VM will find it there should this case occur.
930
931 __ get_thread(rax);
932 __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
933
934 // move Method* to rax in case we end up in a c2i adapter.
935 // the c2i adapters expect Method* in rax (c2) because c2's
936 // resolve stubs return the result (the method) in rax.
937 // I'd love to fix this.
938 __ mov(rax, rbx);
939
940 __ jmp(rdi);
941 }
942
943 // ---------------------------------------------------------------
944 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
945 int comp_args_on_stack,
946 const GrowableArray<SigEntry>& sig_extended,
947 const VMRegPair *regs,
948 AdapterFingerPrint* fingerprint,
949 AdapterBlob*& new_adapter) {
950 address i2c_entry = __ pc();
951
952 gen_i2c_adapter(masm, comp_args_on_stack, sig_extended, regs);
953
954 // -------------------------------------------------------------------------
955 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
956 // to the interpreter. The args start out packed in the compiled layout. They
957 // need to be unpacked into the interpreter layout. This will almost always
958 // require some stack space. We grow the current (compiled) stack, then repack
959 // the args. We finally end in a jump to the generic interpreter entry point.
960 // On exit from the interpreter, the interpreter will restore our SP (lest the
961 // compiled code, which relies solely on SP and not EBP, get sick).
962
963 address c2i_unverified_entry = __ pc();
964 Label skip_fixup;
965
966 Register holder = rax;
967 Register receiver = rcx;
968 Register temp = rbx;
969
970 {
971
972 Label missed;
973 __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
974 __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
975 __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
976 __ jcc(Assembler::notEqual, missed);
977 // Method might have been compiled since the call site was patched to
978 // interpreted; if that is the case, treat it as a miss so we can get
979 // the call site corrected.
980 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
981 __ jcc(Assembler::equal, skip_fixup);
982
983 __ bind(missed);
984 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
985 }
986
987 address c2i_entry = __ pc();
988
989 OopMapSet* oop_maps = NULL;
990 int frame_complete = CodeOffsets::frame_never_safe;
991 int frame_size_in_words = 0;
992 gen_c2i_adapter(masm, sig_extended, regs, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words);
993
994 __ flush();
995 new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps);
996 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
997 }
998
999 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1000 VMRegPair *regs,
1001 VMRegPair *regs2,
1002 int total_args_passed) {
1003 assert(regs2 == NULL, "not needed on x86");
1004 // We return the number of VMRegImpl stack slots we need to reserve for all
1005 // the arguments NOT counting out_preserve_stack_slots.
1006
1007 uint stack = 0; // All arguments on stack
1008
1009 for( int i = 0; i < total_args_passed; i++) {
1010 // From the type and the argument number (count) compute the location
1011 switch( sig_bt[i] ) {
1012 case T_BOOLEAN:
1013 case T_CHAR:
1014 case T_FLOAT:
1015 case T_BYTE:
1016 case T_SHORT:
1017 case T_INT:
1018 case T_OBJECT:
1019 case T_VALUETYPE:
1020 case T_ARRAY:
1021 case T_ADDRESS:
1022 case T_METADATA:
1023 regs[i].set1(VMRegImpl::stack2reg(stack++));
1024 break;
1025 case T_LONG:
1026 case T_DOUBLE: // The stack numbering is reversed from Java
1027 // Since C arguments do not get reversed, the ordering for
1028 // doubles on the stack must be opposite the Java convention
1029 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
1030 regs[i].set2(VMRegImpl::stack2reg(stack));
1031 stack += 2;
1032 break;
1033 case T_VOID: regs[i].set_bad(); break;
1034 default:
1035 ShouldNotReachHere();
1036 break;
1037 }
1038 }
1039 return stack;
1281 switch (in_sig_bt[i]) {
1282 case T_ARRAY:
1283 if (map != NULL) {
1284 __ movptr(Address(rsp, offset), reg);
1285 } else {
1286 __ movptr(reg, Address(rsp, offset));
1287 }
1288 break;
1289 case T_BOOLEAN:
1290 case T_CHAR:
1291 case T_BYTE:
1292 case T_SHORT:
1293 case T_INT:
1294 if (map != NULL) {
1295 __ movl(Address(rsp, offset), reg);
1296 } else {
1297 __ movl(reg, Address(rsp, offset));
1298 }
1299 break;
1300 case T_OBJECT:
1301 case T_VALUETYPE:
1302 default: ShouldNotReachHere();
1303 }
1304 } else if (in_regs[i].first()->is_XMMRegister()) {
1305 if (in_sig_bt[i] == T_FLOAT) {
1306 int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
1307 int offset = slot * VMRegImpl::stack_slot_size;
1308 assert(handle_index <= stack_slots, "overflow");
1309 if (map != NULL) {
1310 __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
1311 } else {
1312 __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
1313 }
1314 }
1315 } else if (in_regs[i].first()->is_stack()) {
1316 if (in_sig_bt[i] == T_ARRAY && map != NULL) {
1317 int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1318 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1319 }
1320 }
1321 }
1418 __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
1419 arrayOopDesc::base_offset_in_bytes(in_elem_type)));
1420 simple_move32(masm, tmp, length_arg);
1421 __ jmpb(done);
1422 __ bind(is_null);
1423 // Pass zeros
1424 __ xorptr(tmp_reg, tmp_reg);
1425 simple_move32(masm, tmp, body_arg);
1426 simple_move32(masm, tmp, length_arg);
1427 __ bind(done);
1428 }
1429
1430 static void verify_oop_args(MacroAssembler* masm,
1431 const methodHandle& method,
1432 const BasicType* sig_bt,
1433 const VMRegPair* regs) {
1434 Register temp_reg = rbx; // not part of any compiled calling seq
1435 if (VerifyOops) {
1436 for (int i = 0; i < method->size_of_parameters(); i++) {
1437 if (sig_bt[i] == T_OBJECT ||
1438 sig_bt[i] == T_ARRAY || sig_bt[i] == T_VALUETYPE) {
1439 VMReg r = regs[i].first();
1440 assert(r->is_valid(), "bad oop arg");
1441 if (r->is_stack()) {
1442 __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1443 __ verify_oop(temp_reg);
1444 } else {
1445 __ verify_oop(r->as_Register());
1446 }
1447 }
1448 }
1449 }
1450 }
1451
1452 static void gen_special_dispatch(MacroAssembler* masm,
1453 const methodHandle& method,
1454 const BasicType* sig_bt,
1455 const VMRegPair* regs) {
1456 verify_oop_args(masm, method, sig_bt, regs);
1457 vmIntrinsics::ID iid = method->intrinsic_id();
1458
1891 // sure we can capture all the incoming oop args from the
1892 // caller.
1893 //
1894 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1895
1896 // Mark location of rbp
1897 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1898
1899 // We know that we only have args in at most two integer registers (rcx, rdx), so rax and rbx
1900 // are free to use as temporaries if we have to do stack-to-stack moves.
1901 // All inbound args are referenced based on rbp, and all outbound args via rsp.
1902
1903 for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1904 switch (in_sig_bt[i]) {
1905 case T_ARRAY:
1906 if (is_critical_native) {
1907 unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1908 c_arg++;
1909 break;
1910 }
1911 case T_VALUETYPE:
1912 case T_OBJECT:
1913 assert(!is_critical_native, "no oop arguments");
1914 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1915 ((i == 0) && (!is_static)),
1916 &receiver_offset);
1917 break;
1918 case T_VOID:
1919 break;
1920
1921 case T_FLOAT:
1922 float_move(masm, in_regs[i], out_regs[c_arg]);
1923 break;
1924
1925 case T_DOUBLE:
1926 assert( i + 1 < total_in_args &&
1927 in_sig_bt[i + 1] == T_VOID &&
1928 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1929 double_move(masm, in_regs[i], out_regs[c_arg]);
1930 break;
1931
2074 // Verify or restore cpu control state after JNI call
2075 __ restore_cpu_control_state_after_jni();
2076
2077 // WARNING - on Windows Java Natives use pascal calling convention and pop the
2078 // arguments off of the stack. We could just re-adjust the stack pointer here
2079 // and continue to do SP relative addressing but we instead switch to FP
2080 // relative addressing.
2081
2082 // Unpack native results.
2083 switch (ret_type) {
2084 case T_BOOLEAN: __ c2bool(rax); break;
2085 case T_CHAR : __ andptr(rax, 0xFFFF); break;
2086 case T_BYTE : __ sign_extend_byte (rax); break;
2087 case T_SHORT : __ sign_extend_short(rax); break;
2088 case T_INT : /* nothing to do */ break;
2089 case T_DOUBLE :
2090 case T_FLOAT :
2091 // Result is in st0; we'll save it as needed
2092 break;
2093 case T_ARRAY: // Really a handle
2094 case T_VALUETYPE: // Really a handle
2095 case T_OBJECT: // Really a handle
2096 break; // can't de-handlize until after safepoint check
2097 case T_VOID: break;
2098 case T_LONG: break;
2099 default : ShouldNotReachHere();
2100 }
2101
2102 // Switch thread to "native transition" state before reading the synchronization state.
2103 // This additional state is necessary because reading and testing the synchronization
2104 // state is not atomic w.r.t. GC, as this scenario demonstrates:
2105 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2106 // VM thread changes sync state to synchronizing and suspends threads for GC.
2107 // Thread A is resumed to finish this native method, but doesn't block here since it
2108 // didn't see any synchronization in progress, and escapes.
2109 __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2110
2111 // Force this write out before the read below
2112 __ membar(Assembler::Membar_mask_bits(
2113 Assembler::LoadLoad | Assembler::LoadStore |
2114 Assembler::StoreLoad | Assembler::StoreStore));
2221 __ bind(done);
2222
2223 }
2224
2225 {
2226 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
2227 // Tell dtrace about this method exit
2228 save_native_result(masm, ret_type, stack_slots);
2229 __ mov_metadata(rax, method());
2230 __ call_VM_leaf(
2231 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2232 thread, rax);
2233 restore_native_result(masm, ret_type, stack_slots);
2234 }
2235
2236 // We can finally stop using that last_Java_frame we set up ages ago
2237
2238 __ reset_last_Java_frame(thread, false);
2239
2240 // Unbox oop result, e.g. JNIHandles::resolve value.
2241 if (ret_type == T_OBJECT || ret_type == T_ARRAY || ret_type == T_VALUETYPE) {
2242 __ resolve_jobject(rax /* value */,
2243 thread /* thread */,
2244 rcx /* tmp */);
2245 }
2246
2247 if (CheckJNICalls) {
2248 // clear_pending_jni_exception_check
2249 __ movptr(Address(thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD);
2250 }
2251
2252 if (!is_critical_native) {
2253 // reset handle block
2254 __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
2255 __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
2256
2257 // Any exception pending?
2258 __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
2259 __ jcc(Assembler::notEqual, exception_pending);
2260 }
2261
3172 // Pending exception after the safepoint
3173
3174 __ bind(pending);
3175
3176 RegisterSaver::restore_live_registers(masm);
3177
3178 // exception pending => remove activation and forward to exception handler
3179
3180 __ get_thread(thread);
3181 __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
3182 __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
3183 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3184
3185 // -------------
3186 // make sure all code is generated
3187 masm->flush();
3188
3189 // return the blob
3190 // frame_size_words or bytes??
3191 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
3192 }
3193
3194 BufferedValueTypeBlob* SharedRuntime::generate_buffered_value_type_adapter(const ValueKlass* vk) {
3195 Unimplemented();
3196 return NULL;
3197 }
|