src/cpu/x86/vm/sharedRuntime_x86_64.cpp
rev 1081 : imported patch indy-cleanup-6893081.patch


 621 
 622   //
 623   // We will only enter here from an interpreted frame and never after
 624   // passing thru a c2i. Azul allowed this but we do not. If we lose the
 625   // race and use a c2i we will remain interpreted for the race loser(s).
 626   // This removes all sorts of headaches on the x86 side and also eliminates
 627   // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
 628 
 629 
 630   // Note: r13 contains the senderSP on entry. We must preserve it since
 631   // we may do an i2c -> c2i transition if we lose a race where compiled
 632   // code goes non-entrant while we get args ready.
 633   // In addition we use r13 to locate all the interpreter args, as
 634   // we must align the stack to 16 bytes on an i2c entry, else we
 635   // lose the alignment we expect in all compiled code and register
 636   // save code can segv when fxsave instructions find an improperly
 637   // aligned stack pointer.
 638 
 639   __ movptr(rax, Address(rsp, 0));
 640 
 641   // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
 642   // in registers, we will occasionally have no stack args.
 643   int comp_words_on_stack = 0;
 644   if (comp_args_on_stack) {
 645     // Sig words on the stack are greater than VMRegImpl::stack0.  Those in
 646     // registers are below.  By subtracting stack0, we either get a negative
 647     // number (all values in registers) or the maximum stack slot accessed.
 648 
 649     // Convert 4-byte c2 stack slots to words.
 650     comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 651     // Round up to minimum stack alignment, in wordSize
 652     comp_words_on_stack = round_to(comp_words_on_stack, 2);
 653     __ subptr(rsp, comp_words_on_stack * wordSize);
 654   }
 655 
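For concreteness, the slot-to-word arithmetic above works out as follows. This is a minimal standalone sketch, assuming the LP64 values (VMRegImpl::stack_slot_size == 4, wordSize == 8, LogBytesPerWord == 3) and a round_to() that rounds up to a multiple of its second argument; the sample argument count is illustrative:

#include <cstdio>

static int round_to(int x, int unit) { return (x + unit - 1) / unit * unit; }

int main() {
  const int stack_slot_size = 4, wordSize = 8, LogBytesPerWord = 3;
  int comp_args_on_stack = 5;  // illustrative: five 4-byte outgoing stack slots
  int comp_words_on_stack =
      round_to(comp_args_on_stack * stack_slot_size, wordSize) >> LogBytesPerWord; // 24 >> 3 = 3
  comp_words_on_stack = round_to(comp_words_on_stack, 2);                          // pad to 4 words
  printf("subptr(rsp, %d)\n", comp_words_on_stack * wordSize);                     // 32 bytes
  return 0;
}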
 656 
 657   // Ensure compiled code always sees stack at proper alignment
 658   __ andptr(rsp, -16);
 659 
 660   // push the return address and misalign the stack so that the youngest frame
 661   // sees the stack just as it would after a call instruction
 662   __ push(rax);
 663 
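The net effect of the andptr/push pair is exactly the alignment state compiled code expects: 16 bytes aligned before the (virtual) call, hence SP congruent to 8 mod 16 at the callee's first instruction. A minimal sketch, with an arbitrary sample SP:

#include <cassert>
#include <cstdint>

int main() {
  uintptr_t rsp = 0x7fffffffe123;   // arbitrary incoming SP
  rsp &= ~uintptr_t(15);            // andptr(rsp, -16): now 16-byte aligned
  rsp -= 8;                         // push(rax): the return-address slot
  assert(rsp % 16 == 8);            // exactly what a real call would leave behind
  return 0;
}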
 664   // Will jump to the compiled code just as if compiled code was doing it.
 665   // Pre-load the register-jump target early, to schedule it better.
 666   __ movptr(r11, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));
 667 
 668   // Now generate the shuffle code.  Pick up all register args and move the
 669   // rest through the floating point stack top.
 670   for (int i = 0; i < total_args_passed; i++) {
 671     if (sig_bt[i] == T_VOID) {
 672       // Longs and doubles are passed in native word order, but misaligned
 673       // in the 32-bit build.
 674       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 675       continue;
 676     }
 677 
 678     // Pick up 0, 1 or 2 words from SP+offset.
 679 
 680     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 681             "scrambled load targets?");
 682     // Load in argument order going down.
 683     // int ld_off = (total_args_passed + comp_words_on_stack -i)*wordSize;
 684     // base ld_off on r13 (sender_sp) as the stack alignment makes offsets from rsp
 685     // unpredictable
 686     int ld_off = ((total_args_passed - 1) - i)*Interpreter::stackElementSize();
 687 
 688     // Point to interpreter value (vs. tag)
 689     int next_off = ld_off - Interpreter::stackElementSize();
 690     //
 691     //
 692     //
 693     VMReg r_1 = regs[i].first();
 694     VMReg r_2 = regs[i].second();
 695     if (!r_1->is_valid()) {
 696       assert(!r_2->is_valid(), "");
 697       continue;
 698     }
 699     if (r_1->is_stack()) {
 700       // Convert stack slot to an SP offset (+ wordSize to account for the return address)
 701       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 702       if (!r_2->is_valid()) {
 703         // sign extend???
 704         __ movl(rax, Address(r13, ld_off));
 705         __ movptr(Address(rsp, st_off), rax);
 706       } else {
 707         //
 708         // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 709         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
 710         // so we must adjust where to pick up the data to match the interpreter.
 711         //
 712         // Interpreter local[n] == MSW, local[n+1] == LSW however locals
 713         // are accessed as negative so LSW is at LOW address
 714 
 715         // ld_off is MSW so get LSW
 716         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 717                            next_off : ld_off;
 718         __ movq(rax, Address(r13, offset));
 719         // st_off is LSW (i.e. reg.first())
 720         __ movq(Address(rsp, st_off), rax);
 721       }
 722     } else if (r_1->is_Register()) {  // Register argument
 723       Register r = r_1->as_Register();
 724       assert(r != rax, "must be different");
 725       if (r_2->is_valid()) {
 726         //
 727         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 728         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
 729         // so we must adjust where to pick up the data to match the interpreter.
 730 
 731         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 732                            next_off : ld_off;
 733 
 734         // this can be a misaligned move
 735         __ movq(r, Address(r13, offset));
 736       } else {
 737         // sign extend and use a full word?
 738         __ movl(r, Address(r13, ld_off));
 739       }
 740     } else {
 741       if (!r_2->is_valid()) {
 742         __ movflt(r_1->as_XMMRegister(), Address(r13, ld_off));
 743       } else {
 744         __ movdbl(r_1->as_XMMRegister(), Address(r13, next_off));
 745       }
 746     }
 747   }
 748 
 749   // 6243940 We might end up in handle_wrong_method if
 750   // the callee is deoptimized as we race thru here. If that
 751   // happens we don't want to take a safepoint because the
 752   // caller frame will look interpreted and arguments are now
 753   // "compiled" so it is much better to make this transition
 754   // invisible to the stack walking code. Unfortunately if
 755   // we try and find the callee by normal means a safepoint
 756   // is possible. So we stash the desired callee in the thread
 757   // and the VM will find it there should this case occur.
 758 
 759   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
 760 
 761   // put the methodOop where a c2i would expect it, should we end up there;
 762   // only needed because c2 resolve stubs return the methodOop as a result in
 763   // rax
 764   __ mov(rax, rbx);
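A minimal model of that stash-and-recover handshake, with hypothetical stand-in types (the real JavaThread/methodOop machinery is of course much richer); the point is only that handle_wrong_method can learn the callee from the thread instead of walking the half-transitioned stack:

#include <cassert>
#include <cstddef>

struct methodOop_ {};                                  // stand-in for methodOopDesc*
struct JavaThread_ { methodOop_* callee_target = nullptr; };

// adapter side: what the movptr to callee_target_offset() above does
void i2c_stash(JavaThread_* t, methodOop_* callee) { t->callee_target = callee; }

// VM side (sketched): recover the intended callee without stack walking
methodOop_* recover_callee(JavaThread_* t) {
  methodOop_* m = t->callee_target;
  assert(m != nullptr && "adapter must have stashed the callee");
  t->callee_target = nullptr;
  return m;
}

int main() {
  JavaThread_ t; methodOop_ m;
  i2c_stash(&t, &m);
  assert(recover_callee(&t) == &m);
  return 0;
}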


3302 
3303   OopMapSet* oop_maps = new OopMapSet();
3304 
3305   oop_maps->add_gc_map( __ pc()-start, new OopMap(SimpleRuntimeFrame::framesize, 0));
3306 
3307   __ reset_last_Java_frame(false, false);
3308 
3309   // Restore callee-saved registers
3310 
3311   // rbp is an implicitly saved callee-saved register (i.e. the calling
3312   // convention will save/restore it in the prolog/epilog). Other than that
3313   // there are no callee-saved registers now that adapter frames are gone.
3314 
3315   __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
3316 
3317   __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
3318   __ pop(rdx);                  // No need for exception pc anymore
3319 
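rbp_off and return_off are indices of 4-byte frame slots, so shifting by LogBytesPerInt (2) turns a slot index into a byte offset from rsp. A compile-time sketch with illustrative slot values (not the real SimpleRuntimeFrame layout):

enum { rbp_off = 4, return_off = 6, LogBytesPerInt = 2 };     // illustrative values only
static_assert((rbp_off    << LogBytesPerInt) == 16, "slot 4 -> byte offset 16");
static_assert((return_off << LogBytesPerInt) == 24, "slot 6 -> byte offset 24");
int main() { return 0; }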
3320   // rax: exception handler
3321 
3322   // We have a handler in rax (could be deopt blob).
3323   __ mov(r8, rax);
3324 
3325   // Get the exception oop
3326   __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
3327   // Get the exception pc in case we are deoptimized
3328   __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
3329 #ifdef ASSERT
3330   __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD);
3331   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
3332 #endif
3333   // Clear the exception oop so GC no longer processes it as a root.
3334   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
3335 
3336   // rax: exception oop
3337   // r8:  exception handler
3338   // rdx: exception pc
3339   // Jump to handler
3340 
3341   __ jmp(r8);


 621 
 622   //
 623   // We will only enter here from an interpreted frame and never after
 624   // passing thru a c2i. Azul allowed this but we do not. If we lose the
 625   // race and use a c2i we will remain interpreted for the race loser(s).
 626   // This removes all sorts of headaches on the x86 side and also eliminates
 627   // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
 628 
 629 
 630   // Note: r13 contains the senderSP on entry. We must preserve it since
 631   // we may do an i2c -> c2i transition if we lose a race where compiled
 632   // code goes non-entrant while we get args ready.
 633   // In addition we use r13 to locate all the interpreter args, as
 634   // we must align the stack to 16 bytes on an i2c entry, else we
 635   // lose the alignment we expect in all compiled code and register
 636   // save code can segv when fxsave instructions find an improperly
 637   // aligned stack pointer.
 638 
 639   __ movptr(rax, Address(rsp, 0));
 640 
 641   // Must preserve original SP for loading incoming arguments because
 642   // we need to align the outgoing SP for compiled code.
 643   __ movptr(r11, rsp);
 644 
 645   // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
 646   // in registers, we will occasionally have no stack args.
 647   int comp_words_on_stack = 0;
 648   if (comp_args_on_stack) {
 649     // Sig words on the stack are greater than VMRegImpl::stack0.  Those in
 650     // registers are below.  By subtracting stack0, we either get a negative
 651     // number (all values in registers) or the maximum stack slot accessed.
 652 
 653     // Convert 4-byte c2 stack slots to words.
 654     comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 655     // Round up to minimum stack alignment, in wordSize
 656     comp_words_on_stack = round_to(comp_words_on_stack, 2);
 657     __ subptr(rsp, comp_words_on_stack * wordSize);
 658   }
 659 
 660 
 661   // Ensure compiled code always sees stack at proper alignment
 662   __ andptr(rsp, -16);
 663 
 664   // push the return address and misalign the stack so that the youngest frame
 665   // sees the stack just as it would after a call instruction
 666   __ push(rax);
 667 
 668   // Put saved SP in another register
 669   const Register saved_sp = rax;
 670   __ movptr(saved_sp, r11);
 671 
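The r11/rax choreography matters because the andptr destroys the incoming SP while rax must first carry the return address; only after the push is rax free to become saved_sp. A minimal sketch of why the capture must precede the realignment (addresses are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  uintptr_t rsp = 0x7fffffffe448;      // incoming interpreter SP: the args live here
  uintptr_t r11 = rsp;                 // movptr(r11, rsp): capture before it is clobbered
  rsp -= 4 * 8;                        // subptr: room for outgoing stack args
  rsp &= ~uintptr_t(15);               // andptr(rsp, -16): original SP now unrecoverable
  uintptr_t saved_sp = r11;            // movptr(saved_sp /* rax */, r11)
  assert(saved_sp == 0x7fffffffe448);  // interpreter args still addressable
  return 0;
}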
 672   // Will jump to the compiled code just as if compiled code was doing it.
 673   // Pre-load the register-jump target early, to schedule it better.
 674   __ movptr(r11, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));
 675 
 676   // Now generate the shuffle code.  Pick up all register args and move the
 677   // rest through the floating point stack top.
 678   for (int i = 0; i < total_args_passed; i++) {
 679     if (sig_bt[i] == T_VOID) {
 680       // Longs and doubles are passed in native word order, but misaligned
 681       // in the 32-bit build.
 682       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 683       continue;
 684     }
 685 
 686     // Pick up 0, 1 or 2 words from SP+offset.
 687 
 688     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 689             "scrambled load targets?");
 690     // Load in argument order going down.
 691     int ld_off = (total_args_passed - i)*Interpreter::stackElementSize() + Interpreter::value_offset_in_bytes();
 692     // Point to interpreter value (vs. tag)
 693     int next_off = ld_off - Interpreter::stackElementSize();
 694     //
 695     //
 696     //
 697     VMReg r_1 = regs[i].first();
 698     VMReg r_2 = regs[i].second();
 699     if (!r_1->is_valid()) {
 700       assert(!r_2->is_valid(), "");
 701       continue;
 702     }
 703     if (r_1->is_stack()) {
 704       // Convert stack slot to an SP offset (+ wordSize to account for the return address)
 705       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 706 
 707       // We can use r13 as a temp here because compiled code doesn't need r13 as an input,
 708       // and if we end up going thru a c2i because of a miss, a reasonable value of r13
 709       // will be generated.
 710       if (!r_2->is_valid()) {
 711         // sign extend???
 712         __ movl(r13, Address(saved_sp, ld_off));
 713         __ movptr(Address(rsp, st_off), r13);
 714       } else {
 715         //
 716         // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 717         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
 718         // so we must adjust where to pick up the data to match the interpreter.
 719         //
 720         // Interpreter local[n] == MSW, local[n+1] == LSW however locals
 721         // are accessed as negative so LSW is at LOW address
 722 
 723         // ld_off is MSW so get LSW
 724         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 725                            next_off : ld_off;
 726         __ movq(r13, Address(saved_sp, offset));
 727         // st_off is LSW (i.e. reg.first())
 728         __ movq(Address(rsp, st_off), r13);
 729       }
 730     } else if (r_1->is_Register()) {  // Register argument
 731       Register r = r_1->as_Register();
 732       assert(r != rax, "must be different");
 733       if (r_2->is_valid()) {
 734         //
 735         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 736         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
 737         // so we must adjust where to pick up the data to match the interpreter.
 738 
 739         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 740                            next_off : ld_off;
 741 
 742         // this can be a misaligned move
 743         __ movq(r, Address(saved_sp, offset));
 744       } else {
 745         // sign extend and use a full word?
 746         __ movl(r, Address(saved_sp, ld_off));
 747       }
 748     } else {
 749       if (!r_2->is_valid()) {
 750         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
 751       } else {
 752         __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
 753       }
 754     }
 755   }
 756 
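To make the ld_off/next_off arithmetic in the loop above concrete, here is a walk-through of the new formula for a (long, int) signature. This is a hedged sketch assuming stackElementSize() == 8 and value_offset_in_bytes() == 0, their untagged-stack values; with the tagged interpreter stack both constants grow, but the shape is the same (the old code, by contrast, used ((total_args_passed - 1) - i) * stackElementSize() with no value offset):

#include <cstdio>

int main() {
  const int stackElementSize = 8, value_offset_in_bytes = 0;  // assumed untagged values
  const int total_args_passed = 3;  // T_LONG + its T_VOID half + T_INT
  const char* sig[] = { "T_LONG", "T_VOID", "T_INT" };
  for (int i = 0; i < total_args_passed; i++) {
    int ld_off   = (total_args_passed - i) * stackElementSize + value_offset_in_bytes;
    int next_off = ld_off - stackElementSize;
    printf("%-6s  ld_off=saved_sp+%-2d  next_off=saved_sp+%d\n", sig[i], ld_off, next_off);
  }
  // T_LONG: ld_off=24 is the MSW slot, but its 64-bit payload is read with one
  // movq from next_off=16 (the LSW, at the lower address); the T_VOID half is
  // skipped by the real loop; T_INT loads 4 bytes from ld_off=8.
  return 0;
}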
 757   // 6243940 We might end up in handle_wrong_method if
 758   // the callee is deoptimized as we race thru here. If that
 759   // happens we don't want to take a safepoint because the
 760   // caller frame will look interpreted and arguments are now
 761   // "compiled" so it is much better to make this transition
 762   // invisible to the stack walking code. Unfortunately if
 763   // we try and find the callee by normal means a safepoint
 764   // is possible. So we stash the desired callee in the thread
 765   // and the VM will find it there should this case occur.
 766 
 767   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
 768 
 769   // put the methodOop where a c2i would expect it, should we end up there;
 770   // only needed because c2 resolve stubs return the methodOop as a result in
 771   // rax
 772   __ mov(rax, rbx);


3310 
3311   OopMapSet* oop_maps = new OopMapSet();
3312 
3313   oop_maps->add_gc_map( __ pc()-start, new OopMap(SimpleRuntimeFrame::framesize, 0));
3314 
3315   __ reset_last_Java_frame(false, false);
3316 
3317   // Restore callee-saved registers
3318 
3319   // rbp is an implicitly saved callee-saved register (i.e. the calling
3320   // convention will save/restore it in the prolog/epilog). Other than that
3321   // there are no callee-saved registers now that adapter frames are gone.
3322 
3323   __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
3324 
3325   __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
3326   __ pop(rdx);                  // No need for exception pc anymore
3327 
3328   // rax: exception handler
3329 
3330   // Restore SP from BP if the exception PC is a MethodHandle call.
3331   __ cmpl(Address(r15_thread, JavaThread::is_method_handle_exception_offset()), 0);
3332   __ cmovptr(Assembler::notEqual, rsp, rbp);
3333 
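The new cmpl/cmovptr pair above is a branch-free conditional: if the thread flags the exception PC as a MethodHandle call site (whose frame may have been extended), SP is restored from BP before jumping to the handler. A trivial model of the same logic, with made-up addresses:

#include <cassert>
#include <cstdint>

int main() {
  int is_method_handle_exception = 1;   // nonzero: exception PC was a MethodHandle call
  uintptr_t rbp = 0x7fffffffe400;       // frame pointer of the (possibly extended) frame
  uintptr_t rsp = 0x7fffffffe3c0;       // SP, possibly left extended
  if (is_method_handle_exception != 0)  // cmpl(..., 0); cmovptr(notEqual, rsp, rbp)
    rsp = rbp;
  assert(rsp == 0x7fffffffe400);        // SP restored before the jmp to the handler
  return 0;
}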
3334   // We have a handler in rax (could be deopt blob).
3335   __ mov(r8, rax);
3336 
3337   // Get the exception oop
3338   __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
3339   // Get the exception pc in case we are deoptimized
3340   __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
3341 #ifdef ASSERT
3342   __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD);
3343   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
3344 #endif
3345   // Clear the exception oop so GC no longer processes it as a root.
3346   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
3347 
3348   // rax: exception oop
3349   // r8:  exception handler
3350   // rdx: exception pc
3351   // Jump to handler
3352 
3353   __ jmp(r8);