
src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp

rev 55751 : 8228369: Shenandoah: Refactor LRB C1 stubs


 719   // itself and from the retry loop.
 720   __ bind(done);
 721   if (!exchange) {
 722     assert(res != NULL, "need result register");
 723 #ifdef _LP64
 724     __ setb(Assembler::equal, res);
 725     __ movzbl(res, res);
 726 #else
 727     // Need something else to clean the result, because some registers
 728     // do not have the byte encoding that movzbl wants. Cannot do the xor first,
 729     // because it modifies the flags.
 730     Label res_non_zero;
 731     __ movptr(res, 1);
 732     __ jcc(Assembler::equal, res_non_zero, true);
 733     __ xorptr(res, res);
 734     __ bind(res_non_zero);
 735 #endif
 736   }
 737 }
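Both sides of the #ifdef above materialize the comparison flags as a clean 0/1 value without clobbering them first. A minimal C++ sketch of what the two paths compute; flags_equal is a stand-in for the ZF state the preceding cmpxchg leaves behind, not real HotSpot code:

    // Sketch only: models the two epilogue paths above.
    static int materialize_result(bool flags_equal) {
    #ifdef _LP64
      // setb writes 0/1 into the low byte of res; movzbl zero-extends it.
      return flags_equal ? 1 : 0;
    #else
      // 32-bit path: preload 1 (movptr does not touch flags), then clear
      // the result only when the jcc falls through.
      int res = 1;
      if (!flags_equal) {
        res = 0; // the xorptr(res, res)
      }
      return res;
    #endif
    }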
 738 
 739 void ShenandoahBarrierSetAssembler::save_vector_registers(MacroAssembler* masm) {
 740   int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
 741   if (UseAVX > 2) {
 742     num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
 743   }
 744 
 745   if (UseSSE == 1)  {
 746     __ subptr(rsp, sizeof(jdouble)*8);
 747     for (int n = 0; n < 8; n++) {
 748       __ movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
 749     }
 750   } else if (UseSSE >= 2)  {
 751     if (UseAVX > 2) {
 752       __ push(rbx);
 753       __ movl(rbx, 0xffff);
 754       __ kmovwl(k1, rbx);
 755       __ pop(rbx);
 756     }
 757 #ifdef COMPILER2
 758     if (MaxVectorSize > 16) {
 759       if (UseAVX > 2) {
 760         // Save upper half of ZMM registers
 761         __ subptr(rsp, 32*num_xmm_regs);
 762         for (int n = 0; n < num_xmm_regs; n++) {
 763           __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
 764         }
 765       }
 766       assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
 767       // Save upper half of YMM registers
 768       __ subptr(rsp, 16*num_xmm_regs);
 769       for (int n = 0; n < num_xmm_regs; n++) {
 770         __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
 771       }
 772     }
 773 #endif
 774     // Save whole 128bit (16 bytes) XMM registers
 775     __ subptr(rsp, 16*num_xmm_regs);
 776 #ifdef _LP64
 777     if (VM_Version::supports_evex()) {
 778       for (int n = 0; n < num_xmm_regs; n++) {
 779         __ vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
 780       }
 781     } else {
 782       for (int n = 0; n < num_xmm_regs; n++) {
 783         __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
 784       }
 785     }
 786 #else
 787     for (int n = 0; n < num_xmm_regs; n++) {
 788       __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
 789     }
 790 #endif
 791   }
 792 }
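For orientation: with UseSSE >= 2 the save area above stacks the ZMM upper halves first (when UseAVX > 2), then the YMM upper halves (COMPILER2 with MaxVectorSize > 16), then the full 128-bit XMM registers, and restore_vector_registers below unwinds it in exactly the opposite order. A small compile-time sketch of the size arithmetic under those same flag meanings (not HotSpot code):

    #include <cstddef>

    // Sketch: bytes reserved on rsp by save_vector_registers.
    constexpr size_t vector_save_area_bytes(int num_xmm_regs,
                                            bool avx512,        // UseAVX > 2
                                            bool wide_vectors)  // COMPILER2 && MaxVectorSize > 16
    {
      size_t bytes = 16u * num_xmm_regs;          // full 128-bit XMM saves
      if (wide_vectors) {
        bytes += 16u * num_xmm_regs;              // YMM upper halves
        if (avx512) bytes += 32u * num_xmm_regs;  // ZMM upper halves
      }
      return bytes;
    }

    // 64-bit AVX-512: 32 regs -> 16*32 + 16*32 + 32*32 bytes.
    static_assert(vector_save_area_bytes(32, true, true) == 2048, "sketch");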
 793 
 794 void ShenandoahBarrierSetAssembler::restore_vector_registers(MacroAssembler* masm) {
 795   int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
 796   if (UseAVX > 2) {
 797     num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
 798   }
 799   if (UseSSE == 1)  {
 800     for (int n = 0; n < 8; n++) {
 801       __ movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
 802     }
 803     __ addptr(rsp, sizeof(jdouble)*8);
 804   } else if (UseSSE >= 2)  {
 805     // Restore whole 128bit (16 bytes) XMM registers
 806 #ifdef _LP64
 807     if (VM_Version::supports_evex()) {
 808       for (int n = 0; n < num_xmm_regs; n++) {
 809         __ vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
 810       }
 811     } else {
 812       for (int n = 0; n < num_xmm_regs; n++) {
 813         __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
 814       }
 815     }
 816 #else
 817     for (int n = 0; n < num_xmm_regs; n++) {
 818       __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
 819     }
 820 #endif
 821     __ addptr(rsp, 16*num_xmm_regs);
 822 
 823 #ifdef COMPILER2
 824     if (MaxVectorSize > 16) {
 825       // Restore upper half of YMM registers.
 826       for (int n = 0; n < num_xmm_regs; n++) {
 827         __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
 828       }
 829       __ addptr(rsp, 16*num_xmm_regs);
 830       if (UseAVX > 2) {
 831         for (int n = 0; n < num_xmm_regs; n++) {
 832           __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
 833         }
 834         __ addptr(rsp, 32*num_xmm_regs);
 835       }
 836     }
 837 #endif
 838   }
 839 }
 840 
 841 #undef __
 842 
 843 #ifdef COMPILER1
 844 
 845 #define __ ce->masm()->
 846 
 847 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
 848   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 849   // At this point we know that marking is in progress.
 850   // If do_load() is true then we have to emit the
 851   // load of the previous value; otherwise it has already
 852   // been loaded into _pre_val.
 853 
 854   __ bind(*stub->entry());
 855   assert(stub->pre_val()->is_register(), "Precondition.");
 856 
 857   Register pre_val_reg = stub->pre_val()->as_register();
 858 
 859   if (stub->do_load()) {
 860     ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
 861   }
 862 
 863   __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
 864   __ jcc(Assembler::equal, *stub->continuation());
 865   ce->store_parameter(stub->pre_val()->as_register(), 0);
 866   __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
 867   __ jmp(*stub->continuation());
 868 
 869 }
 870 
 871 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
 872   __ bind(*stub->entry());
 873 
 874   Label done;
 875   Register obj = stub->obj()->as_register();
 876   Register res = stub->result()->as_register();
 877 
 878   if (res != obj) {
 879     __ mov(res, obj);
 880   }
 881 
 882   // Check for null.
 883   __ testptr(res, res);
 884   __ jcc(Assembler::zero, done);
 885 
 886   load_reference_barrier_not_null(ce->masm(), res);
 887 
 888   __ bind(done);
 889   __ jmp(*stub->continuation());
 890 }
 891 
 892 #undef __
 893 
 894 #define __ sasm->
 895 
 896 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
 897   __ prologue("shenandoah_pre_barrier", false);
 898   // arg0 : previous value of memory
 899 
 900   __ push(rax);
 901   __ push(rdx);
 902 
 903   const Register pre_val = rax;
 904   const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
 905   const Register tmp = rdx;
 906 
 907   NOT_LP64(__ get_thread(thread);)
 908 


 932   __ jmp(done);
 933 
 934   __ bind(runtime);
 935 
 936   __ save_live_registers_no_oop_map(true);
 937 
 938   // load the pre-value
 939   __ load_parameter(0, rcx);
 940   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);
 941 
 942   __ restore_live_registers(true);
 943 
 944   __ bind(done);
 945 
 946   __ pop(rdx);
 947   __ pop(rax);
 948 
 949   __ epilogue();
 950 }
 951 
 952 #undef __
 953 
 954 #endif // COMPILER1
 955 
 956 address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
 957   assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
 958   return _shenandoah_lrb;
 959 }
 960 
 961 #define __ cgen->assembler()->
 962 
 963 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
 964   __ align(CodeEntryAlignment);
 965   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
 966   address start = __ pc();
 967 
 968   Label resolve_oop, slow_path;
 969 
 970   // We use RDI, which also serves as the argument register for the slow call.
 971   // RAX always holds the src object ptr, except after the slow call and
 972   // the cmpxchg, when it holds the result. R8 (RBX on 32-bit) is used as a temporary register.
 973 
 974   Register tmp1 = rdi;
 975   Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
 976 
 977   __ push(tmp1);
 978   __ push(tmp2);
 979 
 980   // Check for object being in the collection set.
 981   // TODO: Can we use only 1 register here?
 982   // The source object arrives here in rax.
 983   // live: rax
 984   // live: tmp1
 985   __ mov(tmp1, rax);
 986   __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 987   // live: tmp2
 988   __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
 989   __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
 990   // unlive: tmp1
 991   __ testbool(tmp2);
 992   // unlive: tmp2
 993   __ jccb(Assembler::notZero, resolve_oop);
 994 
 995   __ pop(tmp2);
 996   __ pop(tmp1);
 997   __ ret(0);
 998 
 999   __ bind(resolve_oop);
1000 
1001   __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
1002   // Test if both lowest bits are set. We trick it by negating the bits
1003   // and then testing for both bits clear.
1004   __ notptr(tmp2);
1005   __ testb(tmp2, markOopDesc::marked_value);
1006   __ jccb(Assembler::notZero, slow_path);
1007   // Clear both lower bits. It's still inverted, so set them, and then invert back.
1008   __ orptr(tmp2, markOopDesc::marked_value);
1009   __ notptr(tmp2);
1010   // At this point, tmp2 contains the decoded forwarding pointer.
1011   __ mov(rax, tmp2);
1012 
1013   __ pop(tmp2);
1014   __ pop(tmp1);
1015   __ ret(0);
1016 
1017   __ bind(slow_path);
1018 
1019   __ push(rcx);
1020   __ push(rdx);
1021   __ push(rdi);
1022   __ push(rsi);
1023 #ifdef _LP64
1024   __ push(r8);
1025   __ push(r9);
1026   __ push(r10);
1027   __ push(r11);
1028   __ push(r12);
1029   __ push(r13);
1030   __ push(r14);
1031   __ push(r15);
1032 #endif
1033 
1034   save_vector_registers(cgen->assembler());
1035   __ movptr(rdi, rax);
1036   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rdi);
1037   restore_vector_registers(cgen->assembler());
1038 
1039 #ifdef _LP64
1040   __ pop(r15);
1041   __ pop(r14);
1042   __ pop(r13);
1043   __ pop(r12);
1044   __ pop(r11);
1045   __ pop(r10);
1046   __ pop(r9);
1047   __ pop(r8);
1048 #endif
1049   __ pop(rsi);
1050   __ pop(rdi);
1051   __ pop(rdx);
1052   __ pop(rcx);
1053 
1054   __ pop(tmp2);
1055   __ pop(tmp1);
1056   __ ret(0);
1057 


 719   // itself and from the retry loop.
 720   __ bind(done);
 721   if (!exchange) {
 722     assert(res != NULL, "need result register");
 723 #ifdef _LP64
 724     __ setb(Assembler::equal, res);
 725     __ movzbl(res, res);
 726 #else
 727     // Need something else to clean the result, because some registers
 728     // do not have the byte encoding that movzbl wants. Cannot do the xor first,
 729     // because it modifies the flags.
 730     Label res_non_zero;
 731     __ movptr(res, 1);
 732     __ jcc(Assembler::equal, res_non_zero, true);
 733     __ xorptr(res, res);
 734     __ bind(res_non_zero);
 735 #endif
 736   }
 737 }
 738 
 739 // Generate cset check. If obj is not in the cset, branch to the done label; otherwise fall through.
 740 // obj: Register holding the oop, preserved
 741 // tmp1, tmp2: temp registers, trashed
 742 void ShenandoahBarrierSetAssembler::gen_cset_check(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& done) {
 743   // Check for object being in the collection set.
 744   // TODO: Can we use only 1 register here?
 745   // The source object arrives here in obj.
 746   // live: obj
 747   // live: tmp1
 748   __ mov(tmp1, obj);
 749   __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 750   // live: tmp2
 751   __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
 752   __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
 753   // unlive: tmp1
 754   __ testbool(tmp2);
 755   // unlive: tmp2
 756   __ jcc(Assembler::zero, done);
 757 }
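This helper is the standard Shenandoah in-cset fast test: shift the object address down by the region size to get a region index, then load one byte from the cset map. A hedged C++ sketch of the same test; cset_map and region_size_bytes_shift stand in for ShenandoahHeap::in_cset_fast_test_addr() and ShenandoahHeapRegion::region_size_bytes_shift_jint():

    #include <cstdint>

    // Sketch only; one byte per heap region in cset_map.
    inline bool in_collection_set(const uint8_t* cset_map,
                                  unsigned region_size_bytes_shift,
                                  uintptr_t obj) {
      uintptr_t region_index = obj >> region_size_bytes_shift; // shrptr into tmp1
      return cset_map[region_index] != 0;                      // movbool + testbool
    }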
 758 
 759 // Generate check if object is resolved (forwarded). Branch to the not_resolved label if it is not.
 760 // Otherwise return the resolved object in the obj register.
 761 // obj: object, resolved object on normal return
 762 // tmp: temp register, trashed
 763 void ShenandoahBarrierSetAssembler::gen_resolved_check(MacroAssembler* masm, Register obj, Register tmp, Label& not_resolved) {
 764   __ movptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
 765   // Test if both lowest bits are set. We trick it by negating the bits
 766   // and then testing for both bits clear.
 767   __ notptr(tmp);
 768   __ testb(tmp, markOopDesc::marked_value);
 769   __ jccb(Assembler::notZero, not_resolved);
 770   // Clear both lower bits. It's still inverted, so set them, and then invert back.
 771   __ orptr(tmp, markOopDesc::marked_value);
 772   __ notptr(tmp);
 773   // At this point, tmp contains the decoded forwarding pointer.
 774   __ mov(obj, tmp);
 775 }
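The negate/test/or/negate dance above exists so no extra register is needed for a mask: in the inverted mark word, "both low bits set" reads as "both low bits clear", and or-ing the bits in before inverting back clears them in the result. A sketch of the same decoding, assuming markOopDesc::marked_value is the two low mark bits (value 3), as in this era of the code:

    #include <cstdint>

    // Sketch: returns true and stores the forwarding pointer in *fwd when
    // the mark word says "forwarded" (both low bits set).
    inline bool try_decode_forwardee(uintptr_t mark, uintptr_t* fwd) {
      const uintptr_t marked_value = 3;  // assumption: markOopDesc::marked_value
      uintptr_t inv = ~mark;             // notptr
      if (inv & marked_value) {          // testb + jccb: some low bit was clear
        return false;                    // not forwarded -> not_resolved
      }
      *fwd = ~(inv | marked_value);      // orptr + notptr: mark with low bits cleared
      return true;
    }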
 776 
 777 #undef __
 778 
 779 #ifdef COMPILER1
 780 
 781 #define __ ce->masm()->
 782 
 783 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
 784   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 785   // At this point we know that marking is in progress.
 786   // If do_load() is true then we have to emit the
 787   // load of the previous value; otherwise it has already
 788   // been loaded into _pre_val.
 789 
 790   __ bind(*stub->entry());
 791   assert(stub->pre_val()->is_register(), "Precondition.");
 792 
 793   Register pre_val_reg = stub->pre_val()->as_register();
 794 
 795   if (stub->do_load()) {
 796     ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
 797   }
 798 
 799   __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
 800   __ jcc(Assembler::equal, *stub->continuation());
 801   ce->store_parameter(stub->pre_val()->as_register(), 0);
 802   __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
 803   __ jmp(*stub->continuation());
 804 
 805 }
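Control-flow-wise this stub is the out-of-line tail of the SATB pre-barrier: it is only reached while marking is in progress, filters out a null previous value, and otherwise hands pre_val to the runtime. A sketch of that shape; satb_enqueue is a stand-in for the C1 runtime code blob, which bottoms out in ShenandoahRuntime::write_ref_field_pre_entry:

    void satb_enqueue(void* pre_val);  // stand-in for the runtime blob

    // Sketch of the stub's control flow, not the emitted assembly.
    void pre_barrier_stub(void* pre_val) {
      if (pre_val == nullptr) return;  // cmpptr + jcc(equal) -> continuation
      satb_enqueue(pre_val);           // store_parameter + call(RuntimeAddress(...))
    }                                  // jmp *stub->continuation()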
 806 
 807 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
 808   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 809   __ bind(*stub->entry());
 810 
 811   Register obj = stub->obj()->as_register();
 812   Register res = stub->result()->as_register();
 813   Register tmp1 = stub->tmp1()->as_register();
 814   Register tmp2 = stub->tmp2()->as_register();
 815 
 816   Label slow_path;
 817 
 818   assert(res == rax, "result must arrive in rax");
 819 
 820   if (res != obj) {
 821     __ mov(res, obj);
 822   }
 823 
 824   // Check for null.
 825   __ testptr(res, res);
 826   __ jcc(Assembler::zero, *stub->continuation());
 827 
 828   gen_cset_check(ce->masm(), res, tmp1, tmp2, *stub->continuation());
 829   gen_resolved_check(ce->masm(), rax, tmp1, slow_path);
 830 
 831   __ jmp(*stub->continuation());
 832 
 833   __ bind(slow_path);
 834   ce->store_parameter(res, 0);
 835   __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
 836 
 837   __ jmp(*stub->continuation());
 838 }
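Taken together, the refactored stub now reads as the usual load-reference-barrier decision tree. A hedged sketch of the path it encodes; the helpers are stand-ins mirroring gen_cset_check and gen_resolved_check above:

    // Stand-ins for the pieces sketched earlier, plus the slow-path blob.
    bool  in_cset(void* obj);
    bool  try_resolve(void* obj, void** fwd);
    void* runtime_lrb(void* obj);

    // Sketch of the control flow the stub emits.
    void* load_reference_barrier(void* obj) {
      if (obj == nullptr) return nullptr;      // null check -> continuation
      if (!in_cset(obj)) return obj;           // gen_cset_check -> continuation
      void* fwd;
      if (try_resolve(obj, &fwd)) return fwd;  // gen_resolved_check fast path
      return runtime_lrb(obj);                 // slow path: C1 runtime code blob
    }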
 839 
 840 #undef __
 841 
 842 #define __ sasm->
 843 
 844 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
 845   __ prologue("shenandoah_pre_barrier", false);
 846   // arg0 : previous value of memory
 847 
 848   __ push(rax);
 849   __ push(rdx);
 850 
 851   const Register pre_val = rax;
 852   const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
 853   const Register tmp = rdx;
 854 
 855   NOT_LP64(__ get_thread(thread);)
 856 


 880   __ jmp(done);
 881 
 882   __ bind(runtime);
 883 
 884   __ save_live_registers_no_oop_map(true);
 885 
 886   // load the pre-value
 887   __ load_parameter(0, rcx);
 888   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);
 889 
 890   __ restore_live_registers(true);
 891 
 892   __ bind(done);
 893 
 894   __ pop(rdx);
 895   __ pop(rax);
 896 
 897   __ epilogue();
 898 }
 899 
 900 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
 901   __ prologue("shenandoah_load_reference_barrier", false);
 902   // arg0 : object to be resolved
 903 
 904   __ save_live_registers_no_oop_map(true);
 905   __ load_parameter(0, c_rarg0);
 906   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0);
 907   __ restore_live_registers_except_rax(true);
 908 
 909   __ epilogue();
 910 }
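The one subtlety here is restore_live_registers_except_rax: call_VM_leaf leaves the barrier's result in rax, and restoring every live register except rax is what lets that result survive back to the calling stub. A minimal sketch of the convention (all names are stand-ins, not HotSpot API):

    void  save_live_registers();                   // save_live_registers_no_oop_map(true)
    void  restore_live_registers_except_result();  // restore_live_registers_except_rax(true)
    void* runtime_lrb(void* obj);                  // ShenandoahRuntime::load_reference_barrier

    void* lrb_runtime_stub(void* arg0) {
      save_live_registers();
      void* res = runtime_lrb(arg0);           // result lands in rax
      restore_live_registers_except_result();  // everything but rax, so res survives
      return res;
    }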
 911 
 912 #undef __
 913 
 914 #endif // COMPILER1
 915 
 916 address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
 917   assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
 918   return _shenandoah_lrb;
 919 }
 920 
 921 #define __ cgen->assembler()->
 922 
 923 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
 924   __ align(CodeEntryAlignment);
 925   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
 926   address start = __ pc();
 927 
 928   Label resolve_oop, slow_path, done;
 929 
 930   // We use RDI, which also serves as the argument register for the slow call.
 931   // RAX always holds the src object ptr, except after the slow call,
 932   // when it holds the result. R8 (RBX on 32-bit) is used as a temporary register.
 933 
 934   Register tmp1 = rdi;
 935   Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
 936 
 937   __ push(tmp1);
 938   __ push(tmp2);
 939 
 940   gen_cset_check(cgen->assembler(), rax, tmp1, tmp2, done);
 941 
 942   __ bind(resolve_oop);
 943 
 944   gen_resolved_check(cgen->assembler(), rax, tmp2, slow_path);
 945 
 946   __ bind(done);
 947   __ pop(tmp2);
 948   __ pop(tmp1);
 949   __ ret(0);
 950 
 951   __ bind(slow_path);
 952 
 953   __ push(rcx);
 954   __ push(rdx);
 955   __ push(rdi);
 956   __ push(rsi);
 957 #ifdef _LP64
 958   __ push(r8);
 959   __ push(r9);
 960   __ push(r10);
 961   __ push(r11);
 962   __ push(r12);
 963   __ push(r13);
 964   __ push(r14);
 965   __ push(r15);
 966 #endif
 967 
 968   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax);
 969 
 970 #ifdef _LP64
 971   __ pop(r15);
 972   __ pop(r14);
 973   __ pop(r13);
 974   __ pop(r12);
 975   __ pop(r11);
 976   __ pop(r10);
 977   __ pop(r9);
 978   __ pop(r8);
 979 #endif
 980   __ pop(rsi);
 981   __ pop(rdi);
 982   __ pop(rdx);
 983   __ pop(rcx);
 984 
 985   __ pop(tmp2);
 986   __ pop(tmp1);
 987   __ ret(0);
 988 