< prev index next >

src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp

Print this page
rev 55741 : 8228369: Shenandoah: Refactor LRB C1 stubs


 719   // itself and from the retry loop.
 720   __ bind(done);
 721   if (!exchange) {
 722     assert(res != NULL, "need result register");
 723 #ifdef _LP64
 724     __ setb(Assembler::equal, res);
 725     __ movzbl(res, res);
 726 #else
 727     // Need something else to clean the result, because some registers
 728     // do not have byte encoding that movzbl wants. Cannot do the xor first,
 729     // because it modifies the flags.
 730     Label res_non_zero;
 731     __ movptr(res, 1);
 732     __ jcc(Assembler::equal, res_non_zero, true);
 733     __ xorptr(res, res);
 734     __ bind(res_non_zero);
 735 #endif
 736   }
 737 }
 738 
// Spill all live vector state (XMM/YMM/ZMM) to the stack so that a runtime
// call made afterwards cannot clobber it. Must be paired with
// restore_vector_registers(), which pops the same layout in reverse order.
// The on-stack layout depends on UseSSE / UseAVX / MaxVectorSize:
//   - UseSSE == 1: only the low float of xmm0-xmm7, one jdouble-sized slot each
//   - UseSSE >= 2: optionally upper ZMM halves and upper YMM halves
//                  (COMPILER2 with wide vectors), then the full low 128 bits
//                  of every XMM register
void ShenandoahBarrierSetAssembler::save_vector_registers(MacroAssembler* masm) {
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    // AVX-512 on 64-bit exposes xmm0-xmm31; 32-bit still has only 8.
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }

  if (UseSSE == 1)  {
    // SSE1 has no 128-bit move; save only the low 32 bits of each register,
    // but keep jdouble-sized slots per register.
    __ subptr(rsp, sizeof(jdouble)*8);
    for (int n = 0; n < 8; n++) {
      __ movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
    }
  } else if (UseSSE >= 2)  {
    if (UseAVX > 2) {
      // Set opmask k1 to all-ones before emitting EVEX instructions.
      // NOTE(review): the previous k1 value is clobbered and never restored
      // by restore_vector_registers() -- confirm no caller relies on k1
      // surviving across this save/restore pair.
      __ push(rbx);
      __ movl(rbx, 0xffff);
      __ kmovwl(k1, rbx);
      __ pop(rbx);
    }
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      if(UseAVX > 2) {
        // Save upper half of ZMM registers
        __ subptr(rsp, 32*num_xmm_regs);
        for (int n = 0; n < num_xmm_regs; n++) {
          __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
        }
      }
      assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      __ subptr(rsp, 16*num_xmm_regs);
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#endif
    // Save whole 128bit (16 bytes) XMM registers
    __ subptr(rsp, 16*num_xmm_regs);
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      // EVEX-encoded extract is used when available -- presumably because the
      // plain SSE movdqu encoding cannot reach xmm16-xmm31; verify against
      // the matching code in sharedRuntime_x86.
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
    }
#endif
  }
}
 793 
// Reload all vector registers spilled by save_vector_registers(), in the
// reverse order of the save, and release the stack space. The UseSSE /
// UseAVX / MaxVectorSize configuration must be unchanged since the save,
// because it determines the on-stack layout being popped.
void ShenandoahBarrierSetAssembler::restore_vector_registers(MacroAssembler* masm) {
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    // AVX-512 on 64-bit exposes xmm0-xmm31; 32-bit still has only 8.
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }
  if (UseSSE == 1)  {
    // SSE1: reload the low float of xmm0-xmm7 from jdouble-sized slots.
    for (int n = 0; n < 8; n++) {
      __ movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
    }
    __ addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2)  {
    // Restore whole 128bit (16 bytes) XMM registers
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      // EVEX-encoded insert, mirroring the vextractf32x4 used on save.
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
    }
#endif
    __ addptr(rsp, 16*num_xmm_regs);

#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
      }
      __ addptr(rsp, 16*num_xmm_regs);
      if (UseAVX > 2) {
        // Restore upper half of ZMM registers.
        for (int n = 0; n < num_xmm_regs; n++) {
          __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
        }
        __ addptr(rsp, 32*num_xmm_regs);
      }
    }
#endif
  }
}
 840 
 841 #undef __
 842 
 843 #ifdef COMPILER1
 844 
 845 #define __ ce->masm()->
 846 
// C1 slow-path stub for the Shenandoah pre-write barrier: hands the previous
// field value to the out-of-line runtime blob, then resumes at the stub's
// continuation. A NULL previous value needs no barrier and skips the call.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  // NULL previous value: nothing to report.
  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  // Pass the pre-value as stack parameter 0 and call the runtime stub.
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}
 870 
// C1 slow-path stub for the load-reference-barrier: copies obj into the
// result register if needed, then resolves it in place via
// load_reference_barrier_not_null(). NULL objects skip the barrier.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {

  __ bind(*stub->entry());

  Label done;
  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();

  // The barrier resolves in place, so work on the result register.
  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, done);

  load_reference_barrier_not_null(ce->masm(), res);

  __ bind(done);
  __ jmp(*stub->continuation());
}
 891 
 892 #undef __
 893 
 894 #define __ sasm->
 895 
 896 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
 897   __ prologue("shenandoah_pre_barrier", false);
 898   // arg0 : previous value of memory
 899 
 900   __ push(rax);
 901   __ push(rdx);
 902 
 903   const Register pre_val = rax;
 904   const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
 905   const Register tmp = rdx;
 906 
 907   NOT_LP64(__ get_thread(thread);)
 908 


 932   __ jmp(done);
 933 
 934   __ bind(runtime);
 935 
 936   __ save_live_registers_no_oop_map(true);
 937 
 938   // load the pre-value
 939   __ load_parameter(0, rcx);
 940   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);
 941 
 942   __ restore_live_registers(true);
 943 
 944   __ bind(done);
 945 
 946   __ pop(rdx);
 947   __ pop(rax);
 948 
 949   __ epilogue();
 950 }
 951 











































 952 #undef __
 953 
 954 #endif // COMPILER1
 955 
 956 address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
 957   assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
 958   return _shenandoah_lrb;
 959 }
 960 
 961 #define __ cgen->assembler()->
 962 
 963 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
 964   __ align(CodeEntryAlignment);
 965   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
 966   address start = __ pc();
 967 
 968   Label resolve_oop, slow_path;
 969 
 970   // We use RDI, which also serves as argument register for slow call.
 971   // RAX always holds the src object ptr, except after the slow call and
 972   // the cmpxchg, then it holds the result. R8/RBX is used as temporary register.
 973 
 974   Register tmp1 = rdi;
 975   Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
 976 
 977   __ push(tmp1);
 978   __ push(tmp2);
 979 
 980   // Check for object being in the collection set.
 981   // TODO: Can we use only 1 register here?
 982   // The source object arrives here in rax.
 983   // live: rax
 984   // live: tmp1
 985   __ mov(tmp1, rax);
 986   __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
 987   // live: tmp2
 988   __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
 989   __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
 990   // unlive: tmp1
 991   __ testbool(tmp2);
 992   // unlive: tmp2
 993   __ jccb(Assembler::notZero, resolve_oop);
 994 
 995   __ pop(tmp2);
 996   __ pop(tmp1);
 997   __ ret(0);
 998 
 999   __ bind(resolve_oop);
1000 
1001   __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
1002   // Test if both lowest bits are set. We trick it by negating the bits
1003   // then test for both bits clear.
1004   __ notptr(tmp2);
1005   __ testb(tmp2, markOopDesc::marked_value);
1006   __ jccb(Assembler::notZero, slow_path);
1007   // Clear both lower bits. It's still inverted, so set them, and then invert back.
1008   __ orptr(tmp2, markOopDesc::marked_value);
1009   __ notptr(tmp2);
1010   // At this point, tmp2 contains the decoded forwarding pointer.
1011   __ mov(rax, tmp2);
1012 
1013   __ pop(tmp2);
1014   __ pop(tmp1);
1015   __ ret(0);
1016 
1017   __ bind(slow_path);
1018 
1019   __ push(rcx);
1020   __ push(rdx);
1021   __ push(rdi);
1022   __ push(rsi);
1023 #ifdef _LP64
1024   __ push(r8);
1025   __ push(r9);
1026   __ push(r10);
1027   __ push(r11);
1028   __ push(r12);
1029   __ push(r13);
1030   __ push(r14);
1031   __ push(r15);
1032 #endif
1033 
1034   save_vector_registers(cgen->assembler());
1035   __ movptr(rdi, rax);
1036   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rdi);
1037   restore_vector_registers(cgen->assembler());
1038 
1039 #ifdef _LP64
1040   __ pop(r15);
1041   __ pop(r14);
1042   __ pop(r13);
1043   __ pop(r12);
1044   __ pop(r11);
1045   __ pop(r10);
1046   __ pop(r9);
1047   __ pop(r8);
1048 #endif
1049   __ pop(rsi);
1050   __ pop(rdi);
1051   __ pop(rdx);
1052   __ pop(rcx);
1053 
1054   __ pop(tmp2);
1055   __ pop(tmp1);
1056   __ ret(0);
1057 


 719   // itself and from the retry loop.
 720   __ bind(done);
 721   if (!exchange) {
 722     assert(res != NULL, "need result register");
 723 #ifdef _LP64
 724     __ setb(Assembler::equal, res);
 725     __ movzbl(res, res);
 726 #else
 727     // Need something else to clean the result, because some registers
 728     // do not have byte encoding that movzbl wants. Cannot do the xor first,
 729     // because it modifies the flags.
 730     Label res_non_zero;
 731     __ movptr(res, 1);
 732     __ jcc(Assembler::equal, res_non_zero, true);
 733     __ xorptr(res, res);
 734     __ bind(res_non_zero);
 735 #endif
 736   }
 737 }
 738 
// Generate cset check. If obj is in cset, branch to in_cset label, otherwise fall through
// obj: Register holding the oop, preserved
// tmp1, tmp2: temp registers, trashed
// NOTE: the branch to in_cset is emitted as a short jump (jccb), so the label
// must bind within byte-displacement range of this code.
void ShenandoahBarrierSetAssembler::gen_cset_check(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& in_cset) {
  // Check for object being in the collection set.
  // TODO: Can we use only 1 register here?
  // Compute the heap-region index of obj into tmp1 ...
  __ mov(tmp1, obj);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  // ... and use it to index the in-cset fast-test byte table.
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  // A non-zero table entry means the object's region is in the collection set.
  __ testbool(tmp2);
  __ jccb(Assembler::notZero, in_cset);
}
 758 
// Generate check whether the object is already resolved (mark word encodes a
// forwarding pointer). Branches to not_resolved if it is not; otherwise falls
// through with the forwardee in the obj register.
// obj: object, resolved object on normal return
// tmp: temp register, trashed
// NOTE: the branch to not_resolved is a short jump (jccb); the label must
// bind within byte-displacement range.
void ShenandoahBarrierSetAssembler::gen_resolved_check(MacroAssembler* masm, Register obj, Register tmp, Label& not_resolved) {
  __ movptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // then test for both bits clear.
  __ notptr(tmp);
  __ testb(tmp, markOopDesc::marked_value);
  __ jccb(Assembler::notZero, not_resolved);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp, markOopDesc::marked_value);
  __ notptr(tmp);
  // At this point, tmp contains the decoded forwarding pointer.
  __ mov(obj, tmp);
}
 776 
 777 #undef __
 778 
 779 #ifdef COMPILER1
 780 
 781 #define __ ce->masm()->
 782 
// C1 slow-path stub for the Shenandoah pre-write barrier: hands the previous
// field value to the out-of-line runtime blob, then resumes at the stub's
// continuation. A NULL previous value needs no barrier and skips the call.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  // NULL previous value: nothing to report.
  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  // Pass the pre-value as stack parameter 0 and call the runtime stub.
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}
 806 
// C1 slow-path stub for the load-reference-barrier: passes the object as
// stack parameter 0 to the shared C1 runtime blob and resumes at the
// continuation. The resolved object comes back in rax (asserted below).
// NULL objects skip the barrier entirely.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  assert(res == rax, "result must arrive in rax");

  // The runtime blob returns into the result register, so work on it.
  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Hand the object to the out-of-line runtime stub.
  ce->store_parameter(res, 0);
  __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));

  __ jmp(*stub->continuation());
}
 828 
 829 #undef __
 830 
 831 #define __ sasm->
 832 
 833 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
 834   __ prologue("shenandoah_pre_barrier", false);
 835   // arg0 : previous value of memory
 836 
 837   __ push(rax);
 838   __ push(rdx);
 839 
 840   const Register pre_val = rax;
 841   const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
 842   const Register tmp = rdx;
 843 
 844   NOT_LP64(__ get_thread(thread);)
 845 


 869   __ jmp(done);
 870 
 871   __ bind(runtime);
 872 
 873   __ save_live_registers_no_oop_map(true);
 874 
 875   // load the pre-value
 876   __ load_parameter(0, rcx);
 877   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);
 878 
 879   __ restore_live_registers(true);
 880 
 881   __ bind(done);
 882 
 883   __ pop(rdx);
 884   __ pop(rax);
 885 
 886   __ epilogue();
 887 }
 888 
// Out-of-line C1 runtime stub for the load-reference-barrier. The object to
// resolve arrives as stack parameter 0; the resolved object is left in rax.
// Two fast paths (not in cset; already forwarded) avoid the runtime call.
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as argument register for slow call.
  // RAX always holds the src object ptr, except after the slow call,
  // then it holds the result. R8/RBX is used as temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  __ load_parameter(0, rax);

  // Fast path 1: object not in the collection set -- return it unchanged.
  gen_cset_check(sasm, rax, tmp1, tmp2, resolve_oop);

  __ pop(tmp2);
  __ pop(tmp1);
  __ epilogue();

  __ bind(resolve_oop);

  // Fast path 2: object already forwarded -- rax now holds the forwardee.
  gen_resolved_check(sasm, rax, tmp1, slow_path);

  __ pop(tmp2);
  __ pop(tmp1);
  __ epilogue();

  __ bind(slow_path);

  // Slow path: full runtime call; all registers except rax (the result)
  // are saved and restored around it.
  __ save_live_registers_no_oop_map(true);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax);
  __ restore_live_registers_except_rax(true);

  __ pop(tmp2);
  __ pop(tmp1);
  __ epilogue();
}
 931 
 932 #undef __
 933 
 934 #endif // COMPILER1
 935 
 936 address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
 937   assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
 938   return _shenandoah_lrb;
 939 }
 940 
 941 #define __ cgen->assembler()->
 942 
 943 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
 944   __ align(CodeEntryAlignment);
 945   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
 946   address start = __ pc();
 947 
 948   Label resolve_oop, slow_path;
 949 
 950   // We use RDI, which also serves as argument register for slow call.
 951   // RAX always holds the src object ptr, except after the slow call,
 952   // then it holds the result. R8/RBX is used as temporary register.
 953 
 954   Register tmp1 = rdi;
 955   Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
 956 
 957   __ push(tmp1);
 958   __ push(tmp2);
 959 
 960   gen_cset_check(cgen->assembler(), rax, tmp1, tmp2, resolve_oop);













 961 
 962   __ pop(tmp2);
 963   __ pop(tmp1);
 964   __ ret(0);
 965 
 966   __ bind(resolve_oop);
 967 
 968   gen_resolved_check(cgen->assembler(), rax, tmp2, slow_path);










 969 
 970   __ pop(tmp2);
 971   __ pop(tmp1);
 972   __ ret(0);
 973 
 974   __ bind(slow_path);
 975 
 976   __ push(rcx);
 977   __ push(rdx);
 978   __ push(rdi);
 979   __ push(rsi);
 980 #ifdef _LP64
 981   __ push(r8);
 982   __ push(r9);
 983   __ push(r10);
 984   __ push(r11);
 985   __ push(r12);
 986   __ push(r13);
 987   __ push(r14);
 988   __ push(r15);
 989 #endif
 990 
 991   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax);



 992 
 993 #ifdef _LP64
 994   __ pop(r15);
 995   __ pop(r14);
 996   __ pop(r13);
 997   __ pop(r12);
 998   __ pop(r11);
 999   __ pop(r10);
1000   __ pop(r9);
1001   __ pop(r8);
1002 #endif
1003   __ pop(rsi);
1004   __ pop(rdi);
1005   __ pop(rdx);
1006   __ pop(rcx);
1007 
1008   __ pop(tmp2);
1009   __ pop(tmp1);
1010   __ ret(0);
1011 
< prev index next >