719 // itself and from the retry loop.
720 __ bind(done);
721 if (!exchange) {
722 assert(res != NULL, "need result register");
723 #ifdef _LP64
724 __ setb(Assembler::equal, res);
725 __ movzbl(res, res);
726 #else
727 // Need something else to clean the result, because some registers
728 // do not have byte encoding that movzbl wants. Cannot do the xor first,
729 // because it modifies the flags.
730 Label res_non_zero;
731 __ movptr(res, 1);
732 __ jcc(Assembler::equal, res_non_zero, true);
733 __ xorptr(res, res);
734 __ bind(res_non_zero);
735 #endif
736 }
737 }
738
// Spill all XMM/YMM/ZMM registers to the stack so that a runtime call
// cannot clobber live vector state. Must be paired with
// restore_vector_registers(), which unwinds the same stack layout in
// reverse order (low 128 bits innermost, then YMM uppers, then ZMM uppers).
void ShenandoahBarrierSetAssembler::save_vector_registers(MacroAssembler* masm) {
  // 16 XMM registers on x86_64 (32 with AVX-512), 8 on x86_32.
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }

  if (UseSSE == 1) {
    // SSE1 only: save xmm0-7 with scalar float moves, one per jdouble slot.
    __ subptr(rsp, sizeof(jdouble)*8);
    for (int n = 0; n < 8; n++) {
      __ movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
    }
  } else if (UseSSE >= 2) {
    if (UseAVX > 2) {
      // Load an all-ones mask into k1 for the EVEX instructions below.
      // NOTE(review): k1 is not saved/restored here — presumably it is
      // treated as scratch by all callers; verify.
      __ push(rbx);
      __ movl(rbx, 0xffff);
      __ kmovwl(k1, rbx);
      __ pop(rbx);
    }
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      if(UseAVX > 2) {
        // Save upper half of ZMM registers
        __ subptr(rsp, 32*num_xmm_regs);
        for (int n = 0; n < num_xmm_regs; n++) {
          __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
        }
      }
      assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      __ subptr(rsp, 16*num_xmm_regs);
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#endif
    // Save whole 128bit (16 bytes) XMM registers
    __ subptr(rsp, 16*num_xmm_regs);
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      // EVEX form covers the extended register set — NOTE(review):
      // presumably because movdqu cannot encode xmm16-31; verify.
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
    }
#endif
  }
}
793
// Reload all XMM/YMM/ZMM registers saved by save_vector_registers(),
// releasing the corresponding stack space. The restore order mirrors
// the save order in reverse: low 128 bits first, then YMM upper
// halves, then ZMM upper halves.
void ShenandoahBarrierSetAssembler::restore_vector_registers(MacroAssembler* masm) {
  // Must match the register count computed in save_vector_registers().
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }
  if (UseSSE == 1) {
    // SSE1 only: restore xmm0-7 with scalar float moves.
    for (int n = 0; n < 8; n++) {
      __ movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
    }
    __ addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2) {
    // Restore whole 128bit (16 bytes) XMM registers
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      // EVEX form covers the extended register set — NOTE(review):
      // presumably because movdqu cannot encode xmm16-31; verify.
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
    }
#endif
    __ addptr(rsp, 16*num_xmm_regs);

#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
      }
      __ addptr(rsp, 16*num_xmm_regs);
      if (UseAVX > 2) {
        // Restore upper half of ZMM registers.
        for (int n = 0; n < num_xmm_regs; n++) {
          __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
        }
        __ addptr(rsp, 32*num_xmm_regs);
      }
    }
#endif
  }
}
840
841 #undef __
842
843 #ifdef COMPILER1
844
845 #define __ ce->masm()->
846
// Emit the C1 slow-path stub for the SATB pre-barrier: load the
// previous field value if required, skip the barrier for a null
// previous value, otherwise call the pre-barrier runtime stub and
// resume at the stub's continuation.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  // A null previous value needs no pre-barrier work; skip the runtime call.
  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  // Pass pre_val as parameter 0 to the pre-barrier runtime stub.
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}
870
// Emit the C1 slow-path stub for the load-reference barrier: a null
// object passes through unchanged; a non-null object is run through
// load_reference_barrier_not_null() with the result left in res.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  __ bind(*stub->entry());

  Label done;
  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();

  // Move the object into the result register; the barrier operates in place.
  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, done);

  // Apply the barrier to the non-null object in res.
  load_reference_barrier_not_null(ce->masm(), res);

  __ bind(done);
  __ jmp(*stub->continuation());
}
891
892 #undef __
893
894 #define __ sasm->
895
// Generate the out-of-line C1 runtime stub for the SATB pre-barrier.
// arg0 is the previous value of the field; when the runtime path is
// taken it is handed to ShenandoahRuntime::write_ref_field_pre_entry.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  // rax/rdx are used as scratch below; preserve them for the caller.
  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  // On 64-bit the current thread lives in r15; 32-bit must load it.
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  // Fast path finished: skip the runtime call.
  __ jmp(done);

  __ bind(runtime);

  // Spill all registers (no oop map needed) around the leaf call.
  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  // Restore the scratch registers pushed on entry.
  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}
951
952 #undef __
953
954 #endif // COMPILER1
955
956 address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
957 assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
958 return _shenandoah_lrb;
959 }
960
961 #define __ cgen->assembler()->
962
963 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
964 __ align(CodeEntryAlignment);
965 StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
966 address start = __ pc();
967
968 Label resolve_oop, slow_path;
969
970 // We use RDI, which also serves as argument register for slow call.
971 // RAX always holds the src object ptr, except after the slow call and
972 // the cmpxchg, then it holds the result. R8/RBX is used as temporary register.
973
974 Register tmp1 = rdi;
975 Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
976
977 __ push(tmp1);
978 __ push(tmp2);
979
980 // Check for object being in the collection set.
981 // TODO: Can we use only 1 register here?
982 // The source object arrives here in rax.
983 // live: rax
984 // live: tmp1
985 __ mov(tmp1, rax);
986 __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
987 // live: tmp2
988 __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
989 __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
990 // unlive: tmp1
991 __ testbool(tmp2);
992 // unlive: tmp2
993 __ jccb(Assembler::notZero, resolve_oop);
994
995 __ pop(tmp2);
996 __ pop(tmp1);
997 __ ret(0);
998
999 __ bind(resolve_oop);
1000
1001 __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
1002 // Test if both lowest bits are set. We trick it by negating the bits
1003 // then test for both bits clear.
1004 __ notptr(tmp2);
1005 __ testb(tmp2, markOopDesc::marked_value);
1006 __ jccb(Assembler::notZero, slow_path);
1007 // Clear both lower bits. It's still inverted, so set them, and then invert back.
1008 __ orptr(tmp2, markOopDesc::marked_value);
1009 __ notptr(tmp2);
1010 // At this point, tmp2 contains the decoded forwarding pointer.
1011 __ mov(rax, tmp2);
1012
1013 __ pop(tmp2);
1014 __ pop(tmp1);
1015 __ ret(0);
1016
1017 __ bind(slow_path);
1018
1019 __ push(rcx);
1020 __ push(rdx);
1021 __ push(rdi);
1022 __ push(rsi);
1023 #ifdef _LP64
1024 __ push(r8);
1025 __ push(r9);
1026 __ push(r10);
1027 __ push(r11);
1028 __ push(r12);
1029 __ push(r13);
1030 __ push(r14);
1031 __ push(r15);
1032 #endif
1033
1034 save_vector_registers(cgen->assembler());
1035 __ movptr(rdi, rax);
1036 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rdi);
1037 restore_vector_registers(cgen->assembler());
1038
1039 #ifdef _LP64
1040 __ pop(r15);
1041 __ pop(r14);
1042 __ pop(r13);
1043 __ pop(r12);
1044 __ pop(r11);
1045 __ pop(r10);
1046 __ pop(r9);
1047 __ pop(r8);
1048 #endif
1049 __ pop(rsi);
1050 __ pop(rdi);
1051 __ pop(rdx);
1052 __ pop(rcx);
1053
1054 __ pop(tmp2);
1055 __ pop(tmp1);
1056 __ ret(0);
1057
|
719 // itself and from the retry loop.
720 __ bind(done);
721 if (!exchange) {
722 assert(res != NULL, "need result register");
723 #ifdef _LP64
724 __ setb(Assembler::equal, res);
725 __ movzbl(res, res);
726 #else
727 // Need something else to clean the result, because some registers
728 // do not have byte encoding that movzbl wants. Cannot do the xor first,
729 // because it modifies the flags.
730 Label res_non_zero;
731 __ movptr(res, 1);
732 __ jcc(Assembler::equal, res_non_zero, true);
733 __ xorptr(res, res);
734 __ bind(res_non_zero);
735 #endif
736 }
737 }
738
// Generate cset check. If obj is not in cset, branch to done label, otherwise fall through
// obj: Register holding the oop, preserved
// tmp1, tmp2: temp registers, trashed
void ShenandoahBarrierSetAssembler::gen_cset_check(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& done) {
  // Check for object being in the collection set.
  // TODO: Can we use only 1 register here?
  // Compute the heap-region index of obj: address >> region-size shift.
  __ mov(tmp1, obj);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  // Load the per-region byte from the in-cset fast-test table.
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  // tmp1 is dead from here on.
  __ testbool(tmp2);
  // Zero table entry means "not in collection set": nothing to do.
  __ jcc(Assembler::zero, done);
}
758
// Generate check if object is resolved (i.e. already carries a forwarding
// pointer). Branch to not_resolved label if it is not. Otherwise return resolved
// object in obj register.
// obj: object, resolved object on normal return
// tmp: temp register, trashed
void ShenandoahBarrierSetAssembler::gen_resolved_check(MacroAssembler* masm, Register obj, Register tmp, Label& not_resolved) {
  __ movptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // then test for both bits clear.
  __ notptr(tmp);
  __ testb(tmp, markOopDesc::marked_value);
  __ jccb(Assembler::notZero, not_resolved);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp, markOopDesc::marked_value);
  __ notptr(tmp);
  // At this point, tmp contains the decoded forwarding pointer.
  __ mov(obj, tmp);
}
776
777 #undef __
778
779 #ifdef COMPILER1
780
781 #define __ ce->masm()->
782
// Emit the C1 slow-path stub for the SATB pre-barrier: load the
// previous field value if required, skip the barrier for a null
// previous value, otherwise call the pre-barrier runtime stub and
// resume at the stub's continuation.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  // A null previous value needs no pre-barrier work; skip the runtime call.
  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  // Pass pre_val as parameter 0 to the pre-barrier runtime stub.
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}
806
// Emit the C1 slow-path stub for the load-reference barrier.
// Fast paths: a null object, an object outside the collection set, or
// an object that is already forwarded (forwarding pointer decoded
// in-line) all continue without a runtime call. Otherwise the C1
// runtime stub is invoked, leaving its result in rax.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();

  Label slow_path;

  // The runtime stub below delivers its result in rax.
  assert(res == rax, "result must arrive in rax");

  // Move the object into the result register; the barrier operates in place.
  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Not in the collection set: res already holds the right oop.
  gen_cset_check(ce->masm(), res, tmp1, tmp2, *stub->continuation());
  // Already forwarded: rax (== res) is updated in place; else go slow.
  gen_resolved_check(ce->masm(), rax, tmp1, slow_path);

  __ jmp(*stub->continuation());

  __ bind(slow_path);
  // Call the C1 runtime stub with the object as parameter 0.
  ce->store_parameter(res, 0);
  __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));

  __ jmp(*stub->continuation());
}
839
840 #undef __
841
842 #define __ sasm->
843
// Generate the out-of-line C1 runtime stub for the SATB pre-barrier.
// arg0 is the previous value of the field; when the runtime path is
// taken it is handed to ShenandoahRuntime::write_ref_field_pre_entry.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  // rax/rdx are used as scratch below; preserve them for the caller.
  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  // On 64-bit the current thread lives in r15; 32-bit must load it.
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  // Fast path finished: skip the runtime call.
  __ jmp(done);

  __ bind(runtime);

  // Spill all registers (no oop map needed) around the leaf call.
  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  // Restore the scratch registers pushed on entry.
  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}
899
// Generate the out-of-line C1 runtime stub for the load-reference
// barrier. arg0 is the object to be resolved; the resolved object is
// returned in rax while all other live registers are restored.
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ save_live_registers_no_oop_map(true);
  __ load_parameter(0, c_rarg0);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0);
  // Keep rax: it holds the barrier's result.
  __ restore_live_registers_except_rax(true);

  __ epilogue();
}
911
912 #undef __
913
914 #endif // COMPILER1
915
916 address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
917 assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
918 return _shenandoah_lrb;
919 }
920
921 #define __ cgen->assembler()->
922
923 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
924 __ align(CodeEntryAlignment);
925 StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
926 address start = __ pc();
927
928 Label resolve_oop, slow_path, done;
929
930 // We use RDI, which also serves as argument register for slow call.
931 // RAX always holds the src object ptr, except after the slow call,
932 // then it holds the result. R8/RBX is used as temporary register.
933
934 Register tmp1 = rdi;
935 Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
936
937 __ push(tmp1);
938 __ push(tmp2);
939
940 gen_cset_check(cgen->assembler(), rax, tmp1, tmp2, done);
941
942 __ bind(resolve_oop);
943
944 gen_resolved_check(cgen->assembler(), rax, tmp2, slow_path);
945
946 __ bind(done);
947 __ pop(tmp2);
948 __ pop(tmp1);
949 __ ret(0);
950
951 __ bind(slow_path);
952
953 __ push(rcx);
954 __ push(rdx);
955 __ push(rdi);
956 __ push(rsi);
957 #ifdef _LP64
958 __ push(r8);
959 __ push(r9);
960 __ push(r10);
961 __ push(r11);
962 __ push(r12);
963 __ push(r13);
964 __ push(r14);
965 __ push(r15);
966 #endif
967
968 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax);
969
970 #ifdef _LP64
971 __ pop(r15);
972 __ pop(r14);
973 __ pop(r13);
974 __ pop(r12);
975 __ pop(r11);
976 __ pop(r10);
977 __ pop(r9);
978 __ pop(r8);
979 #endif
980 __ pop(rsi);
981 __ pop(rdi);
982 __ pop(rdx);
983 __ pop(rcx);
984
985 __ pop(tmp2);
986 __ pop(tmp1);
987 __ ret(0);
988
|