149 const Address result_type (rbp, 4 * wordSize);
150 const Address method (rbp, 5 * wordSize);
151 const Address entry_point (rbp, 6 * wordSize);
152 const Address parameters (rbp, 7 * wordSize);
153 const Address parameter_size(rbp, 8 * wordSize);
154 const Address thread (rbp, 9 * wordSize); // same as in generate_catch_exception()!
155 sse_save = UseSSE > 0;
156
157 // stub code
158 __ enter();
159 __ movptr(rcx, parameter_size); // parameter counter
160 __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
161 __ addptr(rcx, locals_count_in_bytes); // reserve space for register saves
162 __ subptr(rsp, rcx);
163 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack
164
165 // save rdi, rsi, & rbx, according to C calling conventions
166 __ movptr(saved_rdi, rdi);
167 __ movptr(saved_rsi, rsi);
168 __ movptr(saved_rbx, rbx);
169 // save and initialize %mxcsr
170 if (sse_save) {
171 Label skip_ldmx;
172 __ stmxcsr(mxcsr_save);
173 __ movl(rax, mxcsr_save);
174 __ andl(rax, MXCSR_MASK); // Only check control and mask bits
175 ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
176 __ cmp32(rax, mxcsr_std);
177 __ jcc(Assembler::equal, skip_ldmx);
178 __ ldmxcsr(mxcsr_std);
179 __ bind(skip_ldmx);
180 }
181
182 // make sure the control word is correct.
183 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
184
185 #ifdef ASSERT
186 // make sure we have no pending exceptions
187 { Label L;
188 __ movptr(rcx, thread);
777 }
778 }
779
780
781 // Copy 64-byte chunks
782 //
783 // Inputs:
784 //   from        - source array address
785 //   to_from     - destination array address - from
786 //   qword_count - 8-byte element count (positive; the loop below counts it down to zero)
787 //
788 void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
789 assert( UseSSE >= 2, "supported cpu only" );
790 Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
791 // Copy 64-byte chunks
792 __ jmpb(L_copy_64_bytes); // enter at the count check so a count < 8 qwords skips the chunk loop entirely
793 __ align(OptoLoopAlignment);
794 __ BIND(L_copy_64_bytes_loop);
795
796 if (UseUnalignedLoadStores) {
797 if (UseAVX >= 2) {
798 __ vmovdqu(xmm0, Address(from, 0)); // two 32-byte YMM moves per 64-byte chunk
799 __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0); // dest address = from + to_from (to_from holds dest - src)
800 __ vmovdqu(xmm1, Address(from, 32));
801 __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
802 } else {
803 __ movdqu(xmm0, Address(from, 0)); // four 16-byte unaligned SSE2 moves per chunk
804 __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
805 __ movdqu(xmm1, Address(from, 16));
806 __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
807 __ movdqu(xmm2, Address(from, 32));
808 __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
809 __ movdqu(xmm3, Address(from, 48));
810 __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
811 }
812 } else {
813 __ movq(xmm0, Address(from, 0)); // no unaligned 16-byte ops: eight 8-byte moves per chunk through xmm0..xmm7
814 __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
815 __ movq(xmm1, Address(from, 8));
816 __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
817 __ movq(xmm2, Address(from, 16));
818 __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
819 __ movq(xmm3, Address(from, 24));
820 __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
821 __ movq(xmm4, Address(from, 32));
822 __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
823 __ movq(xmm5, Address(from, 40));
824 __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
825 __ movq(xmm6, Address(from, 48));
826 __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
827 __ movq(xmm7, Address(from, 56));
828 __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
829 }
830
831 __ addl(from, 64); // advance source pointer; dest advances implicitly since it is addressed as from + to_from
832 __ BIND(L_copy_64_bytes);
833 __ subl(qword_count, 8); // consume 8 qwords (64 bytes) per iteration
834 __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop); // loop while at least 8 qwords remained
835
836 if (UseUnalignedLoadStores && (UseAVX >= 2)) {
837 // clean upper bits of YMM registers (only xmm0/xmm1 were used 256-bit wide above)
838 __ vpxor(xmm0, xmm0);
839 __ vpxor(xmm1, xmm1);
840 }
841 __ addl(qword_count, 8); // undo the final subtraction: 0..7 qwords remain
842 __ jccb(Assembler::zero, L_exit);
843 //
844 // length is too short, just copy qwords
845 //
846 __ BIND(L_copy_8_bytes);
847 __ movq(xmm0, Address(from, 0)); // tail loop: one 8-byte move per iteration
848 __ movq(Address(from, to_from, Address::times_1), xmm0);
849 __ addl(from, 8);
850 __ decrement(qword_count);
851 __ jcc(Assembler::greater, L_copy_8_bytes);
852 __ BIND(L_exit);
853 }
854
855 // Copy 64-byte chunks
856 //
|
149 const Address result_type (rbp, 4 * wordSize);
150 const Address method (rbp, 5 * wordSize);
151 const Address entry_point (rbp, 6 * wordSize);
152 const Address parameters (rbp, 7 * wordSize);
153 const Address parameter_size(rbp, 8 * wordSize);
154 const Address thread (rbp, 9 * wordSize); // same as in generate_catch_exception()!
155 sse_save = UseSSE > 0;
156
157 // stub code
158 __ enter();
159 __ movptr(rcx, parameter_size); // parameter counter
160 __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
161 __ addptr(rcx, locals_count_in_bytes); // reserve space for register saves
162 __ subptr(rsp, rcx);
163 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack
164
165 // save rdi, rsi, & rbx, according to C calling conventions
166 __ movptr(saved_rdi, rdi);
167 __ movptr(saved_rsi, rsi);
168 __ movptr(saved_rbx, rbx);
169
170 // provide initial value for required masks
171 if (UseAVX > 2) {
172 __ movl(rbx, 0xffff);
173 __ kmovdl(k1, rbx);
174 }
175
176 // save and initialize %mxcsr
177 if (sse_save) {
178 Label skip_ldmx;
179 __ stmxcsr(mxcsr_save);
180 __ movl(rax, mxcsr_save);
181 __ andl(rax, MXCSR_MASK); // Only check control and mask bits
182 ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
183 __ cmp32(rax, mxcsr_std);
184 __ jcc(Assembler::equal, skip_ldmx);
185 __ ldmxcsr(mxcsr_std);
186 __ bind(skip_ldmx);
187 }
188
189 // make sure the control word is correct.
190 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
191
192 #ifdef ASSERT
193 // make sure we have no pending exceptions
194 { Label L;
195 __ movptr(rcx, thread);
784 }
785 }
786
787
788 // Copy 64-byte chunks
789 //
790 // Inputs:
791 //   from        - source array address
792 //   to_from     - destination array address - from
793 //   qword_count - 8-byte element count (positive; the loop below counts it down to zero)
794 //
795 void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
796 assert( UseSSE >= 2, "supported cpu only" );
797 Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
798 // Copy 64-byte chunks
799 __ jmpb(L_copy_64_bytes); // enter at the count check so a count < 8 qwords skips the chunk loop entirely
800 __ align(OptoLoopAlignment);
801 __ BIND(L_copy_64_bytes_loop);
802
803 if (UseUnalignedLoadStores) {
804 if (UseAVX > 2) {
805 __ evmovdqu(xmm0, Address(from, 0), Assembler::AVX_512bit); // AVX-512: one 64-byte ZMM move per chunk
806 __ evmovdqu(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit); // dest address = from + to_from (to_from holds dest - src)
807 } else if (UseAVX == 2) {
808 __ vmovdqu(xmm0, Address(from, 0)); // AVX2: two 32-byte YMM moves per chunk
809 __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0);
810 __ vmovdqu(xmm1, Address(from, 32));
811 __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
812 } else {
813 __ movdqu(xmm0, Address(from, 0)); // four 16-byte unaligned SSE2 moves per chunk
814 __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
815 __ movdqu(xmm1, Address(from, 16));
816 __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
817 __ movdqu(xmm2, Address(from, 32));
818 __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
819 __ movdqu(xmm3, Address(from, 48));
820 __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
821 }
822 } else {
823 __ movq(xmm0, Address(from, 0)); // no unaligned 16-byte ops: eight 8-byte moves per chunk through xmm0..xmm7
824 __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
825 __ movq(xmm1, Address(from, 8));
826 __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
827 __ movq(xmm2, Address(from, 16));
828 __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
829 __ movq(xmm3, Address(from, 24));
830 __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
831 __ movq(xmm4, Address(from, 32));
832 __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
833 __ movq(xmm5, Address(from, 40));
834 __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
835 __ movq(xmm6, Address(from, 48));
836 __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
837 __ movq(xmm7, Address(from, 56));
838 __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
839 }
840
841 __ addl(from, 64); // advance source pointer; dest advances implicitly since it is addressed as from + to_from
842 __ BIND(L_copy_64_bytes);
843 __ subl(qword_count, 8); // consume 8 qwords (64 bytes) per iteration
844 __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop); // loop while at least 8 qwords remained
845
846 if (UseUnalignedLoadStores && (UseAVX == 2)) {
847 // clean upper bits of YMM registers
848 // NOTE(review): this guard excludes the UseAVX > 2 path, whose 512-bit evmovdqu
849 // also dirties the upper bits of xmm0 — confirm that is intentional.
850 __ vpxor(xmm0, xmm0);
851 __ vpxor(xmm1, xmm1);
852 }
853 __ addl(qword_count, 8); // undo the final subtraction: 0..7 qwords remain
854 __ jccb(Assembler::zero, L_exit);
855 //
856 // length is too short, just copy qwords
857 //
858 __ BIND(L_copy_8_bytes);
859 __ movq(xmm0, Address(from, 0)); // tail loop: one 8-byte move per iteration
860 __ movq(Address(from, to_from, Address::times_1), xmm0);
861 __ addl(from, 8);
862 __ decrement(qword_count);
863 __ jcc(Assembler::greater, L_copy_8_bytes);
864 __ BIND(L_exit);
865 }
864
865 // Copy 64-byte chunks
866 //
|