// --- Interior of a call-stub generator (function header is outside this chunk;
// presumably generate_call_stub — TODO confirm against the full file). ---
//
// Incoming C arguments, addressed rbp-relative after __ enter() builds the
// frame; comment on 'thread' shows the 9*wordSize slot is shared with
// generate_catch_exception().
const Address result_type (rbp, 4 * wordSize);
const Address method (rbp, 5 * wordSize);
const Address entry_point (rbp, 6 * wordSize);
const Address parameters (rbp, 7 * wordSize);
const Address parameter_size(rbp, 8 * wordSize);
const Address thread (rbp, 9 * wordSize); // same as in generate_catch_exception()!
// Remember whether SSE state (mxcsr) must be saved/restored below.
sse_save = UseSSE > 0;

// stub code
__ enter();
// Reserve stack space: parameter_size elements scaled to bytes, plus room
// for the register-save locals, then align rsp downward.
__ movptr(rcx, parameter_size); // parameter counter
__ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
__ addptr(rcx, locals_count_in_bytes); // reserve space for register saves
__ subptr(rsp, rcx);
__ andptr(rsp, -(StackAlignmentInBytes)); // Align stack

// save rdi, rsi, & rbx, according to C calling conventions
__ movptr(saved_rdi, rdi);
__ movptr(saved_rsi, rsi);
__ movptr(saved_rbx, rbx);
// save and initialize %mxcsr
if (sse_save) {
  Label skip_ldmx;
  // Save caller's mxcsr; if its control/mask bits already match the
  // standard value, skip the (comparatively slow) ldmxcsr.
  __ stmxcsr(mxcsr_save);
  __ movl(rax, mxcsr_save);
  __ andl(rax, MXCSR_MASK); // Only check control and mask bits
  ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
  __ cmp32(rax, mxcsr_std);
  __ jcc(Assembler::equal, skip_ldmx);
  __ ldmxcsr(mxcsr_std);
  __ bind(skip_ldmx);
}

// make sure the control word is correct.
// Load the standard x87 FPU control word (continuation of the call-stub
// prologue started above).
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));

#ifdef ASSERT
// make sure we have no pending exceptions
{ Label L;
  __ movptr(rcx, thread);
// NOTE(review): the source dump elides the intervening lines here (the rest
// of the ASSERT block and the remainder of the call stub); the two closing
// braces below end an elided construct, not the block opened above.
}
}


// Copy 64 bytes chunks
//
// Inputs:
//   from - source array address
//   to_from - destination array address - from
//   qword_count - 8-bytes element count, negative
//
// Emits a forward copy loop. 'to_from' holds (dest - src), so a single
// register ('from') both walks the source and, via
// Address(from, to_from, times_1, disp) = from + to_from + disp, addresses
// the destination.
// NOTE(review): the header says qword_count is "negative", but the loop
// below (subl / jcc greaterEqual, then decrement / jcc greater) treats it
// as a positive count — the comment looks stale; confirm against callers.
void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
  assert( UseSSE >= 2, "supported cpu only" );
  Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
  // Copy 64-byte chunks
  // Jump straight to the loop test so a short count never enters the body.
  __ jmpb(L_copy_64_bytes);
  __ align(OptoLoopAlignment);
  __ BIND(L_copy_64_bytes_loop);

  if (UseUnalignedLoadStores) {
    if (UseAVX >= 2) {
      // Two 32-byte YMM moves per 64-byte chunk.
      __ vmovdqu(xmm0, Address(from, 0));
      __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0);
      __ vmovdqu(xmm1, Address(from, 32));
      __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
    } else {
      // Four 16-byte XMM moves per 64-byte chunk.
      __ movdqu(xmm0, Address(from, 0));
      __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
      __ movdqu(xmm1, Address(from, 16));
      __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
      __ movdqu(xmm2, Address(from, 32));
      __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
      __ movdqu(xmm3, Address(from, 48));
      __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
    }
  } else {
    // No unaligned 16-byte ops: eight 8-byte MMX/XMM qword moves.
    __ movq(xmm0, Address(from, 0));
    __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
    __ movq(xmm1, Address(from, 8));
    __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
    __ movq(xmm2, Address(from, 16));
    __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
    __ movq(xmm3, Address(from, 24));
    __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
    __ movq(xmm4, Address(from, 32));
    __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
    __ movq(xmm5, Address(from, 40));
    __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
    __ movq(xmm6, Address(from, 48));
    __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
    __ movq(xmm7, Address(from, 56));
    __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
  }

  __ addl(from, 64);
  __ BIND(L_copy_64_bytes);
  // Loop while at least 8 qwords (64 bytes) remain.
  __ subl(qword_count, 8);
  __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);

  if (UseUnalignedLoadStores && (UseAVX >= 2)) {
    // clean upper bits of YMM registers
    __ vzeroupper();
  }
  // Undo the final subl; zero remainder means nothing left to copy.
  __ addl(qword_count, 8);
  __ jccb(Assembler::zero, L_exit);
  //
  // length is too short, just copy qwords
  //
  __ BIND(L_copy_8_bytes);
  __ movq(xmm0, Address(from, 0));
  __ movq(Address(from, to_from, Address::times_1), xmm0);
  __ addl(from, 8);
  __ decrement(qword_count);
  __ jcc(Assembler::greater, L_copy_8_bytes);
  __ BIND(L_exit);
}

// Copy 64 bytes chunks
//
// Inputs:
// ---- column separator artifact of the side-by-side dump; the revised
// version of the same file begins here (call-stub argument addresses,
// identical to the left-hand column). ----
|
const Address result_type (rbp, 4 * wordSize);
const Address method (rbp, 5 * wordSize);
const Address entry_point (rbp, 6 * wordSize);
const Address parameters (rbp, 7 * wordSize);
const Address parameter_size(rbp, 8 * wordSize);
const Address thread (rbp, 9 * wordSize); // same as in generate_catch_exception()!
// --- Revised-version call-stub prologue (right-hand column of the dump);
// identical to the earlier version except for the added AVX-512 opmask
// initialization below. Enclosing function header is outside this chunk. ---
sse_save = UseSSE > 0;

// stub code
__ enter();
// Reserve stack space: parameter_size elements scaled to bytes, plus room
// for the register-save locals, then align rsp downward.
__ movptr(rcx, parameter_size); // parameter counter
__ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
__ addptr(rcx, locals_count_in_bytes); // reserve space for register saves
__ subptr(rsp, rcx);
__ andptr(rsp, -(StackAlignmentInBytes)); // Align stack

// save rdi, rsi, & rbx, according to C calling conventions
__ movptr(saved_rdi, rdi);
__ movptr(saved_rsi, rsi);
__ movptr(saved_rbx, rbx);

// provide initial value for required masks
if (UseAVX > 2) {
  // Set opmask k1 to all-ones (0xffff) so EVEX-encoded instructions that
  // implicitly use k1 operate unmasked.
  __ movl(rbx, 0xffff);
  __ kmovdl(k1, rbx);
}

// save and initialize %mxcsr
if (sse_save) {
  Label skip_ldmx;
  // Save caller's mxcsr; if its control/mask bits already match the
  // standard value, skip the (comparatively slow) ldmxcsr.
  __ stmxcsr(mxcsr_save);
  __ movl(rax, mxcsr_save);
  __ andl(rax, MXCSR_MASK); // Only check control and mask bits
  ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
  __ cmp32(rax, mxcsr_std);
  __ jcc(Assembler::equal, skip_ldmx);
  __ ldmxcsr(mxcsr_std);
  __ bind(skip_ldmx);
}

// make sure the control word is correct.
// Load the standard x87 FPU control word (continuation of the revised
// call-stub prologue above).
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));

#ifdef ASSERT
// make sure we have no pending exceptions
{ Label L;
  __ movptr(rcx, thread);
// NOTE(review): the source dump elides the intervening lines here; the two
// closing braces below end an elided construct, not the block opened above.
}
}


// Copy 64 bytes chunks
//
// Inputs:
//   from - source array address
//   to_from - destination array address - from
//   qword_count - 8-bytes element count, negative
//
// Revised variant of xmm_copy_forward: adds a 64-byte EVEX (AVX-512) path
// and narrows the YMM path / vzeroupper to UseAVX == 2 exactly.
// NOTE(review): as in the earlier variant, the header's "negative" claim
// about qword_count contradicts the positive-count loop logic — confirm.
void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
  assert( UseSSE >= 2, "supported cpu only" );
  Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
  // Copy 64-byte chunks
  // Jump straight to the loop test so a short count never enters the body.
  __ jmpb(L_copy_64_bytes);
  __ align(OptoLoopAlignment);
  __ BIND(L_copy_64_bytes_loop);

  if (UseUnalignedLoadStores) {
    if (UseAVX > 2) {
      // Single 64-byte ZMM move per chunk.
      __ evmovdqu(xmm0, Address(from, 0), Assembler::AVX_512bit);
      __ evmovdqu(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
    } else if (UseAVX == 2) {
      // Two 32-byte YMM moves per chunk.
      __ vmovdqu(xmm0, Address(from, 0));
      __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0);
      __ vmovdqu(xmm1, Address(from, 32));
      __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
    } else {
      // Four 16-byte XMM moves per chunk.
      __ movdqu(xmm0, Address(from, 0));
      __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
      __ movdqu(xmm1, Address(from, 16));
      __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
      __ movdqu(xmm2, Address(from, 32));
      __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
      __ movdqu(xmm3, Address(from, 48));
      __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
    }
  } else {
    // No unaligned 16-byte ops: eight 8-byte qword moves.
    __ movq(xmm0, Address(from, 0));
    __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
    __ movq(xmm1, Address(from, 8));
    __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
    __ movq(xmm2, Address(from, 16));
    __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
    __ movq(xmm3, Address(from, 24));
    __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
    __ movq(xmm4, Address(from, 32));
    __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
    __ movq(xmm5, Address(from, 40));
    __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
    __ movq(xmm6, Address(from, 48));
    __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
    __ movq(xmm7, Address(from, 56));
    __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
  }

  __ addl(from, 64);
  __ BIND(L_copy_64_bytes);
  // Loop while at least 8 qwords (64 bytes) remain.
  __ subl(qword_count, 8);
  __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);

  if (UseUnalignedLoadStores && (UseAVX == 2)) {
    // clean upper bits of YMM registers
    __ vzeroupper();
  }
  // Undo the final subl; zero remainder means nothing left to copy.
  __ addl(qword_count, 8);
  __ jccb(Assembler::zero, L_exit);
  //
  // length is too short, just copy qwords
  //
  __ BIND(L_copy_8_bytes);
  __ movq(xmm0, Address(from, 0));
  __ movq(Address(from, to_from, Address::times_1), xmm0);
  __ addl(from, 8);
  __ decrement(qword_count);
  __ jcc(Assembler::greater, L_copy_8_bytes);
  __ BIND(L_exit);
}

// Copy 64 bytes chunks
//
// Inputs:
// ---- trailing column separator artifact of the side-by-side dump ----
|