818 __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
819 __ movq(xmm3, Address(from, 24));
820 __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
821 __ movq(xmm4, Address(from, 32));
822 __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
823 __ movq(xmm5, Address(from, 40));
824 __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
825 __ movq(xmm6, Address(from, 48));
826 __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
827 __ movq(xmm7, Address(from, 56));
828 __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
829 }
830
831 __ addl(from, 64);
832 __ BIND(L_copy_64_bytes);
833 __ subl(qword_count, 8);
834 __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
835
836 if (UseUnalignedLoadStores && (UseAVX >= 2)) {
837 // clean upper bits of YMM registers
838 __ vzeroupper();
839 }
840 __ addl(qword_count, 8);
841 __ jccb(Assembler::zero, L_exit);
842 //
843 // length is too short, just copy qwords
844 //
845 __ BIND(L_copy_8_bytes);
846 __ movq(xmm0, Address(from, 0));
847 __ movq(Address(from, to_from, Address::times_1), xmm0);
848 __ addl(from, 8);
849 __ decrement(qword_count);
850 __ jcc(Assembler::greater, L_copy_8_bytes);
851 __ BIND(L_exit);
852 }
853
854 // Copy 64-byte chunks
855 //
856 // Inputs:
857 // from - source array address
858 // to_from - destination array address - from
|
818 __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
819 __ movq(xmm3, Address(from, 24));
820 __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
821 __ movq(xmm4, Address(from, 32));
822 __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
823 __ movq(xmm5, Address(from, 40));
824 __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
825 __ movq(xmm6, Address(from, 48));
826 __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
827 __ movq(xmm7, Address(from, 56));
828 __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
829 }
830
831 __ addl(from, 64);
832 __ BIND(L_copy_64_bytes);
833 __ subl(qword_count, 8);
834 __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
835
836 if (UseUnalignedLoadStores && (UseAVX >= 2)) {
837 // clean upper bits of YMM registers
838 __ vpxor(xmm0, xmm0);
839 __ vpxor(xmm1, xmm1);
840 }
841 __ addl(qword_count, 8);
842 __ jccb(Assembler::zero, L_exit);
843 //
844 // length is too short, just copy qwords
845 //
846 __ BIND(L_copy_8_bytes);
847 __ movq(xmm0, Address(from, 0));
848 __ movq(Address(from, to_from, Address::times_1), xmm0);
849 __ addl(from, 8);
850 __ decrement(qword_count);
851 __ jcc(Assembler::greater, L_copy_8_bytes);
852 __ BIND(L_exit);
853 }
854
855 // Copy 64-byte chunks
856 //
857 // Inputs:
858 // from - source array address
859 // to_from - destination array address - from
|