src/cpu/x86/vm/stubGenerator_x86_64.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File
*** old/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Dec 27 17:06:12 2012
--- new/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Dec 27 17:06:12 2012

*** 1284,1363 **** --- 1284,1427 ---- // Inputs: // end_from - source arrays end address // end_to - destination array end address // qword_count - 64-bits element count, negative // to - scratch ! // L_copy_32_bytes - entry label ! // L_copy_bytes - entry label // L_copy_8_bytes - exit label // ! void copy_32_bytes_forward(Register end_from, Register end_to, ! void copy_bytes_forward(Register end_from, Register end_to, Register qword_count, Register to, ! Label& L_copy_32_bytes, Label& L_copy_8_bytes) { ! Label& L_copy_bytes, Label& L_copy_8_bytes) { DEBUG_ONLY(__ stop("enter at entry label, not here")); Label L_loop; __ align(OptoLoopAlignment); + if (UseUnalignedLoadStores) { + Label L_end; + // Copy 64-bytes per iteration __ BIND(L_loop); ! if(UseUnalignedLoadStores) { ! if (UseAVX >= 2) { + __ vmovdqu(xmm0,Address(end_from, qword_count, Address::times_8, -56)); + __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); + __ vmovdqu(xmm1,Address(end_from, qword_count, Address::times_8, -24)); + __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1); + } else { + __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); + __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1); + __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2); + __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8)); + __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3); + } + __ BIND(L_copy_bytes); + __ addptr(qword_count, 8); + __ jcc(Assembler::lessEqual, L_loop); + __ subptr(qword_count, 4); // sub(8) and add(4) + __ jccb(Assembler::greater, L_end); + // Copy trailing 32 bytes + if (UseAVX >= 2) { + __ vmovdqu(xmm0,Address(end_from, qword_count, Address::times_8, -24)); + __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); + } else { __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8)); __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1); + } + __ addptr(qword_count, 4); + __ BIND(L_end); } else { + // Copy 32-bytes per iteration + __ BIND(L_loop); __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); __ movq(Address(end_to, qword_count, Address::times_8, -24), to); __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); __ movq(Address(end_to, qword_count, Address::times_8, -16), to); __ movq(to, Address(end_from, qword_count, Address::times_8, - 8)); __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); } ! __ BIND(L_copy_32_bytes); + ! __ BIND(L_copy_bytes); __ addptr(qword_count, 4); __ jcc(Assembler::lessEqual, L_loop); + } __ subptr(qword_count, 4); __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords } // Copy big chunks backward // // Inputs: // from - source arrays address // dest - destination array address // qword_count - 64-bits element count // to - scratch ! // L_copy_32_bytes - entry label ! // L_copy_bytes - entry label // L_copy_8_bytes - exit label // ! void copy_32_bytes_backward(Register from, Register dest, ! void copy_bytes_backward(Register from, Register dest, Register qword_count, Register to, ! Label& L_copy_32_bytes, Label& L_copy_8_bytes) { ! Label& L_copy_bytes, Label& L_copy_8_bytes) { DEBUG_ONLY(__ stop("enter at entry label, not here")); Label L_loop; __ align(OptoLoopAlignment); + if (UseUnalignedLoadStores) { + Label L_end; + // Copy 64-bytes per iteration __ BIND(L_loop); ! if(UseUnalignedLoadStores) { ! if (UseAVX >= 2) { + __ vmovdqu(xmm0,Address(from, qword_count, Address::times_8, 32)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0); + __ vmovdqu(xmm1,Address(from, qword_count, Address::times_8, 0)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + } else { + __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48)); + __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0); + __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32)); + __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1); + __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16)); + __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2); + __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0)); + __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3); + } + __ BIND(L_copy_bytes); + __ subptr(qword_count, 8); + __ jcc(Assembler::greaterEqual, L_loop); + + __ addptr(qword_count, 4); // add(8) and sub(4) + __ jccb(Assembler::less, L_end); + // Copy trailing 32 bytes + if (UseAVX >= 2) { + __ vmovdqu(xmm0,Address(from, qword_count, Address::times_8, 0)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0); + } else { __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16)); __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0); __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + } + __ subptr(qword_count, 4); + __ BIND(L_end); } else { + // Copy 32-bytes per iteration + __ BIND(L_loop); __ movq(to, Address(from, qword_count, Address::times_8, 24)); __ movq(Address(dest, qword_count, Address::times_8, 24), to); __ movq(to, Address(from, qword_count, Address::times_8, 16)); __ movq(Address(dest, qword_count, Address::times_8, 16), to); __ movq(to, Address(from, qword_count, Address::times_8, 8)); __ movq(Address(dest, qword_count, Address::times_8, 8), to); __ movq(to, Address(from, qword_count, Address::times_8, 0)); __ movq(Address(dest, qword_count, Address::times_8, 0), to); } ! __ BIND(L_copy_32_bytes); + ! __ BIND(L_copy_bytes); __ subptr(qword_count, 4); __ jcc(Assembler::greaterEqual, L_loop); + } __ addptr(qword_count, 4); __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords }
*** 1383,1393 **** --- 1447,1457 ---- address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); ! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; ! Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; Label L_copy_byte, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count const Register byte_count = rcx;
*** 1415,1425 **** --- 1479,1489 ---- // Copy from low to high addresses. Use 'to' as scratch. __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); __ negptr(qword_count); // make the count negative ! __ jmp(L_copy_32_bytes); ! __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
*** 1458,1469 **** --- 1522,1533 ---- inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); ! // Copy in 32-bytes chunks ! copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); ! // Copy in multi-bytes chunks ! copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); __ jmp(L_copy_4_bytes); return start; }
*** 1486,1496 **** --- 1550,1560 ---- address* entry, const char *name) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); ! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; ! Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count const Register byte_count = rcx; const Register qword_count = count;
*** 1529,1542 **** --- 1593,1606 ---- __ movw(Address(to, byte_count, Address::times_1, -2), rax); // Check for and copy trailing dword __ BIND(L_copy_4_bytes); __ testl(byte_count, 4); ! __ jcc(Assembler::zero, L_copy_32_bytes); ! __ jcc(Assembler::zero, L_copy_bytes); __ movl(rax, Address(from, qword_count, Address::times_8)); __ movl(Address(to, qword_count, Address::times_8), rax); ! __ jmp(L_copy_32_bytes); ! __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); __ movq(rax, Address(from, qword_count, Address::times_8, -8)); __ movq(Address(to, qword_count, Address::times_8, -8), rax);
*** 1547,1558 **** --- 1611,1622 ---- inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); ! // Copy in 32-bytes chunks ! copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); ! // Copy in multi-bytes chunks ! copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); restore_arg_regs(); inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame
*** 1583,1593 **** --- 1647,1657 ---- address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); ! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit; ! Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count const Register word_count = rcx; const Register qword_count = count;
*** 1614,1624 **** --- 1678,1688 ---- // Copy from low to high addresses. Use 'to' as scratch. __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); __ negptr(qword_count); ! __ jmp(L_copy_32_bytes); ! __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
*** 1650,1661 **** --- 1714,1725 ---- inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); ! // Copy in 32-bytes chunks ! copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); ! // Copy in multi-bytes chunks ! copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); __ jmp(L_copy_4_bytes); return start; }
*** 1698,1708 **** --- 1762,1772 ---- address *entry, const char *name) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); ! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes; ! Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count const Register word_count = rcx; const Register qword_count = count;
*** 1733,1746 **** --- 1797,1810 ---- __ movw(Address(to, word_count, Address::times_2, -2), rax); // Check for and copy trailing dword __ BIND(L_copy_4_bytes); __ testl(word_count, 2); ! __ jcc(Assembler::zero, L_copy_32_bytes); ! __ jcc(Assembler::zero, L_copy_bytes); __ movl(rax, Address(from, qword_count, Address::times_8)); __ movl(Address(to, qword_count, Address::times_8), rax); ! __ jmp(L_copy_32_bytes); ! __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); __ movq(rax, Address(from, qword_count, Address::times_8, -8)); __ movq(Address(to, qword_count, Address::times_8, -8), rax);
*** 1751,1762 **** --- 1815,1826 ---- inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); ! // Copy in 32-bytes chunks ! copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); ! // Copy in multi-bytes chunks ! copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); restore_arg_regs(); inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame
*** 1788,1798 **** --- 1852,1862 ---- const char *name, bool dest_uninitialized = false) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); ! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit; ! Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count const Register dword_count = rcx; const Register qword_count = count;
*** 1824,1834 **** --- 1888,1898 ---- // Copy from low to high addresses. Use 'to' as scratch. __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); __ negptr(qword_count); ! __ jmp(L_copy_32_bytes); ! __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
*** 1851,1862 **** --- 1915,1926 ---- inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); ! // Copy 32-bytes chunks ! copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); ! // Copy in multi-bytes chunks ! copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); __ jmp(L_copy_4_bytes); return start; }
*** 1880,1890 **** --- 1944,1954 ---- bool dest_uninitialized = false) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); ! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit; ! Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count const Register dword_count = rcx; const Register qword_count = count;
*** 1914,1927 **** --- 1978,1991 ---- // Copy from high to low addresses. Use 'to' as scratch. // Check for and copy trailing dword __ testl(dword_count, 1); ! __ jcc(Assembler::zero, L_copy_32_bytes); ! __ jcc(Assembler::zero, L_copy_bytes); __ movl(rax, Address(from, dword_count, Address::times_4, -4)); __ movl(Address(to, dword_count, Address::times_4, -4), rax); ! __ jmp(L_copy_32_bytes); ! __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); __ movq(rax, Address(from, qword_count, Address::times_8, -8)); __ movq(Address(to, qword_count, Address::times_8, -8), rax);
*** 1935,1946 **** --- 1999,2010 ---- inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); ! // Copy in 32-bytes chunks ! copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); ! // Copy in multi-bytes chunks ! copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); __ bind(L_exit); if (is_oop) { Register end_to = rdx; __ leaq(end_to, Address(to, dword_count, Address::times_4, -4));
*** 1974,1984 **** --- 2038,2048 ---- const char *name, bool dest_uninitialized = false) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); ! Label L_copy_32_bytes, L_copy_8_bytes, L_exit; ! Label L_copy_bytes, L_copy_8_bytes, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register qword_count = rdx; // elements count const Register end_from = from; // source array end address const Register end_to = rcx; // destination array end address
*** 2006,2016 **** --- 2070,2080 ---- // Copy from low to high addresses. Use 'to' as scratch. __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); __ negptr(qword_count); ! __ jmp(L_copy_32_bytes); ! __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
*** 2025,2036 **** --- 2089,2100 ---- __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); } ! // Copy 64-byte chunks ! copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); ! // Copy in multi-bytes chunks ! copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); if (is_oop) { __ BIND(L_exit); gen_write_ref_array_post_barrier(saved_to, end_to, rax); }
*** 2063,2073 **** --- 2127,2137 ---- const char *name, bool dest_uninitialized = false) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); ! Label L_copy_32_bytes, L_copy_8_bytes, L_exit; ! Label L_copy_bytes, L_copy_8_bytes, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register qword_count = rdx; // elements count const Register saved_count = rcx;
*** 2089,2099 **** --- 2153,2163 ---- __ movptr(saved_count, qword_count); // No registers are destroyed by this call gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized); } ! __ jmp(L_copy_32_bytes); ! __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); __ movq(rax, Address(from, qword_count, Address::times_8, -8)); __ movq(Address(to, qword_count, Address::times_8, -8), rax);
*** 2108,2119 **** --- 2172,2183 ---- __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); } ! // Copy in 32-bytes chunks ! copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); ! // Copy in multi-bytes chunks ! copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); if (is_oop) { __ BIND(L_exit); __ lea(rcx, Address(to, saved_count, Address::times_8, -8)); gen_write_ref_array_post_barrier(to, rcx, rax);

src/cpu/x86/vm/stubGenerator_x86_64.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File