src/cpu/x86/vm/stubGenerator_x86_64.cpp
Index
Unified diffs
Context diffs
Sdiffs
Wdiffs
Patch
New
Old
Previous File
Next File
*** old/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Dec 27 17:06:12 2012
--- new/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Dec 27 17:06:12 2012
*** 1284,1363 ****
--- 1284,1427 ----
// Inputs:
// end_from - source array end address
// end_to - destination array end address
// qword_count - 64-bit element count, negative
// to - scratch
! // L_copy_32_bytes - entry label
! // L_copy_bytes - entry label
// L_copy_8_bytes - exit label
//
// Emits the forward bulk-copy loop. Control is expected to enter at the entry
// label bound inside this routine and to leave either through L_copy_8_bytes
// (trailing qwords remain) or by falling off the end into whatever the caller
// emits next.
! void copy_32_bytes_forward(Register end_from, Register end_to,
! void copy_bytes_forward(Register end_from, Register end_to,
Register qword_count, Register to,
! Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
! Label& L_copy_bytes, Label& L_copy_8_bytes) {
// Debug-only guard: per its message, this point must not be reached by
// falling in from the code emitted just before this routine.
DEBUG_ONLY(__ stop("enter at entry label, not here"));
Label L_loop;
__ align(OptoLoopAlignment);
+ if (UseUnalignedLoadStores) {
+ Label L_end;
+ // Copy 64-bytes per iteration
__ BIND(L_loop);
! if(UseUnalignedLoadStores) {
! if (UseAVX >= 2) {
// Two moves whose offsets (-56, -24) are 32 bytes apart; presumably each
// vmovdqu here is a 32-byte move, matching the 64-byte iteration noted
// above -- confirm against the assembler's vmovdqu.
+ __ vmovdqu(xmm0,Address(end_from, qword_count, Address::times_8, -56));
+ __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+ __ vmovdqu(xmm1,Address(end_from, qword_count, Address::times_8, -24));
+ __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
+ } else {
// Four moves whose offsets (-56, -40, -24, -8) are 16 bytes apart.
+ __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+ __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
+ __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
+ __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
+ }
// Entry point. qword_count is negative (see the Inputs note on this routine),
// so adding 8 and looping while the result is still <= 0 performs one
// 64-byte pass for every 8 qwords that remain.
+ __ BIND(L_copy_bytes);
+ __ addptr(qword_count, 8);
+ __ jcc(Assembler::lessEqual, L_loop);
// Net bias is now +4. A positive value means fewer than 4 qwords are left,
// so the 32-byte tail copy below is skipped.
+ __ subptr(qword_count, 4); // sub(8) and add(4)
+ __ jccb(Assembler::greater, L_end);
+ // Copy trailing 32 bytes
+ if (UseAVX >= 2) {
+ __ vmovdqu(xmm0,Address(end_from, qword_count, Address::times_8, -24));
+ __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+ } else {
__ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
__ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
__ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
__ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
+ }
// Account for the 4 qwords just copied so the count carries the same +4
// bias as on the path that jumped straight to L_end.
+ __ addptr(qword_count, 4);
+ __ BIND(L_end);
} else {
+ // Copy 32-bytes per iteration
+ __ BIND(L_loop);
// Four qword moves (offsets -24, -16, -8, 0), each routed through the
// scratch register 'to'.
__ movq(to, Address(end_from, qword_count, Address::times_8, -24));
__ movq(Address(end_to, qword_count, Address::times_8, -24), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, -16));
__ movq(Address(end_to, qword_count, Address::times_8, -16), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
__ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
__ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
}
// Entry point of the 32-byte loop: bump the count by 4 and repeat while it
// is still <= 0.
! __ BIND(L_copy_32_bytes);
+
! __ BIND(L_copy_bytes);
__ addptr(qword_count, 4);
__ jcc(Assembler::lessEqual, L_loop);
+ }
// Every path reaches this point with the count biased by +4. Remove the
// bias; a result that is still negative means qwords remain, and they are
// handed to L_copy_8_bytes.
__ subptr(qword_count, 4);
__ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
}
// Copy big chunks backward
//
// Inputs:
// from - source array address
// dest - destination array address
// qword_count - 64-bit element count
// to - scratch
! // L_copy_32_bytes - entry label
! // L_copy_bytes - entry label
// L_copy_8_bytes - exit label
//
// Emits the backward bulk-copy loop. Control is expected to enter at the
// entry label bound inside this routine and to leave either through
// L_copy_8_bytes (trailing qwords remain) or by falling off the end into
// whatever the caller emits next.
! void copy_32_bytes_backward(Register from, Register dest,
! void copy_bytes_backward(Register from, Register dest,
Register qword_count, Register to,
! Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
! Label& L_copy_bytes, Label& L_copy_8_bytes) {
// Debug-only guard: per its message, this point must not be reached by
// falling in from the code emitted just before this routine.
DEBUG_ONLY(__ stop("enter at entry label, not here"));
Label L_loop;
__ align(OptoLoopAlignment);
+ if (UseUnalignedLoadStores) {
+ Label L_end;
+ // Copy 64-bytes per iteration
__ BIND(L_loop);
! if(UseUnalignedLoadStores) {
! if (UseAVX >= 2) {
// Two moves whose offsets (32, 0) are 32 bytes apart, higher addresses
// first; presumably each vmovdqu here is a 32-byte move, matching the
// 64-byte iteration noted above -- confirm against the assembler's vmovdqu.
+ __ vmovdqu(xmm0,Address(from, qword_count, Address::times_8, 32));
+ __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
+ __ vmovdqu(xmm1,Address(from, qword_count, Address::times_8, 0));
+ __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
+ } else {
// Four moves whose offsets (48, 32, 16, 0) are 16 bytes apart, higher
// addresses first.
+ __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
+ __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
+ __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
+ __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
+ }
// Entry point. Subtracting 8 and looping while the result is still >= 0
// performs one 64-byte pass for every 8 qwords that remain.
+ __ BIND(L_copy_bytes);
+ __ subptr(qword_count, 8);
+ __ jcc(Assembler::greaterEqual, L_loop);
+
// Net bias is now -4. A negative value means fewer than 4 qwords are left,
// so the 32-byte tail copy below is skipped.
+ __ addptr(qword_count, 4); // add(8) and sub(4)
+ __ jccb(Assembler::less, L_end);
+ // Copy trailing 32 bytes
+ if (UseAVX >= 2) {
+ __ vmovdqu(xmm0,Address(from, qword_count, Address::times_8, 0));
+ __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
+ } else {
__ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
__ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
__ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
__ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
+ }
// Account for the 4 qwords just copied so the count carries the same -4
// bias as on the path that jumped straight to L_end.
+ __ subptr(qword_count, 4);
+ __ BIND(L_end);
} else {
+ // Copy 32-bytes per iteration
+ __ BIND(L_loop);
// Four qword moves (offsets 24, 16, 8, 0), each routed through the scratch
// register 'to'.
__ movq(to, Address(from, qword_count, Address::times_8, 24));
__ movq(Address(dest, qword_count, Address::times_8, 24), to);
__ movq(to, Address(from, qword_count, Address::times_8, 16));
__ movq(Address(dest, qword_count, Address::times_8, 16), to);
__ movq(to, Address(from, qword_count, Address::times_8, 8));
__ movq(Address(dest, qword_count, Address::times_8, 8), to);
__ movq(to, Address(from, qword_count, Address::times_8, 0));
__ movq(Address(dest, qword_count, Address::times_8, 0), to);
}
// Entry point of the 32-byte loop: drop the count by 4 and repeat while it
// is still >= 0.
! __ BIND(L_copy_32_bytes);
+
! __ BIND(L_copy_bytes);
__ subptr(qword_count, 4);
__ jcc(Assembler::greaterEqual, L_loop);
+ }
// Every path reaches this point with the count biased by -4. Remove the
// bias; a result that is still positive means qwords remain, and they are
// handed to L_copy_8_bytes.
__ addptr(qword_count, 4);
__ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
}
*** 1383,1393 ****
--- 1447,1457 ----
address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
! Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
Label L_copy_byte, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register byte_count = rcx;
*** 1415,1425 ****
--- 1479,1489 ----
// Copy from low to high addresses. Use 'to' as scratch.
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count); // make the count negative
! __ jmp(L_copy_32_bytes);
! __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
*** 1458,1469 ****
--- 1522,1533 ----
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
! // Copy in 32-bytes chunks
! copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
! // Copy in multi-bytes chunks
! copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ jmp(L_copy_4_bytes);
return start;
}
*** 1486,1496 ****
--- 1550,1560 ----
address* entry, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
! Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register byte_count = rcx;
const Register qword_count = count;
*** 1529,1542 ****
--- 1593,1606 ----
__ movw(Address(to, byte_count, Address::times_1, -2), rax);
// Check for and copy trailing dword
__ BIND(L_copy_4_bytes);
__ testl(byte_count, 4);
! __ jcc(Assembler::zero, L_copy_32_bytes);
! __ jcc(Assembler::zero, L_copy_bytes);
__ movl(rax, Address(from, qword_count, Address::times_8));
__ movl(Address(to, qword_count, Address::times_8), rax);
! __ jmp(L_copy_32_bytes);
! __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
*** 1547,1558 ****
--- 1611,1622 ----
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
! // Copy in 32-bytes chunks
! copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
! // Copy in multi-bytes chunks
! copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
restore_arg_regs();
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
*** 1583,1593 ****
--- 1647,1657 ----
address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
! Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register word_count = rcx;
const Register qword_count = count;
*** 1614,1624 ****
--- 1678,1688 ----
// Copy from low to high addresses. Use 'to' as scratch.
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count);
! __ jmp(L_copy_32_bytes);
! __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
*** 1650,1661 ****
--- 1714,1725 ----
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
! // Copy in 32-bytes chunks
! copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
! // Copy in multi-bytes chunks
! copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ jmp(L_copy_4_bytes);
return start;
}
*** 1698,1708 ****
--- 1762,1772 ----
address *entry, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes;
! Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register word_count = rcx;
const Register qword_count = count;
*** 1733,1746 ****
--- 1797,1810 ----
__ movw(Address(to, word_count, Address::times_2, -2), rax);
// Check for and copy trailing dword
__ BIND(L_copy_4_bytes);
__ testl(word_count, 2);
! __ jcc(Assembler::zero, L_copy_32_bytes);
! __ jcc(Assembler::zero, L_copy_bytes);
__ movl(rax, Address(from, qword_count, Address::times_8));
__ movl(Address(to, qword_count, Address::times_8), rax);
! __ jmp(L_copy_32_bytes);
! __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
*** 1751,1762 ****
--- 1815,1826 ----
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
! // Copy in 32-bytes chunks
! copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
! // Copy in multi-bytes chunks
! copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
restore_arg_regs();
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
*** 1788,1798 ****
--- 1852,1862 ----
const char *name, bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
! Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register dword_count = rcx;
const Register qword_count = count;
*** 1824,1834 ****
--- 1888,1898 ----
// Copy from low to high addresses. Use 'to' as scratch.
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count);
! __ jmp(L_copy_32_bytes);
! __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
*** 1851,1862 ****
--- 1915,1926 ----
inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
! // Copy 32-bytes chunks
! copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
! // Copy in multi-bytes chunks
! copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ jmp(L_copy_4_bytes);
return start;
}
*** 1880,1890 ****
--- 1944,1954 ----
bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
! Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
! Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register dword_count = rcx;
const Register qword_count = count;
*** 1914,1927 ****
--- 1978,1991 ----
// Copy from high to low addresses. Use 'to' as scratch.
// Check for and copy trailing dword
__ testl(dword_count, 1);
! __ jcc(Assembler::zero, L_copy_32_bytes);
! __ jcc(Assembler::zero, L_copy_bytes);
__ movl(rax, Address(from, dword_count, Address::times_4, -4));
__ movl(Address(to, dword_count, Address::times_4, -4), rax);
! __ jmp(L_copy_32_bytes);
! __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
*** 1935,1946 ****
--- 1999,2010 ----
inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
! // Copy in 32-bytes chunks
! copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
! // Copy in multi-bytes chunks
! copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ bind(L_exit);
if (is_oop) {
Register end_to = rdx;
__ leaq(end_to, Address(to, dword_count, Address::times_4, -4));
*** 1974,1984 ****
--- 2038,2048 ----
const char *name, bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
! Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
! Label L_copy_bytes, L_copy_8_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register qword_count = rdx; // elements count
const Register end_from = from; // source array end address
const Register end_to = rcx; // destination array end address
*** 2006,2016 ****
--- 2070,2080 ----
// Copy from low to high addresses. Use 'to' as scratch.
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count);
! __ jmp(L_copy_32_bytes);
! __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
*** 2025,2036 ****
--- 2089,2100 ----
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
}
! // Copy 64-byte chunks
! copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
! // Copy in multi-bytes chunks
! copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
if (is_oop) {
__ BIND(L_exit);
gen_write_ref_array_post_barrier(saved_to, end_to, rax);
}
*** 2063,2073 ****
--- 2127,2137 ----
const char *name, bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
! Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
! Label L_copy_bytes, L_copy_8_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register qword_count = rdx; // elements count
const Register saved_count = rcx;
*** 2089,2099 ****
--- 2153,2163 ----
__ movptr(saved_count, qword_count);
// No registers are destroyed by this call
gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
}
! __ jmp(L_copy_32_bytes);
! __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
*** 2108,2119 ****
--- 2172,2183 ----
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
}
! // Copy in 32-bytes chunks
! copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
! // Copy in multi-bytes chunks
! copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
if (is_oop) {
__ BIND(L_exit);
__ lea(rcx, Address(to, saved_count, Address::times_8, -8));
gen_write_ref_array_post_barrier(to, rcx, rax);
src/cpu/x86/vm/stubGenerator_x86_64.cpp
Index
Unified diffs
Context diffs
Sdiffs
Wdiffs
Patch
New
Old
Previous File
Next File