hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
Print this page
rev 611 : Merge
@@ -1,10 +1,7 @@
-#ifdef USE_PRAGMA_IDENT_SRC
-#pragma ident "@(#)stubGenerator_x86_64.cpp 1.49 07/10/05 19:12:48 JVM"
-#endif
/*
- * Copyright 2003-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
@@ -31,10 +28,12 @@
// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp
#define __ _masm->
+#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
+#define a__ ((Assembler*)_masm)->
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
@@ -210,59 +209,60 @@
const Address r12_save(rbp, r12_off * wordSize);
const Address rbx_save(rbp, rbx_off * wordSize);
// stub code
__ enter();
- __ subq(rsp, -rsp_after_call_off * wordSize);
+ __ subptr(rsp, -rsp_after_call_off * wordSize);
// save register parameters
#ifndef _WIN64
- __ movq(parameters, c_rarg5); // parameters
- __ movq(entry_point, c_rarg4); // entry_point
+ __ movptr(parameters, c_rarg5); // parameters
+ __ movptr(entry_point, c_rarg4); // entry_point
#endif
- __ movq(method, c_rarg3); // method
+ __ movptr(method, c_rarg3); // method
__ movl(result_type, c_rarg2); // result type
- __ movq(result, c_rarg1); // result
- __ movq(call_wrapper, c_rarg0); // call wrapper
+ __ movptr(result, c_rarg1); // result
+ __ movptr(call_wrapper, c_rarg0); // call wrapper
// save regs belonging to calling function
- __ movq(rbx_save, rbx);
- __ movq(r12_save, r12);
- __ movq(r13_save, r13);
- __ movq(r14_save, r14);
- __ movq(r15_save, r15);
+ __ movptr(rbx_save, rbx);
+ __ movptr(r12_save, r12);
+ __ movptr(r13_save, r13);
+ __ movptr(r14_save, r14);
+ __ movptr(r15_save, r15);
#ifdef _WIN64
const Address rdi_save(rbp, rdi_off * wordSize);
const Address rsi_save(rbp, rsi_off * wordSize);
- __ movq(rsi_save, rsi);
- __ movq(rdi_save, rdi);
+ __ movptr(rsi_save, rsi);
+ __ movptr(rdi_save, rdi);
#else
const Address mxcsr_save(rbp, mxcsr_off * wordSize);
{
Label skip_ldmx;
__ stmxcsr(mxcsr_save);
__ movl(rax, mxcsr_save);
__ andl(rax, MXCSR_MASK); // Only check control and mask bits
- ExternalAddress mxcsr_std(StubRoutines::amd64::mxcsr_std());
+ ExternalAddress mxcsr_std(StubRoutines::x86::mxcsr_std());
__ cmp32(rax, mxcsr_std);
__ jcc(Assembler::equal, skip_ldmx);
__ ldmxcsr(mxcsr_std);
__ bind(skip_ldmx);
}
#endif
// Load up thread register
- __ movq(r15_thread, thread);
+ __ movptr(r15_thread, thread);
+ __ reinit_heapbase();
#ifdef ASSERT
// make sure we have no pending exceptions
{
Label L;
- __ cmpq(Address(r15_thread, Thread::pending_exception_offset()), (int)NULL_WORD);
+ __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, L);
__ stop("StubRoutines::call_stub: entered with pending exception");
__ bind(L);
}
#endif
@@ -273,38 +273,38 @@
__ movl(c_rarg3, parameter_size);
__ testl(c_rarg3, c_rarg3);
__ jcc(Assembler::zero, parameters_done);
Label loop;
- __ movq(c_rarg2, parameters); // parameter pointer
+ __ movptr(c_rarg2, parameters); // parameter pointer
__ movl(c_rarg1, c_rarg3); // parameter counter is in c_rarg1
__ BIND(loop);
if (TaggedStackInterpreter) {
- __ movq(rax, Address(c_rarg2, 0)); // get tag
- __ addq(c_rarg2, wordSize); // advance to next tag
- __ pushq(rax); // pass tag
+ __ movl(rax, Address(c_rarg2, 0)); // get tag
+ __ addptr(c_rarg2, wordSize); // advance to next tag
+ __ push(rax); // pass tag
}
- __ movq(rax, Address(c_rarg2, 0)); // get parameter
- __ addq(c_rarg2, wordSize); // advance to next parameter
+ __ movptr(rax, Address(c_rarg2, 0));// get parameter
+ __ addptr(c_rarg2, wordSize); // advance to next parameter
__ decrementl(c_rarg1); // decrement counter
- __ pushq(rax); // pass parameter
+ __ push(rax); // pass parameter
__ jcc(Assembler::notZero, loop);
// call Java function
__ BIND(parameters_done);
- __ movq(rbx, method); // get methodOop
- __ movq(c_rarg1, entry_point); // get entry_point
- __ movq(r13, rsp); // set sender sp
+ __ movptr(rbx, method); // get methodOop
+ __ movptr(c_rarg1, entry_point); // get entry_point
+ __ mov(r13, rsp); // set sender sp
BLOCK_COMMENT("call Java function");
__ call(c_rarg1);
BLOCK_COMMENT("call_stub_return_address:");
return_address = __ pc();
// store result depending on type (everything that is not
// T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
- __ movq(c_rarg0, result);
+ __ movptr(c_rarg0, result);
Label is_long, is_float, is_double, exit;
__ movl(c_rarg1, result_type);
__ cmpl(c_rarg1, T_OBJECT);
__ jcc(Assembler::equal, is_long);
__ cmpl(c_rarg1, T_LONG);
@@ -318,47 +318,47 @@
__ movl(Address(c_rarg0, 0), rax);
__ BIND(exit);
// pop parameters
- __ leaq(rsp, rsp_after_call);
+ __ lea(rsp, rsp_after_call);
#ifdef ASSERT
// verify that threads correspond
{
Label L, S;
- __ cmpq(r15_thread, thread);
+ __ cmpptr(r15_thread, thread);
__ jcc(Assembler::notEqual, S);
__ get_thread(rbx);
- __ cmpq(r15_thread, rbx);
+ __ cmpptr(r15_thread, rbx);
__ jcc(Assembler::equal, L);
__ bind(S);
__ jcc(Assembler::equal, L);
__ stop("StubRoutines::call_stub: threads must correspond");
__ bind(L);
}
#endif
// restore regs belonging to calling function
- __ movq(r15, r15_save);
- __ movq(r14, r14_save);
- __ movq(r13, r13_save);
- __ movq(r12, r12_save);
- __ movq(rbx, rbx_save);
+ __ movptr(r15, r15_save);
+ __ movptr(r14, r14_save);
+ __ movptr(r13, r13_save);
+ __ movptr(r12, r12_save);
+ __ movptr(rbx, rbx_save);
#ifdef _WIN64
- __ movq(rdi, rdi_save);
- __ movq(rsi, rsi_save);
+ __ movptr(rdi, rdi_save);
+ __ movptr(rsi, rsi_save);
#else
__ ldmxcsr(mxcsr_save);
#endif
// restore rsp
- __ addq(rsp, -rsp_after_call_off * wordSize);
+ __ addptr(rsp, -rsp_after_call_off * wordSize);
// return
- __ popq(rbp);
+ __ pop(rbp);
__ ret(0);
// handle return types different from T_INT
__ BIND(is_long);
__ movq(Address(c_rarg0, 0), rax);
@@ -397,27 +397,27 @@
#ifdef ASSERT
// verify that threads correspond
{
Label L, S;
- __ cmpq(r15_thread, thread);
+ __ cmpptr(r15_thread, thread);
__ jcc(Assembler::notEqual, S);
__ get_thread(rbx);
- __ cmpq(r15_thread, rbx);
+ __ cmpptr(r15_thread, rbx);
__ jcc(Assembler::equal, L);
__ bind(S);
__ stop("StubRoutines::catch_exception: threads must correspond");
__ bind(L);
}
#endif
// set pending exception
__ verify_oop(rax);
- __ movq(Address(r15_thread, Thread::pending_exception_offset()), rax);
+ __ movptr(Address(r15_thread, Thread::pending_exception_offset()), rax);
__ lea(rscratch1, ExternalAddress((address)__FILE__));
- __ movq(Address(r15_thread, Thread::exception_file_offset()), rscratch1);
+ __ movptr(Address(r15_thread, Thread::exception_file_offset()), rscratch1);
__ movl(Address(r15_thread, Thread::exception_line_offset()), (int) __LINE__);
// complete return to VM
assert(StubRoutines::_call_stub_return_address != NULL,
"_call_stub_return_address must have been generated before");
@@ -452,35 +452,35 @@
#ifdef ASSERT
// make sure this code is only executed if there is a pending exception
{
Label L;
- __ cmpq(Address(r15_thread, Thread::pending_exception_offset()), (int) NULL);
+ __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t) NULL);
__ jcc(Assembler::notEqual, L);
__ stop("StubRoutines::forward exception: no pending exception (1)");
__ bind(L);
}
#endif
// compute exception handler into rbx
- __ movq(c_rarg0, Address(rsp, 0));
+ __ movptr(c_rarg0, Address(rsp, 0));
BLOCK_COMMENT("call exception_handler_for_return_address");
__ call_VM_leaf(CAST_FROM_FN_PTR(address,
SharedRuntime::exception_handler_for_return_address),
c_rarg0);
- __ movq(rbx, rax);
+ __ mov(rbx, rax);
// setup rax & rdx, remove return address & clear pending exception
- __ popq(rdx);
- __ movq(rax, Address(r15_thread, Thread::pending_exception_offset()));
+ __ pop(rdx);
+ __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
__ movptr(Address(r15_thread, Thread::pending_exception_offset()), (int)NULL_WORD);
#ifdef ASSERT
// make sure exception is set
{
Label L;
- __ testq(rax, rax);
+ __ testptr(rax, rax);
__ jcc(Assembler::notEqual, L);
__ stop("StubRoutines::forward exception: no pending exception (2)");
__ bind(L);
}
#endif
@@ -524,12 +524,12 @@
// *dest <- ex, return (orig *dest)
address generate_atomic_xchg_ptr() {
StubCodeMark mark(this, "StubRoutines", "atomic_xchg_ptr");
address start = __ pc();
- __ movq(rax, c_rarg0); // Copy to eax we need a return value anyhow
- __ xchgq(rax, Address(c_rarg1, 0)); // automatic LOCK
+ __ movptr(rax, c_rarg0); // copy the exchange value (c_rarg0) to rax; rax also carries the return value
+ __ xchgptr(rax, Address(c_rarg1, 0)); // swap rax with *c_rarg1 (automatic LOCK); rax = original *dest
__ ret(0);
return start;
}
@@ -618,14 +618,14 @@
// return *dest;
address generate_atomic_add_ptr() {
StubCodeMark mark(this, "StubRoutines", "atomic_add_ptr");
address start = __ pc();
- __ movq(rax, c_rarg0); // Copy to eax we need a return value anyhow
+ __ movptr(rax, c_rarg0); // keep a copy of the add value in rax; rax carries the return value
if ( os::is_MP() ) __ lock();
- __ xaddl(Address(c_rarg1, 0), c_rarg0);
- __ addl(rax, c_rarg0);
+ __ xaddptr(Address(c_rarg1, 0), c_rarg0); // *c_rarg1 += c_rarg0; c_rarg0 receives the previous *c_rarg1
+ __ addptr(rax, c_rarg0); // rax = add value + previous *dest, i.e. the updated *dest
__ ret(0);
return start;
}
@@ -654,13 +654,13 @@
const Address old_fp(rbp, 0);
const Address older_fp(rax, 0);
address start = __ pc();
__ enter();
- __ movq(rax, old_fp); // callers fp
- __ movq(rax, older_fp); // the frame for ps()
- __ popq(rbp);
+ __ movptr(rax, old_fp); // callers fp
+ __ movptr(rax, older_fp); // the frame for ps()
+ __ pop(rbp);
__ ret(0);
return start;
}
@@ -677,25 +677,25 @@
const Address mxcsr_save(rsp, 0);
if (CheckJNICalls) {
Label ok_ret;
- __ pushq(rax);
- __ subq(rsp, wordSize); // allocate a temp location
+ __ push(rax);
+ __ subptr(rsp, wordSize); // allocate a temp location
__ stmxcsr(mxcsr_save);
__ movl(rax, mxcsr_save);
__ andl(rax, MXCSR_MASK); // Only check control and mask bits
- __ cmpl(rax, *(int *)(StubRoutines::amd64::mxcsr_std()));
+ __ cmpl(rax, *(int *)(StubRoutines::x86::mxcsr_std()));
__ jcc(Assembler::equal, ok_ret);
__ warn("MXCSR changed by native JNI code, use -XX:+RestoreMXCSROnJNICall");
- __ ldmxcsr(ExternalAddress(StubRoutines::amd64::mxcsr_std()));
+ __ ldmxcsr(ExternalAddress(StubRoutines::x86::mxcsr_std()));
__ bind(ok_ret);
- __ addq(rsp, wordSize);
- __ popq(rax);
+ __ addptr(rsp, wordSize);
+ __ pop(rax);
}
__ ret(0);
return start;
@@ -707,14 +707,14 @@
address start = __ pc();
Label L;
- __ pushq(rax);
- __ pushq(c_rarg3);
- __ pushq(c_rarg2);
- __ pushq(c_rarg1);
+ __ push(rax);
+ __ push(c_rarg3);
+ __ push(c_rarg2);
+ __ push(c_rarg1);
__ movl(rax, 0x7f800000);
__ xorl(c_rarg3, c_rarg3);
__ movl(c_rarg2, inout);
__ movl(c_rarg1, c_rarg2);
@@ -725,16 +725,16 @@
__ movl(c_rarg3, 0x80000000);
__ movl(rax, 0x7fffffff);
__ cmovl(Assembler::positive, c_rarg3, rax);
__ bind(L);
- __ movq(inout, c_rarg3);
+ __ movptr(inout, c_rarg3);
- __ popq(c_rarg1);
- __ popq(c_rarg2);
- __ popq(c_rarg3);
- __ popq(rax);
+ __ pop(c_rarg1);
+ __ pop(c_rarg2);
+ __ pop(c_rarg3);
+ __ pop(rax);
__ ret(0);
return start;
}
@@ -744,14 +744,14 @@
Address inout(rsp, 5 * wordSize); // return address + 4 saves
address start = __ pc();
Label L;
- __ pushq(rax);
- __ pushq(c_rarg3);
- __ pushq(c_rarg2);
- __ pushq(c_rarg1);
+ __ push(rax);
+ __ push(c_rarg3);
+ __ push(c_rarg2);
+ __ push(c_rarg1);
__ movl(rax, 0x7f800000);
__ xorl(c_rarg3, c_rarg3);
__ movl(c_rarg2, inout);
__ movl(c_rarg1, c_rarg2);
@@ -759,19 +759,19 @@
__ cmpl(rax, c_rarg1); // NaN? -> 0
__ jcc(Assembler::negative, L);
__ testl(c_rarg2, c_rarg2); // signed ? min_jlong : max_jlong
__ mov64(c_rarg3, 0x8000000000000000);
__ mov64(rax, 0x7fffffffffffffff);
- __ cmovq(Assembler::positive, c_rarg3, rax);
+ __ cmov(Assembler::positive, c_rarg3, rax);
__ bind(L);
- __ movq(inout, c_rarg3);
+ __ movptr(inout, c_rarg3);
- __ popq(c_rarg1);
- __ popq(c_rarg2);
- __ popq(c_rarg3);
- __ popq(rax);
+ __ pop(c_rarg1);
+ __ pop(c_rarg2);
+ __ pop(c_rarg3);
+ __ pop(rax);
__ ret(0);
return start;
}
@@ -782,43 +782,43 @@
address start = __ pc();
Label L;
- __ pushq(rax);
- __ pushq(c_rarg3);
- __ pushq(c_rarg2);
- __ pushq(c_rarg1);
- __ pushq(c_rarg0);
+ __ push(rax);
+ __ push(c_rarg3);
+ __ push(c_rarg2);
+ __ push(c_rarg1);
+ __ push(c_rarg0);
__ movl(rax, 0x7ff00000);
__ movq(c_rarg2, inout);
__ movl(c_rarg3, c_rarg2);
- __ movq(c_rarg1, c_rarg2);
- __ movq(c_rarg0, c_rarg2);
+ __ mov(c_rarg1, c_rarg2);
+ __ mov(c_rarg0, c_rarg2);
__ negl(c_rarg3);
- __ shrq(c_rarg1, 0x20);
+ __ shrptr(c_rarg1, 0x20);
__ orl(c_rarg3, c_rarg2);
__ andl(c_rarg1, 0x7fffffff);
__ xorl(c_rarg2, c_rarg2);
__ shrl(c_rarg3, 0x1f);
__ orl(c_rarg1, c_rarg3);
__ cmpl(rax, c_rarg1);
__ jcc(Assembler::negative, L); // NaN -> 0
- __ testq(c_rarg0, c_rarg0); // signed ? min_jint : max_jint
+ __ testptr(c_rarg0, c_rarg0); // signed ? min_jint : max_jint
__ movl(c_rarg2, 0x80000000);
__ movl(rax, 0x7fffffff);
- __ cmovl(Assembler::positive, c_rarg2, rax);
+ __ cmov(Assembler::positive, c_rarg2, rax);
__ bind(L);
- __ movq(inout, c_rarg2);
+ __ movptr(inout, c_rarg2);
- __ popq(c_rarg0);
- __ popq(c_rarg1);
- __ popq(c_rarg2);
- __ popq(c_rarg3);
- __ popq(rax);
+ __ pop(c_rarg0);
+ __ pop(c_rarg1);
+ __ pop(c_rarg2);
+ __ pop(c_rarg3);
+ __ pop(rax);
__ ret(0);
return start;
}
@@ -829,23 +829,23 @@
address start = __ pc();
Label L;
- __ pushq(rax);
- __ pushq(c_rarg3);
- __ pushq(c_rarg2);
- __ pushq(c_rarg1);
- __ pushq(c_rarg0);
+ __ push(rax);
+ __ push(c_rarg3);
+ __ push(c_rarg2);
+ __ push(c_rarg1);
+ __ push(c_rarg0);
__ movl(rax, 0x7ff00000);
__ movq(c_rarg2, inout);
__ movl(c_rarg3, c_rarg2);
- __ movq(c_rarg1, c_rarg2);
- __ movq(c_rarg0, c_rarg2);
+ __ mov(c_rarg1, c_rarg2);
+ __ mov(c_rarg0, c_rarg2);
__ negl(c_rarg3);
- __ shrq(c_rarg1, 0x20);
+ __ shrptr(c_rarg1, 0x20);
__ orl(c_rarg3, c_rarg2);
__ andl(c_rarg1, 0x7fffffff);
__ xorl(c_rarg2, c_rarg2);
__ shrl(c_rarg3, 0x1f);
__ orl(c_rarg1, c_rarg3);
@@ -857,15 +857,15 @@
__ cmovq(Assembler::positive, c_rarg2, rax);
__ bind(L);
__ movq(inout, c_rarg2);
- __ popq(c_rarg0);
- __ popq(c_rarg1);
- __ popq(c_rarg2);
- __ popq(c_rarg3);
- __ popq(rax);
+ __ pop(c_rarg0);
+ __ pop(c_rarg1);
+ __ pop(c_rarg2);
+ __ pop(c_rarg3);
+ __ pop(rax);
__ ret(0);
return start;
}
@@ -888,21 +888,21 @@
// SIGBUS/OBJERR.)
address generate_handler_for_unsafe_access() {
StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
address start = __ pc();
- __ pushq(0); // hole for return address-to-be
- __ pushaq(); // push registers
+ __ push(0); // hole for return address-to-be
+ __ pusha(); // push registers
Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord);
- __ subq(rsp, frame::arg_reg_save_area_bytes);
+ __ subptr(rsp, frame::arg_reg_save_area_bytes); // reserve frame::arg_reg_save_area_bytes below the saved registers for the call
BLOCK_COMMENT("call handle_unsafe_access");
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
- __ addq(rsp, frame::arg_reg_save_area_bytes);
+ __ addptr(rsp, frame::arg_reg_save_area_bytes); // release the area reserved before the call
- __ movq(next_pc, rax); // stuff next address
- __ popaq();
+ __ movptr(next_pc, rax); // stuff next address (call result in rax) into the hole pushed first
+ __ popa(); // restore the registers saved by pusha above
__ ret(0); // jump to next address
return start;
}
@@ -912,101 +912,123 @@
// all args on stack!
//
// Stack after saving c_rarg3:
// [tos + 0]: saved c_rarg3
// [tos + 1]: saved c_rarg2
- // [tos + 2]: saved flags
- // [tos + 3]: return address
- // * [tos + 4]: error message (char*)
- // * [tos + 5]: object to verify (oop)
- // * [tos + 6]: saved rax - saved by caller and bashed
+ // [tos + 2]: saved r12 (several TemplateTable methods use it)
+ // [tos + 3]: saved flags
+ // [tos + 4]: return address
+ // * [tos + 5]: error message (char*)
+ // * [tos + 6]: object to verify (oop)
+ // * [tos + 7]: saved rax - saved by caller and bashed
// * = popped on exit
address generate_verify_oop() {
StubCodeMark mark(this, "StubRoutines", "verify_oop");
address start = __ pc();
Label exit, error;
- __ pushfq();
+ __ pushf();
__ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
+ __ push(r12);
+
// save c_rarg2 and c_rarg3
- __ pushq(c_rarg2);
- __ pushq(c_rarg3);
+ __ push(c_rarg2);
+ __ push(c_rarg3);
+
+ enum {
+ // Offsets from rsp after the four pushes above (flags, r12, c_rarg2, c_rarg3).
+ oop_to_verify = 6 * wordSize,
+ saved_rax = 7 * wordSize,
+
+ // Offsets from rsp just before the call to MacroAssembler::debug64(), see below.
+ return_addr = 16 * wordSize,
+ error_msg = 17 * wordSize
+ };
// get object
- __ movq(rax, Address(rsp, 5 * wordSize));
+ __ movptr(rax, Address(rsp, oop_to_verify));
// make sure object is 'reasonable'
- __ testq(rax, rax);
+ __ testptr(rax, rax);
__ jcc(Assembler::zero, exit); // if obj is NULL it is OK
// Check if the oop is in the right area of memory
- __ movq(c_rarg2, rax);
+ __ movptr(c_rarg2, rax);
__ movptr(c_rarg3, (int64_t) Universe::verify_oop_mask());
- __ andq(c_rarg2, c_rarg3);
+ __ andptr(c_rarg2, c_rarg3);
__ movptr(c_rarg3, (int64_t) Universe::verify_oop_bits());
- __ cmpq(c_rarg2, c_rarg3);
+ __ cmpptr(c_rarg2, c_rarg3);
__ jcc(Assembler::notZero, error);
+ // set r12 to heapbase for load_klass()
+ __ reinit_heapbase();
+
// make sure klass is 'reasonable'
- __ movq(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
- __ testq(rax, rax);
+ __ load_klass(rax, rax); // get klass
+ __ testptr(rax, rax);
__ jcc(Assembler::zero, error); // if klass is NULL it is broken
// Check if the klass is in the right area of memory
- __ movq(c_rarg2, rax);
+ __ mov(c_rarg2, rax);
__ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
- __ andq(c_rarg2, c_rarg3);
+ __ andptr(c_rarg2, c_rarg3);
__ movptr(c_rarg3, (int64_t) Universe::verify_klass_bits());
- __ cmpq(c_rarg2, c_rarg3);
+ __ cmpptr(c_rarg2, c_rarg3);
__ jcc(Assembler::notZero, error);
// make sure klass' klass is 'reasonable'
- __ movq(rax, Address(rax, oopDesc::klass_offset_in_bytes()));
- __ testq(rax, rax);
+ __ load_klass(rax, rax); // get klass' klass
+ __ testptr(rax, rax);
__ jcc(Assembler::zero, error); // if klass' klass is NULL it is broken
// Check if the klass' klass is in the right area of memory
__ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
- __ andq(rax, c_rarg3);
+ __ andptr(rax, c_rarg3);
__ movptr(c_rarg3, (int64_t) Universe::verify_klass_bits());
- __ cmpq(rax, c_rarg3);
+ __ cmpptr(rax, c_rarg3);
__ jcc(Assembler::notZero, error);
// return if everything seems ok
__ bind(exit);
- __ movq(rax, Address(rsp, 6 * wordSize)); // get saved rax back
- __ popq(c_rarg3); // restore c_rarg3
- __ popq(c_rarg2); // restore c_rarg2
- __ popfq(); // restore flags
+ __ movptr(rax, Address(rsp, saved_rax)); // get saved rax back
+ __ pop(c_rarg3); // restore c_rarg3
+ __ pop(c_rarg2); // restore c_rarg2
+ __ pop(r12); // restore r12
+ __ popf(); // restore flags
__ ret(3 * wordSize); // pop caller saved stuff
// handle errors
__ bind(error);
- __ movq(rax, Address(rsp, 6 * wordSize)); // get saved rax back
- __ popq(c_rarg3); // get saved c_rarg3 back
- __ popq(c_rarg2); // get saved c_rarg2 back
- __ popfq(); // get saved flags off stack --
+ __ movptr(rax, Address(rsp, saved_rax)); // get saved rax back
+ __ pop(c_rarg3); // get saved c_rarg3 back
+ __ pop(c_rarg2); // get saved c_rarg2 back
+ __ pop(r12); // get saved r12 back
+ __ popf(); // get saved flags off stack --
// will be ignored
- __ pushaq(); // push registers
+ __ pusha(); // push registers
// (rip is
// already pushed)
- // debug(char* msg, int64_t regs[])
+ // debug64(char* msg, int64_t pc, int64_t regs[])
// We've popped the registers we'd saved (c_rarg3, c_rarg2, r12 and flags), and
// pushed all the registers, so now the stack looks like:
// [tos + 0] 16 saved registers
// [tos + 16] return address
- // [tos + 17] error message (char*)
+ // * [tos + 17] error message (char*)
+ // * [tos + 18] object to verify (oop)
+ // * [tos + 19] saved rax - saved by caller and bashed
+ // * = popped on exit
- __ movq(c_rarg0, Address(rsp, 17 * wordSize)); // pass address of error message
- __ movq(c_rarg1, rsp); // pass address of regs on stack
- __ movq(r12, rsp); // remember rsp
- __ subq(rsp, frame::arg_reg_save_area_bytes);// windows
- __ andq(rsp, -16); // align stack as required by ABI
+ __ movptr(c_rarg0, Address(rsp, error_msg)); // pass address of error message
+ __ movptr(c_rarg1, Address(rsp, return_addr)); // pass return address
+ __ movq(c_rarg2, rsp); // pass address of regs on stack
+ __ mov(r12, rsp); // remember rsp
+ __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
+ __ andptr(rsp, -16); // align stack as required by ABI
BLOCK_COMMENT("call MacroAssembler::debug");
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug)));
- __ movq(rsp, r12); // restore rsp
- __ popaq(); // pop registers
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
+ __ mov(rsp, r12); // restore rsp
+ __ popa(); // pop registers (includes r12)
__ ret(3 * wordSize); // pop caller saved stuff
return start;
}
@@ -1036,11 +1058,11 @@
#ifdef ASSERT
Label L;
assert_different_registers(Rtmp, Rint);
__ movslq(Rtmp, Rint);
__ cmpq(Rtmp, Rint);
- __ jccb(Assembler::equal, L);
+ __ jcc(Assembler::equal, L);
__ stop("high 32-bits of int value are not 0");
__ bind(L);
#endif
}
@@ -1065,20 +1087,20 @@
const Register from = c_rarg0;
const Register to = c_rarg1;
const Register count = c_rarg2;
const Register end_from = rax;
- __ cmpq(to, from);
- __ leaq(end_from, Address(from, count, sf, 0));
+ __ cmpptr(to, from);
+ __ lea(end_from, Address(from, count, sf, 0));
if (NOLp == NULL) {
ExternalAddress no_overlap(no_overlap_target);
__ jump_cc(Assembler::belowEqual, no_overlap);
- __ cmpq(to, end_from);
+ __ cmpptr(to, end_from);
__ jump_cc(Assembler::aboveEqual, no_overlap);
} else {
__ jcc(Assembler::belowEqual, (*NOLp));
- __ cmpq(to, end_from);
+ __ cmpptr(to, end_from);
__ jcc(Assembler::aboveEqual, (*NOLp));
}
}
// Shuffle first three arg regs on Windows into Linux/Solaris locations.
@@ -1098,30 +1120,30 @@
assert(nargs == 3 || nargs == 4, "else fix");
#ifdef _WIN64
assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
"unexpected argument registers");
if (nargs >= 4)
- __ movq(rax, r9); // r9 is also saved_rdi
- __ movq(saved_rdi, rdi);
- __ movq(saved_rsi, rsi);
- __ movq(rdi, rcx); // c_rarg0
- __ movq(rsi, rdx); // c_rarg1
- __ movq(rdx, r8); // c_rarg2
+ __ mov(rax, r9); // r9 is also saved_rdi
+ __ movptr(saved_rdi, rdi);
+ __ movptr(saved_rsi, rsi);
+ __ mov(rdi, rcx); // c_rarg0
+ __ mov(rsi, rdx); // c_rarg1
+ __ mov(rdx, r8); // c_rarg2
if (nargs >= 4)
- __ movq(rcx, rax); // c_rarg3 (via rax)
+ __ mov(rcx, rax); // c_rarg3 (via rax)
#else
assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
"unexpected argument registers");
#endif
}
void restore_arg_regs() {
const Register saved_rdi = r9;
const Register saved_rsi = r10;
#ifdef _WIN64
- __ movq(rdi, saved_rdi);
- __ movq(rsi, saved_rsi);
+ __ movptr(rdi, saved_rdi); // rdi <- r9, where setup_arg_regs() stashed it (Win64 only)
+ __ movptr(rsi, saved_rsi); // rsi <- r10, where setup_arg_regs() stashed it (Win64 only)
#endif
}
// Generate code for an array write pre barrier
//
@@ -1129,34 +1151,41 @@
// count - element count
//
// Destroy no registers!
//
void gen_write_ref_array_pre_barrier(Register addr, Register count) {
-#if 0 // G1 - only
- assert_different_registers(addr, c_rarg1);
- assert_different_registers(count, c_rarg0);
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
- __ pushaq(); // push registers
- __ movq(c_rarg0, addr);
- __ movq(c_rarg1, count);
- __ call(RuntimeAddress(BarrierSet::static_write_ref_array_pre));
- __ popaq();
+ __ pusha(); // push registers
+ if (count == c_rarg0) {
+ if (addr == c_rarg1) {
+ // addr and count each sit in the other's argument register: swap them
+ __ xchgptr(c_rarg1, c_rarg0);
+ } else {
+ __ movptr(c_rarg1, count); // move count out of c_rarg0 first ...
+ __ movptr(c_rarg0, addr); // ... because this overwrites c_rarg0
+ }
+
+ } else {
+ __ movptr(c_rarg0, addr); // count is not in c_rarg0, so addr can be placed first
+ __ movptr(c_rarg1, count);
+ }
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
+ __ popa(); // pop the registers pushed by pusha above
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
- default :
+ default:
ShouldNotReachHere();
}
-#endif // 0 G1 - only
}
//
// Generate code for an array write post barrier
//
@@ -1169,51 +1198,62 @@
// The ending address is inclusive.
void gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
assert_different_registers(start, end, scratch);
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
-#if 0 // G1 - only
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
- __ pushaq(); // push registers (overkill)
+ __ pusha(); // push registers (overkill)
// must compute element count unless barrier set interface is changed (other platforms supply count)
assert_different_registers(start, end, scratch);
- __ leaq(scratch, Address(end, wordSize));
- __ subq(scratch, start);
- __ shrq(scratch, LogBytesPerWord);
- __ movq(c_rarg0, start);
- __ movq(c_rarg1, scratch);
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
- __ popaq();
+ __ lea(scratch, Address(end, wordSize)); // scratch = end + wordSize (end is inclusive)
+ __ subptr(scratch, start); // scratch = byte length of [start, end]
+ __ shrptr(scratch, LogBytesPerWord); // scratch = length in words
+ __ mov(c_rarg0, start);
+ __ mov(c_rarg1, scratch);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
+ __ popa(); // pop the registers pushed by pusha above
}
break;
-#endif // 0 G1 - only
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
Label L_loop;
- __ shrq(start, CardTableModRefBS::card_shift);
- __ shrq(end, CardTableModRefBS::card_shift);
- __ subq(end, start); // number of bytes to copy
+ __ shrptr(start, CardTableModRefBS::card_shift);
+ __ shrptr(end, CardTableModRefBS::card_shift);
+ __ subptr(end, start); // card bytes to clear to 0, minus one (end is inclusive)
+
+ intptr_t disp = (intptr_t) ct->byte_map_base;
+ if (__ is_simm32(disp)) {
+ Address cardtable(noreg, noreg, Address::no_scale, disp); // base fits a signed 32-bit displacement
+ __ lea(scratch, cardtable);
+ } else {
+ ExternalAddress cardtable((address)disp); // otherwise load it as an external address
+ __ lea(scratch, cardtable);
+ }
const Register count = end; // 'end' register contains bytes count now
- __ lea(scratch, ExternalAddress((address)ct->byte_map_base));
- __ addq(start, scratch);
+ __ addptr(start, scratch); // start = address of the first card byte
__ BIND(L_loop);
__ movb(Address(start, count, Address::times_1), 0);
- __ decrementq(count);
+ __ decrement(count); // walk down from the last card; loop also runs for count == 0
__ jcc(Assembler::greaterEqual, L_loop);
}
+ break;
+ default:
+ ShouldNotReachHere();
+
}
}
+
// Copy big chunks forward
//
// Inputs:
// end_from - source arrays end address
// end_to - destination array end address
@@ -1227,22 +1267,30 @@
Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
DEBUG_ONLY(__ stop("enter at entry label, not here"));
Label L_loop;
__ align(16);
__ BIND(L_loop);
+ if(UseUnalignedLoadStores) {
+ __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+ __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
+
+ } else {
__ movq(to, Address(end_from, qword_count, Address::times_8, -24));
__ movq(Address(end_to, qword_count, Address::times_8, -24), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, -16));
__ movq(Address(end_to, qword_count, Address::times_8, -16), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
__ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
__ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+ }
__ BIND(L_copy_32_bytes);
- __ addq(qword_count, 4);
+ __ addptr(qword_count, 4);
__ jcc(Assembler::lessEqual, L_loop);
- __ subq(qword_count, 4);
+ __ subptr(qword_count, 4);
__ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
}
// Copy big chunks backward
@@ -1260,22 +1308,30 @@
Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
DEBUG_ONLY(__ stop("enter at entry label, not here"));
Label L_loop;
__ align(16);
__ BIND(L_loop);
+ if(UseUnalignedLoadStores) {
+ __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
+ __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
+
+ } else {
__ movq(to, Address(from, qword_count, Address::times_8, 24));
__ movq(Address(dest, qword_count, Address::times_8, 24), to);
__ movq(to, Address(from, qword_count, Address::times_8, 16));
__ movq(Address(dest, qword_count, Address::times_8, 16), to);
__ movq(to, Address(from, qword_count, Address::times_8, 8));
__ movq(Address(dest, qword_count, Address::times_8, 8), to);
__ movq(to, Address(from, qword_count, Address::times_8, 0));
__ movq(Address(dest, qword_count, Address::times_8, 0), to);
+ }
__ BIND(L_copy_32_bytes);
- __ subq(qword_count, 4);
+ __ subptr(qword_count, 4);
__ jcc(Assembler::greaterEqual, L_loop);
- __ addq(qword_count, 4);
+ __ addptr(qword_count, 4);
__ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
}
// Arguments:
@@ -1323,57 +1379,57 @@
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'count' are now valid
- __ movq(byte_count, count);
- __ shrq(count, 3); // count => qword_count
+ __ movptr(byte_count, count);
+ __ shrptr(count, 3); // count => qword_count
// Copy from low to high addresses. Use 'to' as scratch.
- __ leaq(end_from, Address(from, qword_count, Address::times_8, -8));
- __ leaq(end_to, Address(to, qword_count, Address::times_8, -8));
- __ negq(qword_count); // make the count negative
+ __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+ __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
+ __ negptr(qword_count); // make the count negative
__ jmp(L_copy_32_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
- __ incrementq(qword_count);
+ __ increment(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
// Check for and copy trailing dword
__ BIND(L_copy_4_bytes);
- __ testq(byte_count, 4);
+ __ testl(byte_count, 4);
__ jccb(Assembler::zero, L_copy_2_bytes);
__ movl(rax, Address(end_from, 8));
__ movl(Address(end_to, 8), rax);
- __ addq(end_from, 4);
- __ addq(end_to, 4);
+ __ addptr(end_from, 4);
+ __ addptr(end_to, 4);
// Check for and copy trailing word
__ BIND(L_copy_2_bytes);
- __ testq(byte_count, 2);
+ __ testl(byte_count, 2);
__ jccb(Assembler::zero, L_copy_byte);
__ movw(rax, Address(end_from, 8));
__ movw(Address(end_to, 8), rax);
- __ addq(end_from, 2);
- __ addq(end_to, 2);
+ __ addptr(end_from, 2);
+ __ addptr(end_to, 2);
// Check for and copy trailing byte
__ BIND(L_copy_byte);
- __ testq(byte_count, 1);
+ __ testl(byte_count, 1);
__ jccb(Assembler::zero, L_exit);
__ movb(rax, Address(end_from, 8));
__ movb(Address(end_to, 8), rax);
__ BIND(L_exit);
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr);
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
// Copy in 32-bytes chunks
copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
@@ -1419,56 +1475,56 @@
array_overlap_test(disjoint_byte_copy_entry, Address::times_1);
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'count' are now valid
- __ movq(byte_count, count);
- __ shrq(count, 3); // count => qword_count
+ __ movptr(byte_count, count);
+ __ shrptr(count, 3); // count => qword_count
// Copy from high to low addresses.
// Check for and copy trailing byte
- __ testq(byte_count, 1);
+ __ testl(byte_count, 1);
__ jcc(Assembler::zero, L_copy_2_bytes);
__ movb(rax, Address(from, byte_count, Address::times_1, -1));
__ movb(Address(to, byte_count, Address::times_1, -1), rax);
- __ decrementq(byte_count); // Adjust for possible trailing word
+ __ decrement(byte_count); // Adjust for possible trailing word
// Check for and copy trailing word
__ BIND(L_copy_2_bytes);
- __ testq(byte_count, 2);
+ __ testl(byte_count, 2);
__ jcc(Assembler::zero, L_copy_4_bytes);
__ movw(rax, Address(from, byte_count, Address::times_1, -2));
__ movw(Address(to, byte_count, Address::times_1, -2), rax);
// Check for and copy trailing dword
__ BIND(L_copy_4_bytes);
- __ testq(byte_count, 4);
+ __ testl(byte_count, 4);
__ jcc(Assembler::zero, L_copy_32_bytes);
__ movl(rax, Address(from, qword_count, Address::times_8));
__ movl(Address(to, qword_count, Address::times_8), rax);
__ jmp(L_copy_32_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
- __ decrementq(qword_count);
+ __ decrement(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr);
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
// Copy in 32-bytes chunks
copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr);
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
@@ -1517,50 +1573,50 @@
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'count' are now valid
- __ movq(word_count, count);
- __ shrq(count, 2); // count => qword_count
+ __ movptr(word_count, count);
+ __ shrptr(count, 2); // count => qword_count
// Copy from low to high addresses. Use 'to' as scratch.
- __ leaq(end_from, Address(from, qword_count, Address::times_8, -8));
- __ leaq(end_to, Address(to, qword_count, Address::times_8, -8));
- __ negq(qword_count);
+ __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+ __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
+ __ negptr(qword_count);
__ jmp(L_copy_32_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
- __ incrementq(qword_count);
+ __ increment(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
// Original 'dest' is trashed, so we can't use it as a
// base register for a possible trailing word copy
// Check for and copy trailing dword
__ BIND(L_copy_4_bytes);
- __ testq(word_count, 2);
+ __ testl(word_count, 2);
__ jccb(Assembler::zero, L_copy_2_bytes);
__ movl(rax, Address(end_from, 8));
__ movl(Address(end_to, 8), rax);
- __ addq(end_from, 4);
- __ addq(end_to, 4);
+ __ addptr(end_from, 4);
+ __ addptr(end_to, 4);
// Check for and copy trailing word
__ BIND(L_copy_2_bytes);
- __ testq(word_count, 1);
+ __ testl(word_count, 1);
__ jccb(Assembler::zero, L_exit);
__ movw(rax, Address(end_from, 8));
__ movw(Address(end_to, 8), rax);
__ BIND(L_exit);
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr);
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
// Copy in 32-bytes chunks
copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
@@ -1606,57 +1662,58 @@
array_overlap_test(disjoint_short_copy_entry, Address::times_2);
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'count' are now valid
- __ movq(word_count, count);
- __ shrq(count, 2); // count => qword_count
+ __ movptr(word_count, count);
+ __ shrptr(count, 2); // count => qword_count
// Copy from high to low addresses. Use 'to' as scratch.
// Check for and copy trailing word
- __ testq(word_count, 1);
+ __ testl(word_count, 1);
__ jccb(Assembler::zero, L_copy_4_bytes);
__ movw(rax, Address(from, word_count, Address::times_2, -2));
__ movw(Address(to, word_count, Address::times_2, -2), rax);
// Check for and copy trailing dword
__ BIND(L_copy_4_bytes);
- __ testq(word_count, 2);
+ __ testl(word_count, 2);
__ jcc(Assembler::zero, L_copy_32_bytes);
__ movl(rax, Address(from, qword_count, Address::times_8));
__ movl(Address(to, qword_count, Address::times_8), rax);
__ jmp(L_copy_32_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
- __ decrementq(qword_count);
+ __ decrement(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr);
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
// Copy in 32-bytes chunks
copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr);
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
+ // is_oop - true => oop array, so generate store check code
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
@@ -1666,13 +1723,13 @@
// the hardware handle it. The two dwords within qwords that span
// cache line boundaries will still be loaded and stored atomically.
//
// Side Effects:
// disjoint_int_copy_entry is set to the no-overlap entry point
- // used by generate_conjoint_int_copy().
+ // used by generate_conjoint_int_oop_copy().
//
- address generate_disjoint_int_copy(bool aligned, const char *name) {
+ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
@@ -1681,51 +1738,66 @@
const Register count = rdx; // elements count
const Register dword_count = rcx;
const Register qword_count = count;
const Register end_from = from; // source array end address
const Register end_to = to; // destination array end address
+ const Register saved_to = r11; // saved destination array address
// End pointers are inclusive, and if count is not zero they point
// to the last unit copied: end_to[0] := end_from[0]
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
- disjoint_int_copy_entry = __ pc();
+ (is_oop ? disjoint_oop_copy_entry : disjoint_int_copy_entry) = __ pc();
+
+ if (is_oop) {
+ // no registers are destroyed by this call
+ gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
+ }
+
BLOCK_COMMENT("Entry:");
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
+ if (is_oop) {
+ __ movq(saved_to, to);
+ }
+
// 'from', 'to' and 'count' are now valid
- __ movq(dword_count, count);
- __ shrq(count, 1); // count => qword_count
+ __ movptr(dword_count, count);
+ __ shrptr(count, 1); // count => qword_count
// Copy from low to high addresses. Use 'to' as scratch.
- __ leaq(end_from, Address(from, qword_count, Address::times_8, -8));
- __ leaq(end_to, Address(to, qword_count, Address::times_8, -8));
- __ negq(qword_count);
+ __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+ __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
+ __ negptr(qword_count);
__ jmp(L_copy_32_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
- __ incrementq(qword_count);
+ __ increment(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
// Check for and copy trailing dword
__ BIND(L_copy_4_bytes);
- __ testq(dword_count, 1); // Only byte test since the value is 0 or 1
+ __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
__ jccb(Assembler::zero, L_exit);
__ movl(rax, Address(end_from, 8));
__ movl(Address(end_to, 8), rax);
__ BIND(L_exit);
+ if (is_oop) {
+ __ leaq(end_to, Address(saved_to, dword_count, Address::times_4, -4));
+ gen_write_ref_array_post_barrier(saved_to, end_to, rax);
+ }
inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
// Copy 32-bytes chunks
copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
@@ -1735,10 +1807,11 @@
}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
+ // is_oop - true => oop array, so generate store check code
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
@@ -1746,65 +1819,81 @@
//
// If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
// the hardware handle it. The two dwords within qwords that span
// cache line boundaries will still be loaded and stored atomically.
//
// Emits the conjoint copy stub for 4-byte elements (jint, or oops when
// is_oop is true) and returns the address at which the stub's code begins.
// Elements are copied from high to low addresses. 'aligned' is not
// referenced anywhere in this body; 'name' labels the stub via StubCodeMark.
- address generate_conjoint_int_copy(bool aligned, const char *name) {
+ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
// NOTE(review): L_copy_2_bytes is declared but never bound or branched to
// in this function.
- Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes;
+ Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
// dword_count keeps the original element count; qword_count aliases 'count'
// (rdx) and holds count/2 once it has been shifted below.
const Register dword_count = rcx;
const Register qword_count = count;
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
- int_copy_entry = __ pc();
// NOTE(review): the pre-barrier code is emitted before the entry address
// below is captured, so a jump straight to oop_copy_entry lands after it and
// does not execute it -- confirm this is intended.
+ if (is_oop) {
+ // no registers are destroyed by this call
+ gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
+ }
+
// Record the current emission point as the oop or int entry, chosen by is_oop.
+ (is_oop ? oop_copy_entry : int_copy_entry) = __ pc();
BLOCK_COMMENT("Entry:");
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
// Hand the matching disjoint entry point and the 4-byte element scale to the
// overlap test (presumably it branches there when the ranges do not
// overlap -- verify against array_overlap_test).
- array_overlap_test(disjoint_int_copy_entry, Address::times_4);
+ array_overlap_test(is_oop ? disjoint_oop_copy_entry : disjoint_int_copy_entry,
+ Address::times_4);
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
+ assert_clean_int(count, rax); // Make sure 'count' is clean int.
// 'from', 'to' and 'count' are now valid
- __ movq(dword_count, count);
- __ shrq(count, 1); // count => qword_count
+ __ movptr(dword_count, count);
+ __ shrptr(count, 1); // count => qword_count
// Copy from high to low addresses. Use 'to' as scratch.
// Check for and copy trailing dword
// An odd element count leaves one dword beyond the qword pairs; it is the
// element at index dword_count-1 and is copied first.
- __ testq(dword_count, 1);
+ __ testl(dword_count, 1);
__ jcc(Assembler::zero, L_copy_32_bytes);
__ movl(rax, Address(from, dword_count, Address::times_4, -4));
__ movl(Address(to, dword_count, Address::times_4, -4), rax);
__ jmp(L_copy_32_bytes);
// Copy trailing qwords
// One qword per iteration at index qword_count-1, counting down until
// qword_count reaches zero.
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
- __ decrementq(qword_count);
+ __ decrement(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
// Oop copies skip the plain return below and go to L_exit, where the
// post-barrier is emitted.
+ if (is_oop) {
+ __ jmp(L_exit);
+ }
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
// Copy in 32-bytes chunks
copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
// Common exit. For oops, end_to = to + dword_count*4 - 4, i.e. the address
// of the last destination element, is passed to the post-barrier. rdx is
// reused for it; it is the same register as 'count'/'qword_count'.
+ __ bind(L_exit);
+ if (is_oop) {
+ Register end_to = rdx;
+ __ leaq(end_to, Address(to, dword_count, Address::times_4, -4));
+ gen_write_ref_array_post_barrier(to, end_to, rax);
+ }
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
@@ -1857,28 +1946,28 @@
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'qword_count' are now valid
// Copy from low to high addresses. Use 'to' as scratch.
- __ leaq(end_from, Address(from, qword_count, Address::times_8, -8));
- __ leaq(end_to, Address(to, qword_count, Address::times_8, -8));
- __ negq(qword_count);
+ __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+ __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
+ __ negptr(qword_count);
__ jmp(L_copy_32_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
- __ incrementq(qword_count);
+ __ increment(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
if (is_oop) {
__ jmp(L_exit);
} else {
inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
}
// Copy 64-byte chunks
@@ -1890,11 +1979,11 @@
inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
} else {
inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
}
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
@@ -1924,15 +2013,18 @@
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
address disjoint_copy_entry = NULL;
if (is_oop) {
+ assert(!UseCompressedOops, "shouldn't be called for compressed oops");
disjoint_copy_entry = disjoint_oop_copy_entry;
oop_copy_entry = __ pc();
+ array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
} else {
disjoint_copy_entry = disjoint_long_copy_entry;
long_copy_entry = __ pc();
+ array_overlap_test(disjoint_long_copy_entry, Address::times_8);
}
BLOCK_COMMENT("Entry:");
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
array_overlap_test(disjoint_copy_entry, Address::times_8);
@@ -1941,49 +2033,47 @@
// 'from', 'to' and 'qword_count' are now valid
if (is_oop) {
// Save to and count for store barrier
- __ movq(saved_count, qword_count);
+ __ movptr(saved_count, qword_count);
// No registers are destroyed by this call
gen_write_ref_array_pre_barrier(to, saved_count);
}
- // Copy from high to low addresses. Use rcx as scratch.
-
__ jmp(L_copy_32_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
- __ decrementq(qword_count);
+ __ decrement(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
if (is_oop) {
__ jmp(L_exit);
} else {
inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
}
// Copy in 32-bytes chunks
copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
if (is_oop) {
__ BIND(L_exit);
- __ leaq(rcx, Address(to, saved_count, Address::times_8, -8));
+ __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
gen_write_ref_array_post_barrier(to, rcx, rax);
inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
} else {
inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
}
restore_arg_regs();
- __ xorq(rax, rax); // return 0
+ __ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
@@ -2008,16 +2098,16 @@
Klass::secondary_super_cache_offset_in_bytes());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
// if the pointers are equal, we are done (e.g., String[] elements)
- __ cmpq(super_klass, sub_klass);
+ __ cmpptr(super_klass, sub_klass);
__ jcc(Assembler::equal, L_success);
// check the supertype display:
Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
- __ cmpq(super_klass, super_check_addr); // test the super type
+ __ cmpptr(super_klass, super_check_addr); // test the super type
__ jcc(Assembler::equal, L_success);
// if it was a primary super, we can just fail immediately
__ cmpl(super_check_offset, sc_offset);
__ jcc(Assembler::notEqual, L_miss);
@@ -2026,35 +2116,42 @@
// The repne_scan instruction uses fixed registers, which we must spill.
// (We need a couple more temps in any case.)
// This code is rarely used, so simplicity is a virtue here.
inc_counter_np(SharedRuntime::_partial_subtype_ctr);
{
- __ pushq(rax);
- __ pushq(rcx);
- __ pushq(rdi);
+ __ push(rax);
+ __ push(rcx);
+ __ push(rdi);
assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
- __ movq(rdi, secondary_supers_addr);
+ __ movptr(rdi, secondary_supers_addr);
// Load the array length.
__ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
// Skip to start of data.
- __ addq(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+ __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Scan rcx words at [rdi] for occurrence of rax
// Set NZ/Z based on last compare
- __ movq(rax, super_klass);
+ __ movptr(rax, super_klass);
+ if (UseCompressedOops) {
+ // Compare against compressed form. Don't need to uncompress because
+ // looks like orig rax is restored in popq below.
+ __ encode_heap_oop(rax);
+ __ repne_scanl();
+ } else {
__ repne_scan();
+ }
// Unspill the temp. registers:
- __ popq(rdi);
- __ popq(rcx);
- __ popq(rax);
+ __ pop(rdi);
+ __ pop(rcx);
+ __ pop(rax);
__ jcc(Assembler::notEqual, L_miss);
}
// Success. Cache the super we found and proceed in triumph.
- __ movq(super_cache_addr, super_klass); // note: rax is dead
+ __ movptr(super_cache_addr, super_klass); // note: rax is dead
__ jmp(L_success);
// Fall through on failure!
__ BIND(L_miss);
}
@@ -2116,11 +2213,11 @@
#ifdef ASSERT
// caller guarantees that the arrays really are different
// otherwise, we would have to make conjoint checks
{ Label L;
- array_overlap_test(L, Address::times_8);
+ array_overlap_test(L, TIMES_OOP);
__ stop("checkcast_copy within a single array");
__ bind(L);
}
#endif //ASSERT
@@ -2130,20 +2227,20 @@
saved_r14_offset,
saved_rbp_offset,
saved_rip_offset,
saved_rarg0_offset
};
- __ subq(rsp, saved_rbp_offset * wordSize);
- __ movq(Address(rsp, saved_r13_offset * wordSize), r13);
- __ movq(Address(rsp, saved_r14_offset * wordSize), r14);
+ __ subptr(rsp, saved_rbp_offset * wordSize);
+ __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
+ __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
// ckoff => rcx, ckval => r8
// r9 and r10 may be used to save non-volatile registers
#ifdef _WIN64
// last argument (#4) is on stack on Win64
const int ckval_offset = saved_rarg0_offset + 4;
- __ movq(ckval, Address(rsp, ckval_offset * wordSize));
+ __ movptr(ckval, Address(rsp, ckval_offset * wordSize));
#endif
// check that int operands are properly extended to size_t
assert_clean_int(length, rax);
assert_clean_int(ckoff, rax);
@@ -2161,75 +2258,74 @@
__ bind(L);
}
#endif //ASSERT
// Loop-invariant addresses. They are exclusive end pointers.
- Address end_from_addr(from, length, Address::times_8, 0);
- Address end_to_addr(to, length, Address::times_8, 0);
+ Address end_from_addr(from, length, TIMES_OOP, 0);
+ Address end_to_addr(to, length, TIMES_OOP, 0);
// Loop-variant addresses. They assume post-incremented count < 0.
- Address from_element_addr(end_from, count, Address::times_8, 0);
- Address to_element_addr(end_to, count, Address::times_8, 0);
- Address oop_klass_addr(rax_oop, oopDesc::klass_offset_in_bytes());
+ Address from_element_addr(end_from, count, TIMES_OOP, 0);
+ Address to_element_addr(end_to, count, TIMES_OOP, 0);
gen_write_ref_array_pre_barrier(to, count);
// Copy from low to high addresses, indexed from the end of each array.
- __ leaq(end_from, end_from_addr);
- __ leaq(end_to, end_to_addr);
- __ movq(r14_length, length); // save a copy of the length
+ __ lea(end_from, end_from_addr);
+ __ lea(end_to, end_to_addr);
+ __ movptr(r14_length, length); // save a copy of the length
assert(length == count, ""); // else fix next line:
- __ negq(count); // negate and test the length
+ __ negptr(count); // negate and test the length
__ jcc(Assembler::notZero, L_load_element);
// Empty array: Nothing to do.
- __ xorq(rax, rax); // return 0 on (trivial) success
+ __ xorptr(rax, rax); // return 0 on (trivial) success
__ jmp(L_done);
// ======== begin loop ========
// (Loop is rotated; its entry is L_load_element.)
// Loop control:
// for (count = -count; count != 0; count++)
// Base pointers src, dst are biased by 8*(count-1),to last element.
__ align(16);
__ BIND(L_store_element);
- __ movq(to_element_addr, rax_oop); // store the oop
- __ incrementq(count); // increment the count toward zero
+ __ store_heap_oop(to_element_addr, rax_oop); // store the oop
+ __ increment(count); // increment the count toward zero
__ jcc(Assembler::zero, L_do_card_marks);
// ======== loop entry is here ========
__ BIND(L_load_element);
- __ movq(rax_oop, from_element_addr); // load the oop
- __ testq(rax_oop, rax_oop);
+ __ load_heap_oop(rax_oop, from_element_addr); // load the oop
+ __ testptr(rax_oop, rax_oop);
__ jcc(Assembler::zero, L_store_element);
- __ movq(r11_klass, oop_klass_addr); // query the object klass
+ __ load_klass(r11_klass, rax_oop);// query the object klass
generate_type_check(r11_klass, ckoff, ckval, L_store_element);
// ======== end loop ========
// It was a real error; we must depend on the caller to finish the job.
// Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
// Emit GC store barriers for the oops we have copied (r14 + rdx),
// and report their number to the caller.
assert_different_registers(rax, r14_length, count, to, end_to, rcx);
- __ leaq(end_to, to_element_addr);
- gen_write_ref_array_post_barrier(to, end_to, rcx);
- __ movq(rax, r14_length); // original oops
- __ addq(rax, count); // K = (original - remaining) oops
- __ notq(rax); // report (-1^K) to caller
+ __ lea(end_to, to_element_addr);
+ gen_write_ref_array_post_barrier(to, end_to, rscratch1);
+ __ movptr(rax, r14_length); // original oops
+ __ addptr(rax, count); // K = (original - remaining) oops
+ __ notptr(rax); // report (-1^K) to caller
__ jmp(L_done);
// Come here on success only.
__ BIND(L_do_card_marks);
- __ addq(end_to, -wordSize); // make an inclusive end pointer
- gen_write_ref_array_post_barrier(to, end_to, rcx);
- __ xorq(rax, rax); // return 0 on success
+ __ addptr(end_to, -wordSize); // make an inclusive end pointer
+ gen_write_ref_array_post_barrier(to, end_to, rscratch1);
+ __ xorptr(rax, rax); // return 0 on success
// Common exit point (success or failure).
__ BIND(L_done);
- __ movq(r13, Address(rsp, saved_r13_offset * wordSize));
- __ movq(r14, Address(rsp, saved_r14_offset * wordSize));
+ __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
+ __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
restore_arg_regs();
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@@ -2268,13 +2364,13 @@
__ enter(); // required for proper stackwalking of RuntimeStub frame
// bump this on entry, not on exit:
inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
- __ movq(bits, from);
- __ orq(bits, to);
- __ orq(bits, size);
+ __ mov(bits, from);
+ __ orptr(bits, to);
+ __ orptr(bits, size);
__ testb(bits, BytesPerLong-1);
__ jccb(Assembler::zero, L_long_aligned);
__ testb(bits, BytesPerInt-1);
@@ -2282,19 +2378,19 @@
__ testb(bits, BytesPerShort-1);
__ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
__ BIND(L_short_aligned);
- __ shrq(size, LogBytesPerShort); // size => short_count
+ __ shrptr(size, LogBytesPerShort); // size => short_count
__ jump(RuntimeAddress(short_copy_entry));
__ BIND(L_int_aligned);
- __ shrq(size, LogBytesPerInt); // size => int_count
+ __ shrptr(size, LogBytesPerInt); // size => int_count
__ jump(RuntimeAddress(int_copy_entry));
__ BIND(L_long_aligned);
- __ shrq(size, LogBytesPerLong); // size => qword_count
+ __ shrptr(size, LogBytesPerLong); // size => qword_count
__ jump(RuntimeAddress(long_copy_entry));
return start;
}
@@ -2398,20 +2494,20 @@
// (7) src_pos + length must not exceed length of src.
// (8) dst_pos + length must not exceed length of dst.
//
// if (src == NULL) return -1;
- __ testq(src, src); // src oop
+ __ testptr(src, src); // src oop
size_t j1off = __ offset();
__ jccb(Assembler::zero, L_failed_0);
// if (src_pos < 0) return -1;
__ testl(src_pos, src_pos); // src_pos (32-bits)
__ jccb(Assembler::negative, L_failed_0);
// if (dst == NULL) return -1;
- __ testq(dst, dst); // dst oop
+ __ testptr(dst, dst); // dst oop
__ jccb(Assembler::zero, L_failed_0);
// if (dst_pos < 0) return -1;
__ testl(dst_pos, dst_pos); // dst_pos (32-bits)
size_t j4off = __ offset();
@@ -2426,29 +2522,29 @@
guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
// registers used as temp
const Register r11_length = r11; // elements count to copy
const Register r10_src_klass = r10; // array klass
+ const Register r9_dst_klass = r9; // dest array klass
// if (length < 0) return -1;
__ movl(r11_length, C_RARG4); // length (elements count, 32-bits value)
__ testl(r11_length, r11_length);
__ jccb(Assembler::negative, L_failed_0);
- Address src_klass_addr(src, oopDesc::klass_offset_in_bytes());
- Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
- __ movq(r10_src_klass, src_klass_addr);
+ __ load_klass(r10_src_klass, src);
#ifdef ASSERT
// assert(src->klass() != NULL);
BLOCK_COMMENT("assert klasses not null");
{ Label L1, L2;
- __ testq(r10_src_klass, r10_src_klass);
+ __ testptr(r10_src_klass, r10_src_klass);
__ jcc(Assembler::notZero, L2); // it is broken if klass is NULL
__ bind(L1);
__ stop("broken null klass");
__ bind(L2);
- __ cmpq(dst_klass_addr, 0);
+ __ load_klass(r9_dst_klass, dst);
+ __ cmpq(r9_dst_klass, 0);
__ jcc(Assembler::equal, L1); // this would be broken also
BLOCK_COMMENT("assert done");
}
#endif
@@ -2471,11 +2567,12 @@
jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
__ cmpl(rax_lh, objArray_lh);
__ jcc(Assembler::equal, L_objArray);
// if (src->klass() != dst->klass()) return -1;
- __ cmpq(r10_src_klass, dst_klass_addr);
+ __ load_klass(r9_dst_klass, dst);
+ __ cmpq(r10_src_klass, r9_dst_klass);
__ jcc(Assembler::notEqual, L_failed);
// if (!src->is_Array()) return -1;
__ cmpl(rax_lh, Klass::_lh_neutral_value);
__ jcc(Assembler::greaterEqual, L_failed);
@@ -2502,13 +2599,13 @@
const Register r10_offset = r10; // array offset
const Register rax_elsize = rax_lh; // element size
__ movl(r10_offset, rax_lh);
__ shrl(r10_offset, Klass::_lh_header_size_shift);
- __ andq(r10_offset, Klass::_lh_header_size_mask); // array_offset
- __ addq(src, r10_offset); // src array offset
- __ addq(dst, r10_offset); // dst array offset
+ __ andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset
+ __ addptr(src, r10_offset); // src array offset
+ __ addptr(dst, r10_offset); // dst array offset
BLOCK_COMMENT("choose copy loop based on element size");
__ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
// next registers should be set before the jump to corresponding stub
const Register from = c_rarg0; // source array address
@@ -2519,29 +2616,29 @@
// since they are the same as 'src', 'src_pos', 'dst'.
__ BIND(L_copy_bytes);
__ cmpl(rax_elsize, 0);
__ jccb(Assembler::notEqual, L_copy_shorts);
- __ leaq(from, Address(src, src_pos, Address::times_1, 0));// src_addr
- __ leaq(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr
- __ movslq(count, r11_length); // length
+ __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
+ __ lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr
+ __ movl2ptr(count, r11_length); // length
__ jump(RuntimeAddress(byte_copy_entry));
__ BIND(L_copy_shorts);
__ cmpl(rax_elsize, LogBytesPerShort);
__ jccb(Assembler::notEqual, L_copy_ints);
- __ leaq(from, Address(src, src_pos, Address::times_2, 0));// src_addr
- __ leaq(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr
- __ movslq(count, r11_length); // length
+ __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr
+ __ lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr
+ __ movl2ptr(count, r11_length); // length
__ jump(RuntimeAddress(short_copy_entry));
__ BIND(L_copy_ints);
__ cmpl(rax_elsize, LogBytesPerInt);
__ jccb(Assembler::notEqual, L_copy_longs);
- __ leaq(from, Address(src, src_pos, Address::times_4, 0));// src_addr
- __ leaq(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr
- __ movslq(count, r11_length); // length
+ __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr
+ __ lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr
+ __ movl2ptr(count, r11_length); // length
__ jump(RuntimeAddress(int_copy_entry));
__ BIND(L_copy_longs);
#ifdef ASSERT
{ Label L;
@@ -2549,42 +2646,43 @@
__ jcc(Assembler::equal, L);
__ stop("must be long copy, but elsize is wrong");
__ bind(L);
}
#endif
- __ leaq(from, Address(src, src_pos, Address::times_8, 0));// src_addr
- __ leaq(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr
- __ movslq(count, r11_length); // length
+ __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr
+ __ lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr
+ __ movl2ptr(count, r11_length); // length
__ jump(RuntimeAddress(long_copy_entry));
// objArrayKlass
__ BIND(L_objArray);
// live at this point: r10_src_klass, src[_pos], dst[_pos]
Label L_plain_copy, L_checkcast_copy;
// test array classes for subtyping
- __ cmpq(r10_src_klass, dst_klass_addr); // usual case is exact equality
+ __ load_klass(r9_dst_klass, dst);
+ __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
__ jcc(Assembler::notEqual, L_checkcast_copy);
// Identically typed arrays can be copied without element-wise checks.
arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
r10, L_failed);
- __ leaq(from, Address(src, src_pos, Address::times_8,
+ __ lea(from, Address(src, src_pos, TIMES_OOP,
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
- __ leaq(to, Address(dst, dst_pos, Address::times_8,
+ __ lea(to, Address(dst, dst_pos, TIMES_OOP,
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
- __ movslq(count, r11_length); // length
+ __ movl2ptr(count, r11_length); // length
__ BIND(L_plain_copy);
__ jump(RuntimeAddress(oop_copy_entry));
__ BIND(L_checkcast_copy);
// live at this point: r10_src_klass, !r11_length
{
// assert(r11_length == C_RARG4); // will reload from here
Register r11_dst_klass = r11;
- __ movq(r11_dst_klass, dst_klass_addr);
+ __ load_klass(r11_dst_klass, dst);
// Before looking at dst.length, make sure dst is also an objArray.
__ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
__ jcc(Assembler::notEqual, L_failed);
@@ -2594,17 +2692,17 @@
rax, L_failed);
#else
__ movl(r11_length, C_RARG4); // reload
arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
rax, L_failed);
- __ movl(r11_dst_klass, dst_klass_addr); // reload
+ __ load_klass(r11_dst_klass, dst); // reload
#endif
// Marshal the base address arguments now, freeing registers.
- __ leaq(from, Address(src, src_pos, Address::times_8,
+ __ lea(from, Address(src, src_pos, TIMES_OOP,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
- __ leaq(to, Address(dst, dst_pos, Address::times_8,
+ __ lea(to, Address(dst, dst_pos, TIMES_OOP,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ movl(count, C_RARG4); // length (reloaded)
Register sco_temp = c_rarg3; // this register is free now
assert_different_registers(from, to, count, sco_temp,
r11_dst_klass, r10_src_klass);
@@ -2618,23 +2716,23 @@
generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
// Fetch destination element klass from the objArrayKlass header.
int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
objArrayKlass::element_klass_offset_in_bytes());
- __ movq(r11_dst_klass, Address(r11_dst_klass, ek_offset));
+ __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
__ movl(sco_temp, Address(r11_dst_klass, sco_offset));
assert_clean_int(sco_temp, rax);
// the checkcast_copy loop needs two extra arguments:
assert(c_rarg3 == sco_temp, "#3 already in place");
- __ movq(C_RARG4, r11_dst_klass); // dst.klass.element_klass
+ __ movptr(C_RARG4, r11_dst_klass); // dst.klass.element_klass
__ jump(RuntimeAddress(checkcast_copy_entry));
}
__ BIND(L_failed);
- __ xorq(rax, rax);
- __ notq(rax); // return -1
+ __ xorptr(rax, rax);
+ __ notptr(rax); // return -1
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
@@ -2649,18 +2747,24 @@
StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
- StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
- StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, "jint_arraycopy");
+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
+ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, "jlong_disjoint_arraycopy");
StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, "jlong_arraycopy");
+
+ if (UseCompressedOops) {
+ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
+ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
+ } else {
StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
+ }
StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
@@ -2727,30 +2831,30 @@
// This is an inlined and slightly modified version of call_VM
// which has the ability to fetch the return PC out of
// thread-local storage and also sets up last_Java_sp slightly
// differently than the real call_VM
if (restore_saved_exception_pc) {
- __ movq(rax,
+ __ movptr(rax,
Address(r15_thread,
in_bytes(JavaThread::saved_exception_pc_offset())));
- __ pushq(rax);
+ __ push(rax);
}
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert(is_even(framesize/2), "sp not 16-byte aligned");
// return address and rbp are already in place
- __ subq(rsp, (framesize-4) << LogBytesPerInt); // prolog
+ __ subptr(rsp, (framesize-4) << LogBytesPerInt); // prolog
int frame_complete = __ pc() - start;
// Set up last_Java_sp and last_Java_fp
__ set_last_Java_frame(rsp, rbp, NULL);
// Call runtime
- __ movq(c_rarg0, r15_thread);
+ __ movptr(c_rarg0, r15_thread);
BLOCK_COMMENT("call runtime_entry");
__ call(RuntimeAddress(runtime_entry));
// Generate oop map
OopMap* map = new OopMap(framesize, 0);
@@ -2762,12 +2866,12 @@
__ leave(); // required for proper stackwalking of RuntimeStub frame
// check for pending exceptions
#ifdef ASSERT
Label L;
- __ cmpq(Address(r15_thread, Thread::pending_exception_offset()),
- (int) NULL);
+ __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()),
+ (int32_t) NULL_WORD);
__ jcc(Assembler::notEqual, L);
__ should_not_reach_here();
__ bind(L);
#endif // ASSERT
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
@@ -2786,11 +2890,11 @@
// Initialization
void generate_initial() {
// Generates all stubs and initializes the entry points
// This platform-specific stub is needed by generate_call_stub()
- StubRoutines::amd64::_mxcsr_std = generate_fp_mask("mxcsr_std", 0x0000000000001F80);
+ StubRoutines::x86::_mxcsr_std = generate_fp_mask("mxcsr_std", 0x0000000000001F80);
// entry points that exist in all platforms Note: This is code
// that could be shared among different platforms - however the
// benefit seems to be smaller than the disadvantage of having a
// much more complicated generator structure. See also comment in
@@ -2815,13 +2919,13 @@
StubRoutines::_handler_for_unsafe_access_entry =
generate_handler_for_unsafe_access();
// platform dependent
- StubRoutines::amd64::_get_previous_fp_entry = generate_get_previous_fp();
+ StubRoutines::x86::_get_previous_fp_entry = generate_get_previous_fp();
- StubRoutines::amd64::_verify_mxcsr_entry = generate_verify_mxcsr();
+ StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr();
}
void generate_all() {
// Generates all stubs and initializes the entry points
@@ -2869,19 +2973,19 @@
SharedRuntime::
throw_StackOverflowError),
false);
// entry points that are platform specific
- StubRoutines::amd64::_f2i_fixup = generate_f2i_fixup();
- StubRoutines::amd64::_f2l_fixup = generate_f2l_fixup();
- StubRoutines::amd64::_d2i_fixup = generate_d2i_fixup();
- StubRoutines::amd64::_d2l_fixup = generate_d2l_fixup();
-
- StubRoutines::amd64::_float_sign_mask = generate_fp_mask("float_sign_mask", 0x7FFFFFFF7FFFFFFF);
- StubRoutines::amd64::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000);
- StubRoutines::amd64::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
- StubRoutines::amd64::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
+ StubRoutines::x86::_f2i_fixup = generate_f2i_fixup();
+ StubRoutines::x86::_f2l_fixup = generate_f2l_fixup();
+ StubRoutines::x86::_d2i_fixup = generate_d2i_fixup();
+ StubRoutines::x86::_d2l_fixup = generate_d2l_fixup();
+
+ StubRoutines::x86::_float_sign_mask = generate_fp_mask("float_sign_mask", 0x7FFFFFFF7FFFFFFF);
+ StubRoutines::x86::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000);
+ StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
+ StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
// support for verify_oop (must happen after universe_init)
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
// arraycopy stubs used by compilers