hotspot/src/cpu/x86/vm/x86_32.ad
Print this page
rev 611 : Merge
@@ -1,7 +1,7 @@
//
-// Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+// Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
@@ -234,11 +234,11 @@
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source %{
-#define RELOC_IMM32 Assembler::imm32_operand
+#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand
#define __ _masm.
// How to find the high register of a Long pair, given the low register
@@ -493,12 +493,12 @@
//=============================================================================
#ifndef PRODUCT
void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
Compile* C = ra_->C;
if( C->in_24_bit_fp_mode() ) {
- tty->print("FLDCW 24 bit fpu control word");
- tty->print_cr(""); tty->print("\t");
+ st->print("FLDCW 24 bit fpu control word");
+ st->print_cr(""); st->print("\t");
}
int framesize = C->frame_slots() << LogBytesPerInt;
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
// Remove two words for return addr and rbp,
@@ -508,26 +508,26 @@
// We require that their callers must bang for them. But be careful, because
// some VM calls (such as call site linkage) can use several kilobytes of
// stack. But the stack safety zone should account for that.
// See bugs 4446381, 4468289, 4497237.
if (C->need_stack_bang(framesize)) {
- tty->print_cr("# stack bang"); tty->print("\t");
+ st->print_cr("# stack bang"); st->print("\t");
}
- tty->print_cr("PUSHL EBP"); tty->print("\t");
+ st->print_cr("PUSHL EBP"); st->print("\t");
if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
- tty->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
- tty->print_cr(""); tty->print("\t");
+ st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
+ st->print_cr(""); st->print("\t");
framesize -= wordSize;
}
if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
if (framesize) {
- tty->print("SUB ESP,%d\t# Create frame",framesize);
+ st->print("SUB ESP,%d\t# Create frame",framesize);
}
} else {
- tty->print("SUB ESP,%d\t# Create frame",framesize);
+ st->print("SUB ESP,%d\t# Create frame",framesize);
}
}
#endif
@@ -591,15 +591,15 @@
#ifdef ASSERT
if (VerifyStackAtCalls) {
Label L;
MacroAssembler masm(&cbuf);
- masm.pushl(rax);
- masm.movl(rax, rsp);
- masm.andl(rax, StackAlignmentInBytes-1);
- masm.cmpl(rax, StackAlignmentInBytes-wordSize);
- masm.popl(rax);
+ masm.push(rax);
+ masm.mov(rax, rsp);
+ masm.andptr(rax, StackAlignmentInBytes-1);
+ masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
+ masm.pop(rax);
masm.jcc(Assembler::equal, L);
masm.stop("Stack is not properly aligned!");
masm.bind(L);
}
#endif
@@ -723,32 +723,33 @@
}
assert(r->is_XMMRegister(), "must be");
return rc_xmm;
}
// Emit, print, or merely size one instruction whose memory operand is [ESP + offset].
//   cbuf    - when non-NULL, the opcode byte and an ESP-based reg/mem encoding
//             are emitted into this buffer
//   do_size - when cbuf is NULL and do_size is false, non-PRODUCT builds print
//             the instruction text instead (to tty before this change, to st after)
//   is_load - for the MOV opcodes (0x8B / 0x89) selects the printed operand order
//   offset  - displacement from ESP used in the encoding and in the printed text
//   reg     - register number; indexes Matcher::_regEncode and Matcher::regName
//   opcode  - opcode byte to emit
//   op_str  - mnemonic used in the printed form
//   size    - running size passed in by the caller
//   st      - output stream for the printed form (parameter added by this change)
// Returns size + 3 + offset_size, where offset_size is 0, 1 or 4.
-static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, int opcode, const char *op_str, int size ) {
+static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
+ int opcode, const char *op_str, int size, outputStream* st ) {
if( cbuf ) {
emit_opcode (*cbuf, opcode );
encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
} else if( !do_size ) {
// A non-zero incoming size starts the printed text on a fresh, tab-indented line.
- if( size != 0 ) tty->print("\n\t");
+ if( size != 0 ) st->print("\n\t");
if( opcode == 0x8B || opcode == 0x89 ) { // MOV
- if( is_load ) tty->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
- else tty->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
+ if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
+ else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
} else { // FLD, FST, PUSH, POP
- tty->print("%s [ESP + #%d]",op_str,offset);
+ st->print("%s [ESP + #%d]",op_str,offset);
}
#endif
}
// Extra size for the offset: 0 when it is zero, 1 when it is <= 127, otherwise 4.
// NOTE(review): an offset below -128 would also be counted as 1 here;
// presumably stack offsets are never negative - confirm against callers.
int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
// NOTE(review): the constant 3 is presumably opcode + ModRM + SIB bytes - confirm.
return size+3+offset_size;
}
// Helper for XMM registers. Extra opcode bits, limited syntax.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
- int offset, int reg_lo, int reg_hi, int size ) {
+ int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
if( cbuf ) {
if( reg_lo+1 == reg_hi ) { // double move?
if( is_load && !UseXmmLoadAndClearUpper )
emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
else
@@ -762,47 +763,47 @@
else
emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
} else if( !do_size ) {
- if( size != 0 ) tty->print("\n\t");
+ if( size != 0 ) st->print("\n\t");
if( reg_lo+1 == reg_hi ) { // double move?
- if( is_load ) tty->print("%s %s,[ESP + #%d]",
+ if( is_load ) st->print("%s %s,[ESP + #%d]",
UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
Matcher::regName[reg_lo], offset);
- else tty->print("MOVSD [ESP + #%d],%s",
+ else st->print("MOVSD [ESP + #%d],%s",
offset, Matcher::regName[reg_lo]);
} else {
- if( is_load ) tty->print("MOVSS %s,[ESP + #%d]",
+ if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
Matcher::regName[reg_lo], offset);
- else tty->print("MOVSS [ESP + #%d],%s",
+ else st->print("MOVSS [ESP + #%d],%s",
offset, Matcher::regName[reg_lo]);
}
#endif
}
int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
return size+5+offset_size;
}
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
- int src_hi, int dst_hi, int size ) {
+ int src_hi, int dst_hi, int size, outputStream* st ) {
if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
if( cbuf ) {
if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
emit_opcode(*cbuf, 0x66 );
}
emit_opcode(*cbuf, 0x0F );
emit_opcode(*cbuf, 0x28 );
emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
} else if( !do_size ) {
- if( size != 0 ) tty->print("\n\t");
+ if( size != 0 ) st->print("\n\t");
if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
- tty->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+ st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
} else {
- tty->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+ st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
}
#endif
}
return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
} else {
@@ -811,44 +812,45 @@
emit_opcode(*cbuf, 0x0F );
emit_opcode(*cbuf, 0x10 );
emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
} else if( !do_size ) {
- if( size != 0 ) tty->print("\n\t");
+ if( size != 0 ) st->print("\n\t");
if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
- tty->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+ st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
} else {
- tty->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+ st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
}
#endif
}
return size+4;
}
}
// Emit, print, or merely size a register-to-register MOV from src to dst.
//   cbuf    - when non-NULL, opcode 0x8B and a register-direct (mod 0x3) r/m byte
//             built from Matcher::_regEncode[dst] and [src] are emitted
//   do_size - when cbuf is NULL and do_size is false, non-PRODUCT builds print
//             "MOV dst,src" instead (to tty before this change, to st after)
//   src/dst - register numbers; index Matcher::_regEncode and Matcher::regName
//   size    - running size passed in by the caller
//   st      - output stream for the printed form (parameter added by this change)
// Returns size + 2.
-static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size ) {
+static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
if( cbuf ) {
emit_opcode(*cbuf, 0x8B );
emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
} else if( !do_size ) {
// A non-zero incoming size starts the printed text on a fresh, tab-indented line.
- if( size != 0 ) tty->print("\n\t");
- tty->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
+ if( size != 0 ) st->print("\n\t");
+ st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
}
// Two values are accounted for: the opcode and the r/m byte emitted above.
return size+2;
}
-static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, int offset, int size ) {
+static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
+ int offset, int size, outputStream* st ) {
if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
if( cbuf ) {
emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
} else if( !do_size ) {
- if( size != 0 ) tty->print("\n\t");
- tty->print("FLD %s",Matcher::regName[src_lo]);
+ if( size != 0 ) st->print("\n\t");
+ st->print("FLD %s",Matcher::regName[src_lo]);
#endif
}
size += 2;
}
@@ -862,11 +864,11 @@
op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
op = 0xD9;
assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
}
- return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size);
+ return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
// Get registers to move
OptoReg::Name src_second = ra_->get_reg_second(in(1));
@@ -890,36 +892,36 @@
// --------------------------------------
// Check for mem-mem move. push/pop to move.
if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
if( src_second == dst_first ) { // overlapping stack copy ranges
assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
- size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size);
- size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size);
+ size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
+ size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
}
// move low bits
- size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size);
- size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size);
+ size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
+ size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
- size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size);
- size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size);
+ size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
+ size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
}
return size;
}
// --------------------------------------
// Check for integer reg-reg copy
if( src_first_rc == rc_int && dst_first_rc == rc_int )
- size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size);
+ size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
// Check for integer store
if( src_first_rc == rc_int && dst_first_rc == rc_stack )
- size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size);
+ size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
// Check for integer load
if( dst_first_rc == rc_int && src_first_rc == rc_stack )
- size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size);
+ size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
// --------------------------------------
// Check for float reg-reg copy
if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
@@ -949,11 +951,11 @@
return size + ((src_first != FPR1L_num) ? 2+2 : 2);
}
// Check for float store
if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
- return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size);
+ return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
}
// Check for float load
if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
int offset = ra_->reg2offset(src_first);
@@ -985,21 +987,21 @@
// Check for xmm reg-reg copy
if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
(src_first+1 == src_second && dst_first+1 == dst_second),
"no non-adjacent float-moves" );
- return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size);
+ return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
}
// Check for xmm store
if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
- return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size);
+ return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
}
// Check for float xmm load
if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
- return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size);
+ return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
}
// Copy from float reg to xmm reg
if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
// copy to the top of stack from floating point reg
@@ -1015,14 +1017,14 @@
st->print("LEA ESP,[ESP-8]");
#endif
}
size += 4;
- size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size);
+ size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
// Copy from the temp memory to the xmm reg.
- size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size);
+ size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
if( cbuf ) {
emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
@@ -1045,19 +1047,19 @@
return size; // Self copy; no move
assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
// Check for second word int-int move
if( src_second_rc == rc_int && dst_second_rc == rc_int )
- return impl_mov_helper(cbuf,do_size,src_second,dst_second,size);
+ return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
// Check for second word integer store
if( src_second_rc == rc_int && dst_second_rc == rc_stack )
- return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size);
+ return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
// Check for second word integer load
if( dst_second_rc == rc_int && src_second_rc == rc_stack )
- return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size);
+ return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
Unimplemented();
}
@@ -1148,11 +1150,12 @@
if (base == NULL) return; // CodeBuffer::expand failed
// static stub relocation stores the instruction address of the call
__ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
// static stub relocation also tags the methodOop in the code-stream.
__ movoop(rbx, (jobject)NULL); // method is zapped till fixup time
- __ jump(RuntimeAddress((address)-1));
+ // This is recognized as unresolved by relocs/nativeInst/ic code
+ __ jump(RuntimeAddress(__ pc()));
__ end_a_stub();
// Update current stubs pointer and restore code_end.
}
// size of call stub, compiled java to interpreter
@@ -1179,11 +1182,11 @@
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler masm(&cbuf);
#ifdef ASSERT
uint code_size = cbuf.code_size();
#endif
- masm.cmpl(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
+ masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
masm.jump_cc(Assembler::notEqual,
RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
/* WARNING these NOPs are critical so that verified entry point is properly
aligned for patching by NativeJump::patch_verified_entry() */
int nops_cnt = 2;
@@ -1315,11 +1318,15 @@
// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
// This change adds the 'rule' parameter so the accepted offset range can
// depend on which rule is being considered.
//   rule   - rule identifier, compared against jmpConUCF2_rule
//   offset - branch offset to test
// Returns true when offset lies inside the range accepted for that rule.
-bool Matcher::is_short_branch_offset(int offset) {
+bool Matcher::is_short_branch_offset(int rule, int offset) {
+ // the short version of jmpConUCF2 contains multiple branches,
+ // making the reach slightly less
+ if (rule == jmpConUCF2_rule)
+ return (-126 <= offset && offset <= 125);
// Every other rule: the offset must lie in the signed 8-bit range [-128, 127].
return (-128 <= offset && offset <= 127);
}
const bool Matcher::isSimpleConstant64(jlong value) {
// Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
@@ -1685,24 +1692,24 @@
MacroAssembler _masm(&cbuf);
// Compare super with sub directly, since super is not in its own SSA.
// The compiler used to emit this test, but we fold it in here,
// to allow platform-specific tweaking on sparc.
- __ cmpl(Reax, Resi);
+ __ cmpptr(Reax, Resi);
__ jcc(Assembler::equal, hit);
#ifndef PRODUCT
- __ increment(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
+ __ incrementl(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
#endif //PRODUCT
- __ movl(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
+ __ movptr(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
__ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes()));
- __ addl(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+ __ addptr(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ repne_scan();
__ jcc(Assembler::notEqual, miss);
- __ movl(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax);
+ __ movptr(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax);
__ bind(hit);
if( $primary )
- __ xorl(Redi,Redi);
+ __ xorptr(Redi,Redi);
__ bind(miss);
%}
enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
MacroAssembler masm(&cbuf);
@@ -1747,19 +1754,19 @@
// mode the result needs to be removed from the FPU stack. It's
// likely that this function call could be removed by the
// optimizer if the C function is a pure function.
__ ffree(0);
} else if (rt == T_FLOAT) {
- __ leal(rsp, Address(rsp, -4));
+ __ lea(rsp, Address(rsp, -4));
__ fstp_s(Address(rsp, 0));
__ movflt(xmm0, Address(rsp, 0));
- __ leal(rsp, Address(rsp, 4));
+ __ lea(rsp, Address(rsp, 4));
} else if (rt == T_DOUBLE) {
- __ leal(rsp, Address(rsp, -8));
+ __ lea(rsp, Address(rsp, -8));
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
- __ leal(rsp, Address(rsp, 8));
+ __ lea(rsp, Address(rsp, 8));
}
}
%}
@@ -2886,14 +2893,14 @@
__ jccb(Assembler::parity, nan);
__ jccb(Assembler::equal, done);
__ jccb(Assembler::above, inc);
__ bind(nan);
- __ decrement(as_Register($dst$$reg));
+ __ decrement(as_Register($dst$$reg)); // NO L qqq
__ jmpb(done);
__ bind(inc);
- __ increment(as_Register($dst$$reg));
+ __ increment(as_Register($dst$$reg)); // NO L qqq
__ bind(done);
%}
// Compare the longs and set flags
// BROKEN! Do Not use as-is
@@ -3156,11 +3163,11 @@
%}
// Encoding class: moves the integer register $src into the XMM register $dst
// with a single MacroAssembler call (movd before this change, movdl after).
enc_class mov_i2x(regXD dst, eRegI src) %{
MacroAssembler _masm(&cbuf);
// '__' expands to '_masm.' in this file's source block.
- __ movd(as_XMMRegister($dst$$reg), as_Register($src$$reg));
+ __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
%}
// Because the transitions from emitted code to the runtime
// monitorenter/exit helper stubs are so slow it's critical that
@@ -3257,30 +3264,30 @@
masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
}
if (EmitSync & 1) {
// set box->dhw = unused_mark (3)
// Force all sync thru slow-path: slow_enter() and slow_exit()
- masm.movl (Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())) ;
- masm.cmpl (rsp, 0) ;
+ masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
+ masm.cmpptr (rsp, (int32_t)0) ;
} else
if (EmitSync & 2) {
Label DONE_LABEL ;
if (UseBiasedLocking) {
// Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
}
- masm.movl (tmpReg, Address(objReg, 0)) ; // fetch markword
- masm.orl (tmpReg, 0x1);
- masm.movl (Address(boxReg, 0), tmpReg); // Anticipate successful CAS
+ masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword
+ masm.orptr (tmpReg, 0x1);
+ masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
if (os::is_MP()) { masm.lock(); }
- masm.cmpxchg(boxReg, Address(objReg, 0)); // Updates tmpReg
+ masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
masm.jcc(Assembler::equal, DONE_LABEL);
// Recursive locking
- masm.subl(tmpReg, rsp);
- masm.andl(tmpReg, 0xFFFFF003 );
- masm.movl(Address(boxReg, 0), tmpReg);
+ masm.subptr(tmpReg, rsp);
+ masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
+ masm.movptr(Address(boxReg, 0), tmpReg);
masm.bind(DONE_LABEL) ;
} else {
// Possible cases that we'll encounter in fast_lock
// ------------------------------------------------
// * Inflated
@@ -3304,33 +3311,33 @@
// TODO: optimize away redundant LDs of obj->mark and improve the markword triage
// order to reduce the number of conditional branches in the most common cases.
// Beware -- there's a subtle invariant that fetch of the markword
// at [FETCH], below, will never observe a biased encoding (*101b).
// If this invariant is not held we risk exclusion (safety) failure.
- if (UseBiasedLocking) {
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
}
- masm.movl (tmpReg, Address(objReg, 0)) ; // [FETCH]
- masm.testl (tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral)
+ masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
+ masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral)
masm.jccb (Assembler::notZero, IsInflated) ;
// Attempt stack-locking ...
- masm.orl (tmpReg, 0x1);
- masm.movl (Address(boxReg, 0), tmpReg); // Anticipate successful CAS
+ masm.orptr (tmpReg, 0x1);
+ masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
if (os::is_MP()) { masm.lock(); }
- masm.cmpxchg(boxReg, Address(objReg, 0)); // Updates tmpReg
+ masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
if (_counters != NULL) {
masm.cond_inc32(Assembler::equal,
ExternalAddress((address)_counters->fast_path_entry_count_addr()));
}
masm.jccb (Assembler::equal, DONE_LABEL);
// Recursive locking
- masm.subl(tmpReg, rsp);
- masm.andl(tmpReg, 0xFFFFF003 );
- masm.movl(Address(boxReg, 0), tmpReg);
+ masm.subptr(tmpReg, rsp);
+ masm.andptr(tmpReg, 0xFFFFF003 );
+ masm.movptr(Address(boxReg, 0), tmpReg);
if (_counters != NULL) {
masm.cond_inc32(Assembler::equal,
ExternalAddress((address)_counters->fast_path_entry_count_addr()));
}
masm.jmp (DONE_LABEL) ;
@@ -3359,38 +3366,35 @@
// set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
// This is convenient but results a ST-before-CAS penalty. The following CAS suffers
// additional latency as we have another ST in the store buffer that must drain.
if (EmitSync & 8192) {
- masm.movl (Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
+ masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
masm.get_thread (scrReg) ;
- masm.movl (boxReg, tmpReg); // consider: LEA box, [tmp-2]
- masm.movl (tmpReg, 0); // consider: xor vs mov
+ masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
+ masm.movptr(tmpReg, 0); // consider: xor vs mov
if (os::is_MP()) { masm.lock(); }
- masm.cmpxchg (scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+ masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
} else
if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
- masm.movl (scrReg, boxReg) ;
- masm.movl (boxReg, tmpReg); // consider: LEA box, [tmp-2]
+ masm.movptr(scrReg, boxReg) ;
+ masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
// Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
// prefetchw [eax + Offset(_owner)-2]
- masm.emit_raw (0x0F) ;
- masm.emit_raw (0x0D) ;
- masm.emit_raw (0x48) ;
- masm.emit_raw (ObjectMonitor::owner_offset_in_bytes()-2) ;
+ masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
}
if ((EmitSync & 64) == 0) {
// Optimistic form: consider XORL tmpReg,tmpReg
- masm.movl (tmpReg, 0 ) ;
+ masm.movptr(tmpReg, 0 ) ;
} else {
// Can suffer RTS->RTO upgrades on shared or cold $ lines
// Test-And-CAS instead of CAS
- masm.movl (tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
- masm.testl (tmpReg, tmpReg) ; // Locked ?
+ masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
+ masm.testptr(tmpReg, tmpReg) ; // Locked ?
masm.jccb (Assembler::notZero, DONE_LABEL) ;
}
// Appears unlocked - try to swing _owner from null to non-null.
// Ideally, I'd manifest "Self" with get_thread and then attempt
@@ -3399,53 +3403,50 @@
// rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
// we later store "Self" into m->Owner. Transiently storing a stack address
// (rsp or the address of the box) into m->owner is harmless.
// Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
if (os::is_MP()) { masm.lock(); }
- masm.cmpxchg (scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- masm.movl (Address(scrReg, 0), 3) ; // box->_displaced_header = 3
+ masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+ masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3
masm.jccb (Assembler::notZero, DONE_LABEL) ;
masm.get_thread (scrReg) ; // beware: clobbers ICCs
- masm.movl (Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
- masm.xorl (boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success
+ masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
+ masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success
// If the CAS fails we can either retry or pass control to the slow-path.
// We use the latter tactic.
// Pass the CAS result in the icc.ZFlag into DONE_LABEL
// If the CAS was successful ...
// Self has acquired the lock
// Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
// Intentional fall-through into DONE_LABEL ...
} else {
- masm.movl (Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
- masm.movl (boxReg, tmpReg) ;
+ masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
+ masm.movptr(boxReg, tmpReg) ;
// Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
// prefetchw [eax + Offset(_owner)-2]
- masm.emit_raw (0x0F) ;
- masm.emit_raw (0x0D) ;
- masm.emit_raw (0x48) ;
- masm.emit_raw (ObjectMonitor::owner_offset_in_bytes()-2) ;
+ masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
}
if ((EmitSync & 64) == 0) {
// Optimistic form
- masm.xorl (tmpReg, tmpReg) ;
+ masm.xorptr (tmpReg, tmpReg) ;
} else {
// Can suffer RTS->RTO upgrades on shared or cold $ lines
- masm.movl (tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
- masm.testl (tmpReg, tmpReg) ; // Locked ?
+ masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
+ masm.testptr(tmpReg, tmpReg) ; // Locked ?
masm.jccb (Assembler::notZero, DONE_LABEL) ;
}
// Appears unlocked - try to swing _owner from null to non-null.
// Use either "Self" (in scr) or rsp as thread identity in _owner.
// Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
masm.get_thread (scrReg) ;
if (os::is_MP()) { masm.lock(); }
- masm.cmpxchg (scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+ masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
// If the CAS fails we can either retry or pass control to the slow-path.
// We use the latter tactic.
// Pass the CAS result in the icc.ZFlag into DONE_LABEL
// If the CAS was successful ...
@@ -3512,38 +3513,38 @@
guarantee (boxReg == as_Register(EAX_enc), "") ;
MacroAssembler masm(&cbuf);
if (EmitSync & 4) {
// Disable - inhibit all inlining. Force control through the slow-path
- masm.cmpl (rsp, 0) ;
+ masm.cmpptr (rsp, 0) ;
} else
if (EmitSync & 8) {
Label DONE_LABEL ;
if (UseBiasedLocking) {
masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
}
// classic stack-locking code ...
- masm.movl (tmpReg, Address(boxReg, 0)) ;
- masm.testl (tmpReg, tmpReg) ;
+ masm.movptr(tmpReg, Address(boxReg, 0)) ;
+ masm.testptr(tmpReg, tmpReg) ;
masm.jcc (Assembler::zero, DONE_LABEL) ;
if (os::is_MP()) { masm.lock(); }
- masm.cmpxchg(tmpReg, Address(objReg, 0)); // Uses EAX which is box
+ masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
masm.bind(DONE_LABEL);
} else {
Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
// Critically, the biased locking test must have precedence over
// and appear before the (box->dhw == 0) recursive stack-lock test.
- if (UseBiasedLocking) {
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
}
- masm.cmpl (Address(boxReg, 0), 0) ; // Examine the displaced header
- masm.movl (tmpReg, Address(objReg, 0)) ; // Examine the object's markword
+ masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
+ masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
- masm.testl (tmpReg, 0x02) ; // Inflated?
+ masm.testptr(tmpReg, 0x02) ; // Inflated?
masm.jccb (Assembler::zero, Stacked) ;
masm.bind (Inflated) ;
// It's inflated.
// Despite our balanced locking property we still check that m->_owner == Self
@@ -3570,37 +3571,34 @@
// See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
masm.get_thread (boxReg) ;
if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
// prefetchw [ebx + Offset(_owner)-2]
- masm.emit_raw (0x0F) ;
- masm.emit_raw (0x0D) ;
- masm.emit_raw (0x4B) ;
- masm.emit_raw (ObjectMonitor::owner_offset_in_bytes()-2) ;
+ masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
}
// Note that we could employ various encoding schemes to reduce
// the number of loads below (currently 4) to just 2 or 3.
// Refer to the comments in synchronizer.cpp.
// In practice the chain of fetches doesn't seem to impact performance, however.
if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
// Attempt to reduce branch density - AMD's branch predictor.
- masm.xorl (boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- masm.orl (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
- masm.orl (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
- masm.orl (boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
+ masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+ masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
+ masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
+ masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
masm.jccb (Assembler::notZero, DONE_LABEL) ;
- masm.movl (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ;
+ masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ;
masm.jmpb (DONE_LABEL) ;
} else {
- masm.xorl (boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- masm.orl (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
+ masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+ masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
masm.jccb (Assembler::notZero, DONE_LABEL) ;
- masm.movl (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
- masm.orl (boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
+ masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
+ masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
masm.jccb (Assembler::notZero, CheckSucc) ;
- masm.movl (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ;
+ masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ;
masm.jmpb (DONE_LABEL) ;
}
// The Following code fragment (EmitSync & 65536) improves the performance of
// contended applications and contended synchronization microbenchmarks.
@@ -3614,11 +3612,11 @@
masm.bind (CheckSucc) ;
// Optional pre-test ... it's safe to elide this
if ((EmitSync & 16) == 0) {
- masm.cmpl (Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
+ masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
masm.jccb (Assembler::zero, LGoSlowPath) ;
}
// We have a classic Dekker-style idiom:
// ST m->_owner = 0 ; MEMBAR; LD m->_succ
@@ -3644,53 +3642,51 @@
// remains in M-state for the lock:orl.
//
// We currently use (3), although it's likely that switching to (2)
// is correct for the future.
- masm.movl (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ;
+ masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ;
if (os::is_MP()) {
if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
- masm.emit_raw (0x0F) ; // MFENCE ...
- masm.emit_raw (0xAE) ;
- masm.emit_raw (0xF0) ;
+ masm.mfence();
} else {
- masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
+ masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
}
}
// Ratify _succ remains non-null
- masm.cmpl (Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
+ masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
masm.jccb (Assembler::notZero, LSuccess) ;
- masm.xorl (boxReg, boxReg) ; // box is really EAX
+ masm.xorptr(boxReg, boxReg) ; // box is really EAX
if (os::is_MP()) { masm.lock(); }
- masm.cmpxchg(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
masm.jccb (Assembler::notEqual, LSuccess) ;
// Since we're low on registers we installed rsp as a placeholder in _owner.
// Now install Self over rsp. This is safe as we're transitioning from
// non-null to non-null
masm.get_thread (boxReg) ;
- masm.movl (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
+ masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
// Intentional fall-through into LGoSlowPath ...
masm.bind (LGoSlowPath) ;
- masm.orl (boxReg, 1) ; // set ICC.ZF=0 to indicate failure
+ masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
masm.jmpb (DONE_LABEL) ;
masm.bind (LSuccess) ;
- masm.xorl (boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
+ masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
masm.jmpb (DONE_LABEL) ;
}
masm.bind (Stacked) ;
// It's not inflated and it's not recursively stack-locked and it's not biased.
// It must be stack-locked.
// Try to reset the header to displaced header.
// The "box" value on the stack is stable, so we can reload
// and be assured we observe the same value as above.
- masm.movl (tmpReg, Address(boxReg, 0)) ;
+ masm.movptr(tmpReg, Address(boxReg, 0)) ;
if (os::is_MP()) { masm.lock(); }
- masm.cmpxchg(tmpReg, Address(objReg, 0)); // Uses EAX which is box
+ masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
// Intentional fall-thru into DONE_LABEL
// DONE_LABEL is a hot target - we'd really like to place it at the
// start of cache line by padding with NOPs.
@@ -3718,34 +3714,34 @@
int value_offset = java_lang_String::value_offset_in_bytes();
int offset_offset = java_lang_String::offset_offset_in_bytes();
int count_offset = java_lang_String::count_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
- masm.movl(rax, Address(rsi, value_offset));
+ masm.movptr(rax, Address(rsi, value_offset));
masm.movl(rcx, Address(rsi, offset_offset));
- masm.leal(rax, Address(rax, rcx, Address::times_2, base_offset));
- masm.movl(rbx, Address(rdi, value_offset));
+ masm.lea(rax, Address(rax, rcx, Address::times_2, base_offset));
+ masm.movptr(rbx, Address(rdi, value_offset));
masm.movl(rcx, Address(rdi, offset_offset));
- masm.leal(rbx, Address(rbx, rcx, Address::times_2, base_offset));
+ masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
// Compute the minimum of the string lengths(rsi) and the
// difference of the string lengths (stack)
if (VM_Version::supports_cmov()) {
masm.movl(rdi, Address(rdi, count_offset));
masm.movl(rsi, Address(rsi, count_offset));
masm.movl(rcx, rdi);
masm.subl(rdi, rsi);
- masm.pushl(rdi);
+ masm.push(rdi);
masm.cmovl(Assembler::lessEqual, rsi, rcx);
} else {
masm.movl(rdi, Address(rdi, count_offset));
masm.movl(rcx, Address(rsi, count_offset));
masm.movl(rsi, rdi);
masm.subl(rdi, rcx);
- masm.pushl(rdi);
+ masm.push(rdi);
masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL);
masm.movl(rsi, rcx);
// rsi holds min, rcx is unused
}
@@ -3759,18 +3755,18 @@
masm.load_unsigned_word(rdi, Address(rax, 0));
// Compare first characters
masm.subl(rcx, rdi);
masm.jcc(Assembler::notZero, POP_LABEL);
- masm.decrement(rsi);
+ masm.decrementl(rsi);
masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
{
// Check after comparing first character to see if strings are equivalent
Label LSkip2;
// Check if the strings start at same location
- masm.cmpl(rbx,rax);
+ masm.cmpptr(rbx,rax);
masm.jcc(Assembler::notEqual, LSkip2);
// Check if the length difference is zero (from stack)
masm.cmpl(Address(rsp, 0), 0x0);
masm.jcc(Assembler::equal, LENGTH_DIFF_LABEL);
@@ -3778,31 +3774,103 @@
// Strings might not be equivalent
masm.bind(LSkip2);
}
// Shift rax, and rbx, to the end of the arrays, negate min
- masm.leal(rax, Address(rax, rsi, Address::times_2, 2));
- masm.leal(rbx, Address(rbx, rsi, Address::times_2, 2));
+ masm.lea(rax, Address(rax, rsi, Address::times_2, 2));
+ masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2));
masm.negl(rsi);
// Compare the rest of the characters
masm.bind(WHILE_HEAD_LABEL);
masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
masm.subl(rcx, rdi);
masm.jcc(Assembler::notZero, POP_LABEL);
- masm.increment(rsi);
+ masm.incrementl(rsi);
masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
// Strings are equal up to min length. Return the length difference.
masm.bind(LENGTH_DIFF_LABEL);
- masm.popl(rcx);
+ masm.pop(rcx);
masm.jmp(DONE_LABEL);
// Discard the stored length difference
masm.bind(POP_LABEL);
- masm.addl(rsp, 4);
+ masm.addptr(rsp, 4);
+
+ // That's it
+ masm.bind(DONE_LABEL);
+ %}
+
+ enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{
+ Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP;
+ MacroAssembler masm(&cbuf);
+ // Emits code that compares ary1 with ary2 and leaves 1 (equal) or 0 (not equal) in the result register.
+ Register ary1Reg = as_Register($ary1$$reg);
+ Register ary2Reg = as_Register($ary2$$reg);
+ Register tmp1Reg = as_Register($tmp1$$reg);
+ Register tmp2Reg = as_Register($tmp2$$reg);
+ Register resultReg = as_Register($result$$reg);
+ // Offsets of the length field and of the first element; the element base is computed for T_CHAR.
+ int length_offset = arrayOopDesc::length_offset_in_bytes();
+ int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+
+ // Check the input args: same reference -> true; either reference zero (null) -> false
+ masm.cmpl(ary1Reg, ary2Reg);
+ masm.jcc(Assembler::equal, TRUE_LABEL);
+ masm.testl(ary1Reg, ary1Reg);
+ masm.jcc(Assembler::zero, FALSE_LABEL);
+ masm.testl(ary2Reg, ary2Reg);
+ masm.jcc(Assembler::zero, FALSE_LABEL);
+
+ // Check the lengths: different -> false; both zero -> true
+ masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
+ masm.movl(resultReg, Address(ary2Reg, length_offset));
+ masm.cmpl(tmp2Reg, resultReg);
+ masm.jcc(Assembler::notEqual, FALSE_LABEL);
+ masm.testl(resultReg, resultReg);
+ masm.jcc(Assembler::zero, TRUE_LABEL);
+
+ // Get the number of 4 byte vectors to compare (length >> 1)
+ masm.shrl(resultReg, 1);
+
+ // Check for odd-length arrays (low bit of the length, still held in tmp2Reg)
+ masm.andl(tmp2Reg, 1);
+ masm.testl(tmp2Reg, tmp2Reg);
+ masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
+
+ // Compare 2-byte "tail" at end of arrays; it sits just past the last full 4-byte vector
+ masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
+ masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
+ masm.cmpl(tmp1Reg, tmp2Reg);
+ masm.jcc(Assembler::notEqual, FALSE_LABEL);
+ masm.testl(resultReg, resultReg); // no full 4-byte vectors to compare -> arrays are equal
+ masm.jcc(Assembler::zero, TRUE_LABEL);
+
+ // Setup compare loop
+ masm.bind(COMPARE_LOOP_HDR);
+ // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays
+ masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
+ masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
+ masm.negl(resultReg); // index now runs upward from -(vector count) to 0
+
+ // 4-byte-wide compare loop
+ masm.bind(COMPARE_LOOP);
+ masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0)); // ary1Reg/ary2Reg are reused as scratch here, so the incoming array pointers are overwritten
+ masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
+ masm.cmpl(ary1Reg, ary2Reg);
+ masm.jcc(Assembler::notEqual, FALSE_LABEL);
+ masm.increment(resultReg);
+ masm.jcc(Assembler::notZero, COMPARE_LOOP);
+
+ masm.bind(TRUE_LABEL);
+ masm.movl(resultReg, 1); // return true
+ masm.jmp(DONE_LABEL);
+
+ masm.bind(FALSE_LABEL);
+ masm.xorl(resultReg, resultReg); // return false
// That's it
masm.bind(DONE_LABEL);
%}
@@ -4241,11 +4309,12 @@
// masm.membar();
%}
enc_class enc_membar_volatile %{
MacroAssembler masm(&cbuf);
- masm.membar();
+ masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad |
+ Assembler::StoreStore));
%}
// Atomically load the volatile long
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
emit_opcode(cbuf,0xDF);
@@ -4536,12 +4605,12 @@
%}
// Location of C & interpreter return values
c_return_value %{
assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
- static int lo[Op_RegL+1] = { 0, 0, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
- static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
+ static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
+ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
// in SSE2+ mode we want to keep the FPU stack clean so pretend
// that C functions return float and double results in XMM0.
if( ideal_reg == Op_RegD && UseSSE>=2 )
return OptoRegPair(XMM0b_num,XMM0a_num);
@@ -4552,12 +4621,12 @@
%}
// Location of return values
return_value %{
assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
- static int lo[Op_RegL+1] = { 0, 0, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
- static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
+ static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
+ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
if( ideal_reg == Op_RegD && UseSSE>=2 )
return OptoRegPair(XMM0b_num,XMM0a_num);
if( ideal_reg == Op_RegF && UseSSE>=1 )
return OptoRegPair(OptoReg::Bad,XMM0a_num);
return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
@@ -4680,10 +4749,37 @@
format %{ %}
interface(CONST_INTER);
%}
+operand immI_1() %{
+ predicate( n->get_int() == 1 ); // accept only the int constant 1 (shift count of shlL_eReg_1)
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_2() %{
+ predicate( n->get_int() == 2 ); // accept only the int constant 2 (shift count of shlL_eReg_2)
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_3() %{
+ predicate( n->get_int() == 3 ); // accept only the int constant 3 (shift count of shlL_eReg_3)
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
// Pointer Immediate
operand immP() %{
match(ConP);
op_cost(10);
@@ -4718,10 +4814,20 @@
format %{ %}
interface(CONST_INTER);
%}
+// Long Immediate minus one (-1L); used by xorl_eReg_im1 to turn XOR with -1 into NOT
+operand immL_M1() %{
+ predicate( n->get_long() == -1L );
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
predicate((0 <= n->get_long()) && (n->get_long() <= 127));
match(ConL);
@@ -5170,10 +5276,19 @@
format %{ "EFLAGS_U" %}
interface(REG_INTER);
%}
+operand eFlagsRegUCF() %{
+ constraint(ALLOC_IN_RC(int_flags));
+ match(RegFlags);
+ predicate(false); // NOTE(review): presumably keeps the matcher from picking this operand for an arbitrary RegFlags; it appears only as the explicit flags operand of the *CF compare and *UCF cmov instructions - confirm against ADLC semantics
+
+ format %{ "EFLAGS_U_CF" %}
+ interface(REG_INTER);
+%}
+
// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
constraint(ALLOC_IN_RC(int_flags));
match(RegFlags);
format %{ "FLAGS_LTGE" %}
@@ -5647,16 +5762,16 @@
operand cmpOp() %{
match(Bool);
format %{ "" %}
interface(COND_INTER) %{
- equal(0x4);
- not_equal(0x5);
- less(0xC);
- greater_equal(0xD);
- less_equal(0xE);
- greater(0xF);
+ equal(0x4, "e");
+ not_equal(0x5, "ne");
+ less(0xC, "l");
+ greater_equal(0xD, "ge");
+ less_equal(0xE, "le");
+ greater(0xF, "g");
%}
%}
// Comparison Code, unsigned compare. Used by FP also, with
// C2 (unordered) turned into GT or LT already. The other bits
@@ -5664,16 +5779,51 @@
operand cmpOpU() %{
match(Bool);
format %{ "" %}
interface(COND_INTER) %{
- equal(0x4);
- not_equal(0x5);
- less(0x2);
- greater_equal(0x3);
- less_equal(0x6);
- greater(0x7);
+ equal(0x4, "e");
+ not_equal(0x5, "ne");
+ less(0x2, "b");
+ greater_equal(0x3, "nb");
+ less_equal(0x6, "be");
+ greater(0x7, "nbe");
+ %}
+%}
+
+// Floating comparisons that don't require any fixup for the unordered case: only lt/ge/le/gt tests are accepted; condition encodings are the same as cmpOpU
+operand cmpOpUCF() %{
+ match(Bool);
+ predicate(n->as_Bool()->_test._test == BoolTest::lt ||
+ n->as_Bool()->_test._test == BoolTest::ge ||
+ n->as_Bool()->_test._test == BoolTest::le ||
+ n->as_Bool()->_test._test == BoolTest::gt);
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x4, "e");
+ not_equal(0x5, "ne");
+ less(0x2, "b");
+ greater_equal(0x3, "nb");
+ less_equal(0x6, "be");
+ greater(0x7, "nbe");
+ %}
+%}
+
+
+// Floating comparisons that can be fixed up with extra conditional jumps: only ne/eq tests are accepted; condition encodings are the same as cmpOpU
+operand cmpOpUCF2() %{
+ match(Bool);
+ predicate(n->as_Bool()->_test._test == BoolTest::ne ||
+ n->as_Bool()->_test._test == BoolTest::eq);
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x4, "e");
+ not_equal(0x5, "ne");
+ less(0x2, "b");
+ greater_equal(0x3, "nb");
+ less_equal(0x6, "be");
+ greater(0x7, "nbe");
%}
%}
// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
@@ -5694,16 +5844,16 @@
operand cmpOp_commute() %{
match(Bool);
format %{ "" %}
interface(COND_INTER) %{
- equal(0x4);
- not_equal(0x5);
- less(0xF);
- greater_equal(0xE);
- less_equal(0xD);
- greater(0xC);
+ equal(0x4, "e");
+ not_equal(0x5, "ne");
+ less(0xF, "g");
+ greater_equal(0xE, "le");
+ less_equal(0xD, "ge");
+ greater(0xC, "l");
%}
%}
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
@@ -7255,20 +7405,29 @@
opcode(0x0F,0x40);
ins_encode( enc_cmov(cop), RegReg( dst, src ) );
ins_pipe( pipe_cmov_reg );
%}
-instruct cmovI_regU( eRegI dst, eRegI src, eFlagsRegU cr, cmpOpU cop ) %{
+instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
ins_cost(200);
format %{ "CMOV$cop $dst,$src" %}
opcode(0x0F,0x40);
ins_encode( enc_cmov(cop), RegReg( dst, src ) );
ins_pipe( pipe_cmov_reg );
%}
+instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
+ predicate(VM_Version::supports_cmov() );
+ match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); // same pattern as cmovI_regU, but on the cmpOpUCF/eFlagsRegUCF operand pair
+ ins_cost(200); // no encoding of its own: expands to cmovI_regU below
+ expand %{
+ cmovI_regU(cop, cr, dst, src);
+ %}
+%}
+
// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
ins_cost(250);
@@ -7277,20 +7436,29 @@
ins_encode( enc_cmov(cop), RegMem( dst, src ) );
ins_pipe( pipe_cmov_mem );
%}
// Conditional move
-instruct cmovI_memu(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
+instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
ins_cost(250);
format %{ "CMOV$cop $dst,$src" %}
opcode(0x0F,0x40);
ins_encode( enc_cmov(cop), RegMem( dst, src ) );
ins_pipe( pipe_cmov_mem );
%}
+instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
+ predicate(VM_Version::supports_cmov() );
+ match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); // same pattern as cmovI_memU, but on the cmpOpUCF/eFlagsRegUCF operand pair
+ ins_cost(250); // no encoding of its own: expands to cmovI_memU below
+ expand %{
+ cmovI_memU(cop, cr, dst, src);
+ %}
+%}
+
// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -7314,20 +7482,29 @@
ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
ins_pipe( pipe_cmov_reg );
%}
// Conditional move
-instruct cmovP_regU(eRegP dst, eRegP src, eFlagsRegU cr, cmpOpU cop ) %{
+instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
ins_cost(200);
format %{ "CMOV$cop $dst,$src\t# ptr" %}
opcode(0x0F,0x40);
ins_encode( enc_cmov(cop), RegReg( dst, src ) );
ins_pipe( pipe_cmov_reg );
%}
+instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
+ predicate(VM_Version::supports_cmov() );
+ match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); // same pattern as cmovP_regU, but on the cmpOpUCF/eFlagsRegUCF operand pair
+ ins_cost(200); // no encoding of its own: expands to cmovP_regU below
+ expand %{
+ cmovP_regU(cop, cr, dst, src);
+ %}
+%}
+
// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
@@ -7453,10 +7630,19 @@
__ bind(skip);
%}
ins_pipe( pipe_slow );
%}
+instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
+ predicate (UseSSE>=1);
+ match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); // CMoveF on the cmpOpUCF/eFlagsRegUCF operand pair
+ ins_cost(200); // no encoding of its own: expands to fcmovX_regU below
+ expand %{
+ fcmovX_regU(cop, cr, dst, src);
+ %}
+%}
+
// unsigned version
instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
predicate (UseSSE>=2);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -7471,10 +7657,19 @@
__ bind(skip);
%}
ins_pipe( pipe_slow );
%}
+instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
+ predicate (UseSSE>=2);
+ match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); // same pattern as fcmovXD_regU, but on the cmpOpUCF/eFlagsRegUCF operand pair
+ ins_cost(200); // no encoding of its own: expands to fcmovXD_regU below
+ expand %{
+ fcmovXD_regU(cop, cr, dst, src);
+ %}
+%}
+
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
ins_cost(200);
format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
@@ -7493,10 +7688,19 @@
opcode(0x0F,0x40);
ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
ins_pipe( pipe_cmov_reg_long );
%}
+instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
+ predicate(VM_Version::supports_cmov() );
+ match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); // CMoveL on the cmpOpUCF/eFlagsRegUCF operand pair
+ ins_cost(200); // no encoding of its own: expands to cmovL_regU below
+ expand %{
+ cmovL_regU(cop, cr, dst, src);
+ %}
+%}
+
//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------
// Integer Addition Instructions
instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
match(Set dst (AddI dst src));
@@ -7724,37 +7928,40 @@
format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
ins_pipe( pipe_cmpxchg );
%}
-// Conditional-store of a long value
-// Returns a boolean value (0/1) on success. Implemented with a CMPXCHG8 on Intel.
-// mem_ptr can actually be in either ESI or EDI
-instruct storeLConditional( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
- match(Set res (StoreLConditional mem_ptr (Binary oldval newval)));
- effect(KILL cr);
- // EDX:EAX is killed if there is contention, but then it's also unused.
- // In the common case of no contention, EDX:EAX holds the new oop address.
- format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
- "MOV $res,0\n\t"
- "JNE,s fail\n\t"
- "MOV $res,1\n"
- "fail:" %}
- ins_encode( enc_cmpxchg8(mem_ptr),
- enc_flags_ne_to_boolean(res) );
+// Conditional-store of an int value.
+// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
+instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
+ match(Set cr (StoreIConditional mem (Binary oldval newval)));
+ effect(KILL oldval);
+ format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
+ ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
ins_pipe( pipe_cmpxchg );
%}
-// Conditional-store of a long value
+// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
-// mem_ptr can actually be in either ESI or EDI
-instruct storeLConditional_flags( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr, immI0 zero ) %{
- match(Set cr (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero));
- // EDX:EAX is killed if there is contention, but then it's also unused.
- // In the common case of no contention, EDX:EAX holds the new oop address.
- format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
- ins_encode( enc_cmpxchg8(mem_ptr) );
+instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
+ match(Set cr (StoreLConditional mem (Binary oldval newval)));
+ effect(KILL oldval);
+ format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
+ "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
+ "XCHG EBX,ECX"
+ %}
+ ins_encode %{
+ // Note: we need to swap rbx, and rcx before and after the
+ // cmpxchg8 instruction because the instruction uses
+ // rcx as the high order word of the new value to store but
+ // our register encoding uses rbx.
+ __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
+ if( os::is_MP() )
+ __ lock();
+ __ cmpxchg8(Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp));
+ __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
+ %}
ins_pipe( pipe_cmpxchg );
%}
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
@@ -8217,10 +8424,11 @@
opcode(0xC1, 0x5); /* C1 /5 ib */
ins_encode( RegOpcImm( dst, shift) );
ins_pipe( ialu_reg );
%}
+
// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour, eFlagsReg cr) %{
match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
effect(KILL cr);
@@ -8334,10 +8542,22 @@
opcode(0x0B);
ins_encode( OpcP, RegReg( dst, src) );
ins_pipe( ialu_reg_reg );
%}
+instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
+ match(Set dst (OrI dst (CastP2X src))); // OR an int register with a pointer register viewed as an integer (CastP2X)
+ effect(KILL cr);
+
+ size(2);
+ format %{ "OR $dst,$src" %}
+ opcode(0x0B); // same opcode and RegReg encoding as the plain register-register OR just above
+ ins_encode( OpcP, RegReg( dst, src) );
+ ins_pipe( ialu_reg_reg );
+%}
+
+
// Or Register with Immediate
instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
match(Set dst (OrI dst src));
effect(KILL cr);
@@ -8529,10 +8749,22 @@
opcode(0x33);
ins_encode( OpcP, RegReg( dst, src) );
ins_pipe( ialu_reg_reg );
%}
+// Xor Register with Immediate -1: emitted as a single NOT; unlike xorI_eReg_imm, no flags operand or KILL cr is declared
+instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
+ match(Set dst (XorI dst imm));
+
+ size(2);
+ format %{ "NOT $dst" %}
+ ins_encode %{
+ __ notl($dst$$Register);
+ %}
+ ins_pipe( ialu_reg );
+%}
+
// Xor Register with Immediate
instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
match(Set dst (XorI dst src));
effect(KILL cr);
@@ -8846,10 +9078,22 @@
opcode(0x33,0x33);
ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
ins_pipe( ialu_reg_reg_long );
%}
+// Xor Long Register with Immediate -1: emitted as NOT on each 32-bit half; unlike xorl_eReg_imm, no flags operand or KILL cr is declared
+instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
+ match(Set dst (XorL dst imm));
+ format %{ "NOT $dst.lo\n\t"
+ "NOT $dst.hi" %}
+ ins_encode %{
+ __ notl($dst$$Register); // low half
+ __ notl(HIGH_FROM_LOW($dst$$Register)); // high half of the register pair
+ %}
+ ins_pipe( ialu_reg_long );
+%}
+
// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
match(Set dst (XorL dst src));
effect(KILL cr);
format %{ "XOR $dst.lo,$src.lo\n\t"
@@ -8869,10 +9113,67 @@
opcode(0x33,0x33);
ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
ins_pipe( ialu_reg_long_mem );
%}
+// Shift Left Long by 1, done as one ADD/ADC pair that doubles the 64-bit value; only when UseNewLongLShift is set
+instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
+ predicate(UseNewLongLShift);
+ match(Set dst (LShiftL dst cnt));
+ effect(KILL cr);
+ ins_cost(100); // lower than the 200 of the generic shlL_eReg_1_31 below
+ format %{ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi" %}
+ ins_encode %{
+ __ addl($dst$$Register,$dst$$Register); // double the low half; carry out is the bit shifted into the high half
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); // double the high half plus that carry
+ %}
+ ins_pipe( ialu_reg_long );
+%}
+
+// Shift Left Long by 2, done as two ADD/ADC pairs (each pair doubles the 64-bit value); only when UseNewLongLShift is set
+instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
+ predicate(UseNewLongLShift);
+ match(Set dst (LShiftL dst cnt));
+ effect(KILL cr);
+ ins_cost(100); // lower than the 200 of the generic shlL_eReg_1_31 below
+ format %{ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi\n\t"
+ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi" %}
+ ins_encode %{
+ __ addl($dst$$Register,$dst$$Register); // first doubling: low half, carry feeds the high half
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
+ __ addl($dst$$Register,$dst$$Register); // second doubling
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
+ %}
+ ins_pipe( ialu_reg_long );
+%}
+
+// Shift Left Long by 3, done as three ADD/ADC pairs (each pair doubles the 64-bit value); only when UseNewLongLShift is set
+instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
+ predicate(UseNewLongLShift);
+ match(Set dst (LShiftL dst cnt));
+ effect(KILL cr);
+ ins_cost(100); // lower than the 200 of the generic shlL_eReg_1_31 below
+ format %{ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi\n\t"
+ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi\n\t"
+ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi" %}
+ ins_encode %{
+ __ addl($dst$$Register,$dst$$Register); // first doubling: low half, carry feeds the high half
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
+ __ addl($dst$$Register,$dst$$Register); // second doubling
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
+ __ addl($dst$$Register,$dst$$Register); // third doubling
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
+ %}
+ ins_pipe( ialu_reg_long );
+%}
+
// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
match(Set dst (LShiftL dst cnt));
effect(KILL cr);
ins_cost(200);
@@ -9017,10 +9318,22 @@
OpcP, RegOpc(src2),
cmpF_P6_fixup );
ins_pipe( pipe_slow );
%}
+instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
+ predicate(VM_Version::supports_cmov() && UseSSE <=1);
+ match(Set cr (CmpD src1 src2)); // result goes to eFlagsRegUCF rather than eFlagsRegU
+ ins_cost(150);
+ format %{ "FLD $src1\n\t"
+ "FUCOMIP ST,$src2 // P6 instruction" %}
+ opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
+ ins_encode( Push_Reg_D(src1),
+ OpcP, RegOpc(src2)); // unlike the encoding just above, no cmpF_P6_fixup step is emitted
+ ins_pipe( pipe_slow );
+%}
+
// Compare & branch
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
predicate(UseSSE<=1);
match(Set cr (CmpD src1 src2));
effect(KILL rax);
@@ -9081,10 +9394,20 @@
opcode(0x66, 0x0F, 0x2F);
ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
ins_pipe( pipe_slow );
%}
+instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
+ predicate(UseSSE>=2);
+ match(Set cr (CmpD dst src)); // result goes to eFlagsRegUCF rather than eFlagsRegU
+ ins_cost(100);
+ format %{ "COMISD $dst,$src" %}
+ opcode(0x66, 0x0F, 0x2F);
+ ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); // unlike the encoding just above, no cmpF_P6_fixup step is emitted
+ ins_pipe( pipe_slow );
+%}
+
// double compare (memory operand) and set condition codes in EFLAGS by XMM regs
instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
predicate(UseSSE>=2);
match(Set cr (CmpD dst (LoadD src)));
effect(KILL rax);
@@ -9097,10 +9420,20 @@
opcode(0x66, 0x0F, 0x2F);
ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
ins_pipe( pipe_slow );
%}
+instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
+ predicate(UseSSE>=2);
+ match(Set cr (CmpD dst (LoadD src))); // memory-operand form; result goes to eFlagsRegUCF rather than eFlagsRegU
+ ins_cost(100);
+ format %{ "COMISD $dst,$src" %}
+ opcode(0x66, 0x0F, 0x2F);
+ ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src)); // unlike the encoding just above, no cmpF_P6_fixup step is emitted
+ ins_pipe( pipe_slow );
+%}
+
// Compare into -1,0,1 in XMM
instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
predicate(UseSSE>=2);
match(Set dst (CmpD3 src1 src2));
effect(KILL cr);
@@ -9984,10 +10317,22 @@
OpcP, RegOpc(src2),
cmpF_P6_fixup );
ins_pipe( pipe_slow );
%}
+instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
+ predicate(VM_Version::supports_cmov() && UseSSE == 0);
+ match(Set cr (CmpF src1 src2)); // result goes to eFlagsRegUCF rather than eFlagsRegU
+ ins_cost(100);
+ format %{ "FLD $src1\n\t"
+ "FUCOMIP ST,$src2 // P6 instruction" %}
+ opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
+ ins_encode( Push_Reg_D(src1),
+ OpcP, RegOpc(src2)); // unlike the encoding just above, no cmpF_P6_fixup step is emitted
+ ins_pipe( pipe_slow );
+%}
+
// Compare & branch
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
predicate(UseSSE == 0);
match(Set cr (CmpF src1 src2));
@@ -10049,10 +10394,20 @@
opcode(0x0F, 0x2F);
ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
ins_pipe( pipe_slow );
%}
+instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
+ predicate(UseSSE>=1);
+ match(Set cr (CmpF dst src)); // result goes to eFlagsRegUCF rather than eFlagsRegU
+ ins_cost(100);
+ format %{ "COMISS $dst,$src" %}
+ opcode(0x0F, 0x2F);
+ ins_encode(OpcP, OpcS, RegReg(dst, src)); // unlike the encoding just above, no cmpF_P6_fixup step is emitted
+ ins_pipe( pipe_slow );
+%}
+
// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
predicate(UseSSE>=1);
match(Set cr (CmpF dst (LoadF src)));
effect(KILL rax);
@@ -10065,10 +10420,20 @@
opcode(0x0F, 0x2F);
ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
ins_pipe( pipe_slow );
%}
+instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
+ predicate(UseSSE>=1);
+ match(Set cr (CmpF dst (LoadF src))); // memory-operand form; result goes to eFlagsRegUCF rather than eFlagsRegU
+ ins_cost(100);
+ format %{ "COMISS $dst,$src" %}
+ opcode(0x0F, 0x2F);
+ ins_encode(OpcP, OpcS, RegMem(dst, src)); // unlike the encoding just above, no cmpF_P6_fixup step is emitted
+ ins_pipe( pipe_slow );
+%}
+
// Compare into -1,0,1 in XMM
instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
predicate(UseSSE>=1);
match(Set dst (CmpF3 src1 src2));
effect(KILL cr);
@@ -10968,11 +11333,11 @@
ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
ins_pipe( fpu_reg_mem );
%}
instruct convI2XD_reg(regXD dst, eRegI src) %{
- predicate( UseSSE>=2 );
+ predicate( UseSSE>=2 && !UseXmmI2D );
match(Set dst (ConvI2D src));
format %{ "CVTSI2SD $dst,$src" %}
opcode(0xF2, 0x0F, 0x2A);
ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
ins_pipe( pipe_slow );
@@ -10985,10 +11350,24 @@
opcode(0xF2, 0x0F, 0x2A);
ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
ins_pipe( pipe_slow );
%}
+instruct convXI2XD_reg(regXD dst, eRegI src) // int to double conversion carried out inside the destination XMM register
+%{
+ predicate( UseSSE>=2 && UseXmmI2D ); // complements convI2XD_reg, whose predicate requires !UseXmmI2D
+ match(Set dst (ConvI2D src));
+
+ format %{ "MOVD $dst,$src\n\t"
+ "CVTDQ2PD $dst,$dst\t# i2d" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register); // move the int from the general register into dst
+ __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); // convert in place: dst is both source and destination
+ %}
+ ins_pipe(pipe_slow); // XXX
+%}
+
instruct convI2D_mem(regD dst, memory mem) %{
predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2D (LoadI mem)));
format %{ "FILD $mem\n\t"
"FSTP $dst" %}
@@ -11060,19 +11439,33 @@
ins_pipe( fpu_reg_mem );
%}
// Convert an int to a float in xmm; no rounding step needed.
instruct convI2X_reg(regX dst, eRegI src) %{
- predicate(UseSSE>=1);
+ predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) ); // explicit grouping, same condition: always for UseSSE==1, for UseSSE>=2 only when UseXmmI2F is off (convXI2X_reg takes the other case)
match(Set dst (ConvI2F src));
format %{ "CVTSI2SS $dst, $src" %}
opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
ins_pipe( pipe_slow );
%}
+ instruct convXI2X_reg(regX dst, eRegI src) // int to float conversion carried out inside the destination XMM register
+%{
+ predicate( UseSSE>=2 && UseXmmI2F ); // covers the case excluded by convI2X_reg, which requires !UseXmmI2F when UseSSE>=2
+ match(Set dst (ConvI2F src));
+
+ format %{ "MOVD $dst,$src\n\t"
+ "CVTDQ2PS $dst,$dst\t# i2f" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register); // move the int from the general register into dst
+ __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); // convert in place: dst is both source and destination
+ %}
+ ins_pipe(pipe_slow); // XXX
+%}
+
instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
match(Set dst (ConvI2L src));
effect(KILL cr);
format %{ "MOV $dst.lo,$src\n\t"
"MOV $dst.hi,$src\n\t"
@@ -11535,10 +11928,21 @@
format %{ "String Compare $str1,$str2 -> $result // KILL EAX, EBX" %}
ins_encode( enc_String_Compare() );
ins_pipe( pipe_slow );
%}
+// fast array equals
+instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ // matches AryEq on two array pointers and defines result
+ match(Set result (AryEq ary1 ary2));
+ effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr); // both inputs are used and then clobbered; tmp1, tmp2 and the flags are clobbered as well
+ //ins_cost(300);
+
+ format %{ "Array Equals $ary1,$ary2 -> $result // KILL EAX, EBX" %}
+ ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) ); // the comparison code itself is in enc_Array_Equals, defined outside this view
+ ins_pipe( pipe_slow );
+%}
+
//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
match(Set cr (CmpI op1 op2));
effect( DEF cr, USE op1, USE op2 );
@@ -11877,21 +12281,89 @@
ins_encode( Jcc( cop, labl) );
ins_pipe( pipe_jcc );
ins_pc_relative(1);
%}
+instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ // conditional jump matching CountedLoopEnd for the cmpOpUCF / eFlagsRegUCF operand pair
+ match(CountedLoopEnd cop cmp);
+ effect(USE labl);
+
+ ins_cost(200);
+ format %{ "J$cop,u $labl\t# Loop end" %}
+ size(6); // fixed 6-byte encoding; presumably two opcode bytes plus a 32-bit displacement -- confirm against the Jcc encoding
+ opcode(0x0F, 0x80);
+ ins_encode( Jcc( cop, labl) );
+ ins_pipe( pipe_jcc );
+ ins_pc_relative(1);
+%}
+
// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
match(If cop cmp);
effect(USE labl);
ins_cost(300);
format %{ "J$cop,u $labl" %}
size(6);
opcode(0x0F, 0x80);
- ins_encode( Jcc( cop, labl) );
- ins_pipe( pipe_jcc );
+ ins_encode(Jcc(cop, labl)); // conditional jump to labl on condition cop, using the 0x0F 0x80 opcode pair declared above
+ ins_pipe(pipe_jcc);
+ ins_pc_relative(1); // presumably marks the encoding as PC-relative -- confirm against the ADLC documentation
+%}
+
+instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ // If-branch for the cmpOpUCF / eFlagsRegUCF operand pair, emitted as a single conditional jump
+ match(If cop cmp);
+ effect(USE labl);
+
+ ins_cost(200); // lower than the 300 declared by jmpConU above
+ format %{ "J$cop,u $labl" %}
+ size(6); // same fixed length and opcode pair as jmpConU
+ opcode(0x0F, 0x80);
+ ins_encode(Jcc(cop, labl));
+ ins_pipe(pipe_jcc);
+ ins_pc_relative(1);
+%}
+
+instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ // If-branch for cmpOpUCF2 conditions (equal / notEqual only), emitted as a parity jump plus the cop jump
+ match(If cop cmp);
+ effect(USE labl);
+
+ ins_cost(200);
+ format %{ $$template
+ if ($cop$$cmpcode == Assembler::notEqual) {
+ $$emit$$"JP,u $labl\n\t"
+ $$emit$$"J$cop,u $labl"
+ } else {
+ $$emit$$"JP,u done\n\t"
+ $$emit$$"J$cop,u $labl\n\t"
+ $$emit$$"done:"
+ }
+ %}
+ size(12);
+ opcode(0x0F, 0x80);
+ ins_encode %{
+ // Two jumps back to back (size 12 in total): first on parity, then on cop.
+ Label* l = $labl$$label;
+ $$$emit8$primary;
+ emit_cc(cbuf, $secondary, Assembler::parity);
+ int parity_disp = -1;
+ if ($cop$$cmpcode == Assembler::notEqual) {
+ // Parity also jumps to labl; the +4 covers the 32-bit displacement not yet emitted, and 0 is used when there is no label.
+ parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
+ } else if ($cop$$cmpcode == Assembler::equal) {
+ // Parity skips the 6-byte jump that follows (the done: position in the format).
+ parity_disp = 6;
+ } else {
+ ShouldNotReachHere();
+ }
+ emit_d32(cbuf, parity_disp);
+ $$$emit8$primary;
+ emit_cc(cbuf, $secondary, $cop$$cmpcode);
+ int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
+ emit_d32(cbuf, disp);
+ %}
+ ins_pipe(pipe_jcc);
ins_pc_relative(1);
%}
// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
@@ -11986,11 +12458,11 @@
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
match(CountedLoopEnd cop cr);
effect(USE labl);
ins_cost(300);
- format %{ "J$cop,s $labl" %}
+ format %{ "J$cop,s $labl\t# Loop end" %}
size(2);
opcode(0x70);
ins_encode( JccShort( cop, labl) );
ins_pipe( pipe_jcc );
ins_pc_relative(1);
@@ -12001,11 +12473,25 @@
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
match(CountedLoopEnd cop cmp);
effect(USE labl);
ins_cost(300);
- format %{ "J$cop,us $labl" %}
+ format %{ "J$cop,us $labl\t# Loop end" %}
+ size(2); // fixed 2-byte encoding; presumably one opcode byte plus an 8-bit displacement -- confirm against the JccShort encoding
+ opcode(0x70);
+ ins_encode( JccShort( cop, labl) );
+ ins_pipe( pipe_jcc );
+ ins_pc_relative(1);
+ ins_short_branch(1); // declares this rule as a short-branch form
+%}
+
+instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
+ match(CountedLoopEnd cop cmp);
+ effect(USE labl);
+
+ ins_cost(300);
+ format %{ "J$cop,us $labl\t# Loop end" %}
size(2);
opcode(0x70);
ins_encode( JccShort( cop, labl) );
ins_pipe( pipe_jcc );
ins_pc_relative(1);
@@ -12025,10 +12511,64 @@
ins_pipe( pipe_jcc );
ins_pc_relative(1);
ins_short_branch(1);
%}
+instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ // short-form If-branch for the cmpOpUCF / eFlagsRegUCF operand pair
+ match(If cop cmp);
+ effect(USE labl);
+
+ ins_cost(300);
+ format %{ "J$cop,us $labl" %}
+ size(2); // fixed 2-byte encoding; presumably one opcode byte plus an 8-bit displacement -- confirm against the JccShort encoding
+ opcode(0x70);
+ ins_encode( JccShort( cop, labl) );
+ ins_pipe( pipe_jcc );
+ ins_pc_relative(1);
+ ins_short_branch(1); // declares this rule as a short-branch form
+%}
+
+instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ // short-form If-branch for cmpOpUCF2 conditions (equal / notEqual only): a parity jump plus the cop jump
+ match(If cop cmp);
+ effect(USE labl);
+
+ ins_cost(300);
+ format %{ $$template
+ if ($cop$$cmpcode == Assembler::notEqual) {
+ $$emit$$"JP,u,s $labl\n\t"
+ $$emit$$"J$cop,u,s $labl"
+ } else {
+ $$emit$$"JP,u,s done\n\t"
+ $$emit$$"J$cop,u,s $labl\n\t"
+ $$emit$$"done:"
+ }
+ %}
+ size(4); // matches the two emit_cc + emit_d8 pairs emitted below
+ opcode(0x70);
+ ins_encode %{
+ Label* l = $labl$$label;
+ emit_cc(cbuf, $primary, Assembler::parity); // first jump: primary opcode combined with the parity condition
+ int parity_disp = -1; // placeholder, overwritten in both handled branches
+ if ($cop$$cmpcode == Assembler::notEqual) {
+ parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0; // parity also jumps to labl; +1 covers the displacement byte not yet emitted; 0 when there is no label
+ } else if ($cop$$cmpcode == Assembler::equal) {
+ parity_disp = 2; // skip the 2-byte jump that follows (the done: position in the format)
+ } else {
+ ShouldNotReachHere(); // only equal and notEqual are handled
+ }
+ emit_d8(cbuf, parity_disp);
+ emit_cc(cbuf, $primary, $cop$$cmpcode); // second jump: primary opcode combined with the cop condition
+ int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
+ emit_d8(cbuf, disp);
+ assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); // range checks run after both displacement bytes have been emitted
+ assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
+ %}
+ ins_pipe(pipe_jcc);
+ ins_pc_relative(1);
+ ins_short_branch(1);
+%}
+
// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
@@ -12065,22 +12605,22 @@
"JMP,s done\n"
"m_one:\tDEC $dst\n"
"done:" %}
ins_encode %{
Label p_one, m_one, done;
- __ xorl($dst$$Register, $dst$$Register);
+ __ xorptr($dst$$Register, $dst$$Register);
__ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
__ jccb(Assembler::less, m_one);
__ jccb(Assembler::greater, p_one);
__ cmpl($src1$$Register, $src2$$Register);
__ jccb(Assembler::below, m_one);
__ jccb(Assembler::equal, done);
__ bind(p_one);
- __ increment($dst$$Register);
+ __ incrementl($dst$$Register);
__ jmpb(done);
__ bind(m_one);
- __ decrement($dst$$Register);
+ __ decrementl($dst$$Register);
__ bind(done);
%}
ins_pipe( pipe_slow );
%}
@@ -12774,8 +13314,5 @@
%}
//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
-
-
-