--- old/src/cpu/sparc/vm/sparc.ad Sat Jun 2 20:03:55 2012 +++ new/src/cpu/sparc/vm/sparc.ad Sat Jun 2 20:03:55 2012 @@ -1,5 +1,5 @@ // -// Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -678,18 +678,26 @@ static inline jdouble replicate_immI(int con, int count, int width) { // Load a constant replicated "count" times with width "width" + assert(count*width == 8 && width <= 4, "sanity"); int bit_width = width * 8; - jlong elt_val = con; - elt_val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits - jlong val = elt_val; + jlong val = con; + val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits for (int i = 0; i < count - 1; i++) { - val <<= bit_width; - val |= elt_val; + val |= (val << bit_width); } jdouble dval = *((jdouble*) &val); // coerce to double type return dval; } +static inline jdouble replicate_immF(float con) { + // Replicate float con 2 times and pack into vector. + int val = *((int*)&con); + jlong lval = val; + lval = (lval << 32) | (lval & 0xFFFFFFFFl); + jdouble dval = *((jdouble*) &lval); // coerce to double type + return dval; +} + // Standard Sparc opcode form2 field breakdown static inline void emit2_19(CodeBuffer &cbuf, int f30, int f29, int f25, int f22, int f20, int f19, int f0 ) { f0 &= (1<<19)-1; // Mask displacement to 19 bits @@ -841,10 +849,7 @@ !(n->ideal_Opcode()==Op_PrefetchRead && ld_op==Op_LoadI) && !(n->ideal_Opcode()==Op_PrefetchWrite && ld_op==Op_LoadI) && !(n->ideal_Opcode()==Op_PrefetchAllocation && ld_op==Op_LoadI) && - !(n->ideal_Opcode()==Op_Load2I && ld_op==Op_LoadD) && - !(n->ideal_Opcode()==Op_Load4C && ld_op==Op_LoadD) && - !(n->ideal_Opcode()==Op_Load4S && ld_op==Op_LoadD) && - !(n->ideal_Opcode()==Op_Load8B && ld_op==Op_LoadD) && + !(n->ideal_Opcode()==Op_LoadVector && ld_op==Op_LoadD) && !(n->rule() == loadUB_rule)) { verify_oops_warning(n, n->ideal_Opcode(), ld_op); } @@ -856,9 +861,7 @@ !(n->ideal_Opcode()==Op_StoreI && st_op==Op_StoreF) && !(n->ideal_Opcode()==Op_StoreF && st_op==Op_StoreI) && !(n->ideal_Opcode()==Op_StoreL && st_op==Op_StoreI) && - !(n->ideal_Opcode()==Op_Store2I && st_op==Op_StoreD) && - !(n->ideal_Opcode()==Op_Store4C && st_op==Op_StoreD) && - !(n->ideal_Opcode()==Op_Store8B && st_op==Op_StoreD) && + !(n->ideal_Opcode()==Op_StoreVector && st_op==Op_StoreD) && !(n->ideal_Opcode()==Op_StoreD && st_op==Op_StoreI && n->rule() == storeD0_rule)) { verify_oops_warning(n, n->ideal_Opcode(), st_op); } @@ -1850,16 +1853,45 @@ address last_rethrow = NULL; // debugging aid for Rethrow encoding #endif +// Map Types to machine register types +const int Matcher::base2reg[Type::lastype] = { + Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN, + Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */ + 0, Op_RegD, 0, 0, /* Vectors */ + Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */ + 0, 0/*abio*/, + Op_RegP /* Return address */, 0, /* the memories */ + Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, + 0 /*bottom*/ +}; + // Vector width in bytes -const uint Matcher::vector_width_in_bytes(void) { +const int Matcher::vector_width_in_bytes(BasicType bt) { + assert(MaxVectorSize == 8, ""); return 8; } // Vector ideal reg -const uint Matcher::vector_ideal_reg(void) { +const int Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 8, ""); return Op_RegD; } +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. +} + +// SPARC doesn't support misaligned vectors store/load. +const bool Matcher::misaligned_vectors_ok() { + return false; +} + // USII supports fxtof through the whole range of number, USIII doesn't const bool Matcher::convL2FSupported(void) { return VM_Version::has_fast_fxtof(); @@ -5933,50 +5965,6 @@ ins_pipe(iload_mem); %} -// Load Aligned Packed Byte into a Double Register -instruct loadA8B(regD dst, memory mem) %{ - match(Set dst (Load8B mem)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "LDDF $mem,$dst\t! packed8B" %} - opcode(Assembler::lddf_op3); - ins_encode(simple_form3_mem_reg( mem, dst ) ); - ins_pipe(floadD_mem); -%} - -// Load Aligned Packed Char into a Double Register -instruct loadA4C(regD dst, memory mem) %{ - match(Set dst (Load4C mem)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "LDDF $mem,$dst\t! packed4C" %} - opcode(Assembler::lddf_op3); - ins_encode(simple_form3_mem_reg( mem, dst ) ); - ins_pipe(floadD_mem); -%} - -// Load Aligned Packed Short into a Double Register -instruct loadA4S(regD dst, memory mem) %{ - match(Set dst (Load4S mem)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "LDDF $mem,$dst\t! packed4S" %} - opcode(Assembler::lddf_op3); - ins_encode(simple_form3_mem_reg( mem, dst ) ); - ins_pipe(floadD_mem); -%} - -// Load Aligned Packed Int into a Double Register -instruct loadA2I(regD dst, memory mem) %{ - match(Set dst (Load2I mem)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "LDDF $mem,$dst\t! packed2I" %} - opcode(Assembler::lddf_op3); - ins_encode(simple_form3_mem_reg( mem, dst ) ); - ins_pipe(floadD_mem); -%} - // Load Range instruct loadRange(iRegI dst, memory mem) %{ match(Set dst (LoadRange mem)); @@ -6600,17 +6588,6 @@ ins_pipe(fstoreF_mem_zero); %} -// Store Aligned Packed Bytes in Double register to memory -instruct storeA8B(memory mem, regD src) %{ - match(Set mem (Store8B mem src)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STDF $src,$mem\t! packed8B" %} - opcode(Assembler::stdf_op3); - ins_encode(simple_form3_mem_reg( mem, src ) ); - ins_pipe(fstoreD_mem_reg); -%} - // Convert oop pointer into compressed form instruct encodeHeapOop(iRegN dst, iRegP src) %{ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); @@ -6655,62 +6632,6 @@ %} -// Store Zero into Aligned Packed Bytes -instruct storeA8B0(memory mem, immI0 zero) %{ - match(Set mem (Store8B mem zero)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STX $zero,$mem\t! packed8B" %} - opcode(Assembler::stx_op3); - ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); - ins_pipe(fstoreD_mem_zero); -%} - -// Store Aligned Packed Chars/Shorts in Double register to memory -instruct storeA4C(memory mem, regD src) %{ - match(Set mem (Store4C mem src)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STDF $src,$mem\t! packed4C" %} - opcode(Assembler::stdf_op3); - ins_encode(simple_form3_mem_reg( mem, src ) ); - ins_pipe(fstoreD_mem_reg); -%} - -// Store Zero into Aligned Packed Chars/Shorts -instruct storeA4C0(memory mem, immI0 zero) %{ - match(Set mem (Store4C mem (Replicate4C zero))); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STX $zero,$mem\t! packed4C" %} - opcode(Assembler::stx_op3); - ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); - ins_pipe(fstoreD_mem_zero); -%} - -// Store Aligned Packed Ints in Double register to memory -instruct storeA2I(memory mem, regD src) %{ - match(Set mem (Store2I mem src)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STDF $src,$mem\t! packed2I" %} - opcode(Assembler::stdf_op3); - ins_encode(simple_form3_mem_reg( mem, src ) ); - ins_pipe(fstoreD_mem_reg); -%} - -// Store Zero into Aligned Packed Ints -instruct storeA2I0(memory mem, immI0 zero) %{ - match(Set mem (Store2I mem zero)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STX $zero,$mem\t! packed2I" %} - opcode(Assembler::stx_op3); - ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); - ins_pipe(fstoreD_mem_zero); -%} - - //----------MemBar Instructions----------------------------------------------- // Memory barrier flavors @@ -8892,150 +8813,6 @@ ins_pipe(ialu_reg_imm); %} -// Replicate scalar to packed byte values in Double register -instruct Repl8B_reg_helper(iRegL dst, iRegI src) %{ - effect(DEF dst, USE src); - format %{ "SLLX $src,56,$dst\n\t" - "SRLX $dst, 8,O7\n\t" - "OR $dst,O7,$dst\n\t" - "SRLX $dst,16,O7\n\t" - "OR $dst,O7,$dst\n\t" - "SRLX $dst,32,O7\n\t" - "OR $dst,O7,$dst\t! replicate8B" %} - ins_encode( enc_repl8b(src, dst)); - ins_pipe(ialu_reg); -%} - -// Replicate scalar to packed byte values in Double register -instruct Repl8B_reg(stackSlotD dst, iRegI src) %{ - match(Set dst (Replicate8B src)); - expand %{ - iRegL tmp; - Repl8B_reg_helper(tmp, src); - regL_to_stkD(dst, tmp); - %} -%} - -// Replicate scalar constant to packed byte values in Double register -instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{ - match(Set dst (Replicate8B con)); - effect(KILL tmp); - format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %} - ins_encode %{ - // XXX This is a quick fix for 6833573. - //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister); - RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register); - __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); - %} - ins_pipe(loadConFD); -%} - -// Replicate scalar to packed char values into stack slot -instruct Repl4C_reg_helper(iRegL dst, iRegI src) %{ - effect(DEF dst, USE src); - format %{ "SLLX $src,48,$dst\n\t" - "SRLX $dst,16,O7\n\t" - "OR $dst,O7,$dst\n\t" - "SRLX $dst,32,O7\n\t" - "OR $dst,O7,$dst\t! replicate4C" %} - ins_encode( enc_repl4s(src, dst) ); - ins_pipe(ialu_reg); -%} - -// Replicate scalar to packed char values into stack slot -instruct Repl4C_reg(stackSlotD dst, iRegI src) %{ - match(Set dst (Replicate4C src)); - expand %{ - iRegL tmp; - Repl4C_reg_helper(tmp, src); - regL_to_stkD(dst, tmp); - %} -%} - -// Replicate scalar constant to packed char values in Double register -instruct Repl4C_immI(regD dst, immI con, o7RegI tmp) %{ - match(Set dst (Replicate4C con)); - effect(KILL tmp); - format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4C($con)" %} - ins_encode %{ - // XXX This is a quick fix for 6833573. - //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister); - RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register); - __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); - %} - ins_pipe(loadConFD); -%} - -// Replicate scalar to packed short values into stack slot -instruct Repl4S_reg_helper(iRegL dst, iRegI src) %{ - effect(DEF dst, USE src); - format %{ "SLLX $src,48,$dst\n\t" - "SRLX $dst,16,O7\n\t" - "OR $dst,O7,$dst\n\t" - "SRLX $dst,32,O7\n\t" - "OR $dst,O7,$dst\t! replicate4S" %} - ins_encode( enc_repl4s(src, dst) ); - ins_pipe(ialu_reg); -%} - -// Replicate scalar to packed short values into stack slot -instruct Repl4S_reg(stackSlotD dst, iRegI src) %{ - match(Set dst (Replicate4S src)); - expand %{ - iRegL tmp; - Repl4S_reg_helper(tmp, src); - regL_to_stkD(dst, tmp); - %} -%} - -// Replicate scalar constant to packed short values in Double register -instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{ - match(Set dst (Replicate4S con)); - effect(KILL tmp); - format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %} - ins_encode %{ - // XXX This is a quick fix for 6833573. - //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister); - RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register); - __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); - %} - ins_pipe(loadConFD); -%} - -// Replicate scalar to packed int values in Double register -instruct Repl2I_reg_helper(iRegL dst, iRegI src) %{ - effect(DEF dst, USE src); - format %{ "SLLX $src,32,$dst\n\t" - "SRLX $dst,32,O7\n\t" - "OR $dst,O7,$dst\t! replicate2I" %} - ins_encode( enc_repl2i(src, dst)); - ins_pipe(ialu_reg); -%} - -// Replicate scalar to packed int values in Double register -instruct Repl2I_reg(stackSlotD dst, iRegI src) %{ - match(Set dst (Replicate2I src)); - expand %{ - iRegL tmp; - Repl2I_reg_helper(tmp, src); - regL_to_stkD(dst, tmp); - %} -%} - -// Replicate scalar zero constant to packed int values in Double register -instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{ - match(Set dst (Replicate2I con)); - effect(KILL tmp); - format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %} - ins_encode %{ - // XXX This is a quick fix for 6833573. - //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister); - RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register); - __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); - %} - ins_pipe(loadConFD); -%} - //----------Control Flow Instructions------------------------------------------ // Compare Instructions // Compare Integers @@ -10754,6 +10531,267 @@ ins_pipe(istore_mem_reg); %} +// ====================VECTOR INSTRUCTIONS===================================== + +// Load Aligned Packed values into a Double Register +instruct loadV8(regD dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "LDDF $mem,$dst\t! load vector (8 bytes)" %} + opcode(Assembler::lddf_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(floadD_mem); +%} + +// Store Vector in Double register to memory +instruct storeV8(memory mem, regD src) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STDF $src,$mem\t! store vector (8 bytes)" %} + opcode(Assembler::stdf_op3); + ins_encode(simple_form3_mem_reg( mem, src ) ); + ins_pipe(fstoreD_mem_reg); +%} + +// Store Zero into vector in memory +instruct storeV8B_zero(memory mem, immI0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateB zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (8 bytes)" %} + opcode(Assembler::stx_op3); + ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); + ins_pipe(fstoreD_mem_zero); +%} + +instruct storeV4C_zero(memory mem, immI0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateC zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (4 chars)" %} + opcode(Assembler::stx_op3); + ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); + ins_pipe(fstoreD_mem_zero); +%} + +instruct storeV4S_zero(memory mem, immI0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateS zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (4 shorts)" %} + opcode(Assembler::stx_op3); + ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); + ins_pipe(fstoreD_mem_zero); +%} + +instruct storeV2I_zero(memory mem, immI0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateI zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (2 ints)" %} + opcode(Assembler::stx_op3); + ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); + ins_pipe(fstoreD_mem_zero); +%} + +instruct storeV2F_zero(memory mem, immF0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateF zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (2 floats)" %} + opcode(Assembler::stx_op3); + ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); + ins_pipe(fstoreD_mem_zero); +%} + +// Replicate scalar to packed byte values in Double register +instruct Repl8B_reg_helper(iRegL dst, iRegI src) %{ + effect(DEF dst, USE src); + format %{ "SLLX $src,56,$dst\n\t" + "SRLX $dst, 8,O7\n\t" + "OR $dst,O7,$dst\n\t" + "SRLX $dst,16,O7\n\t" + "OR $dst,O7,$dst\n\t" + "SRLX $dst,32,O7\n\t" + "OR $dst,O7,$dst\t! replicate8B" %} + ins_encode( enc_repl8b(src, dst)); + ins_pipe(ialu_reg); +%} + +// Replicate scalar to packed byte values in Double register +instruct Repl8B_reg(stackSlotD dst, iRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + expand %{ + iRegL tmp; + Repl8B_reg_helper(tmp, src); + regL_to_stkD(dst, tmp); + %} +%} + +// Replicate scalar constant to packed byte values in Double register +instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. + //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +// Replicate scalar to packed char values into stack slot +instruct Repl4C_reg_helper(iRegL dst, iRegI src) %{ + effect(DEF dst, USE src); + format %{ "SLLX $src,48,$dst\n\t" + "SRLX $dst,16,O7\n\t" + "OR $dst,O7,$dst\n\t" + "SRLX $dst,32,O7\n\t" + "OR $dst,O7,$dst\t! replicate4C" %} + ins_encode( enc_repl4s(src, dst) ); + ins_pipe(ialu_reg); +%} + +// Replicate scalar to packed char values into stack slot +instruct Repl4C_reg(stackSlotD dst, iRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateC src)); + expand %{ + iRegL tmp; + Repl4C_reg_helper(tmp, src); + regL_to_stkD(dst, tmp); + %} +%} + +// Replicate scalar constant to packed char values in Double register +instruct Repl4C_immI(regD dst, immI con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateC con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4C($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. + //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +// Replicate scalar to packed short values into stack slot +instruct Repl4S_reg_helper(iRegL dst, iRegI src) %{ + effect(DEF dst, USE src); + format %{ "SLLX $src,48,$dst\n\t" + "SRLX $dst,16,O7\n\t" + "OR $dst,O7,$dst\n\t" + "SRLX $dst,32,O7\n\t" + "OR $dst,O7,$dst\t! replicate4S" %} + ins_encode( enc_repl4s(src, dst) ); + ins_pipe(ialu_reg); +%} + +// Replicate scalar to packed short values into stack slot +instruct Repl4S_reg(stackSlotD dst, iRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + expand %{ + iRegL tmp; + Repl4S_reg_helper(tmp, src); + regL_to_stkD(dst, tmp); + %} +%} + +// Replicate scalar constant to packed short values in Double register +instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. + //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +// Replicate scalar to packed int values in Double register +instruct Repl2I_reg_helper(iRegL dst, iRegI src) %{ + effect(DEF dst, USE src); + format %{ "SLLX $src,32,$dst\n\t" + "SRLX $dst,32,O7\n\t" + "OR $dst,O7,$dst\t! replicate2I" %} + ins_encode( enc_repl2i(src, dst)); + ins_pipe(ialu_reg); +%} + +// Replicate scalar to packed int values in Double register +instruct Repl2I_reg(stackSlotD dst, iRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + expand %{ + iRegL tmp; + Repl2I_reg_helper(tmp, src); + regL_to_stkD(dst, tmp); + %} +%} + +// Replicate scalar zero constant to packed int values in Double register +instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. + //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +// Replicate scalar to packed float values in Double register +instruct Repl2F_reg(stackSlotD dst, regF src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + ins_cost(MEMORY_REF_COST*2); + format %{ "STF $src,$dst.hi\t! packed2F\n\t" + "STF $src,$dst.lo" %} + opcode(Assembler::stf_op3); + ins_encode(simple_form3_mem_reg(dst, src), form3_mem_plus_4_reg(dst, src)); + ins_pipe(fstoreF_stk_reg); +%} + +// Replicate scalar zero constant to packed float values in Double register +instruct Repl2F_immF(regD dst, immF con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2F($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. + //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immF($con$$constant)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immF($con$$constant)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + //----------PEEPHOLE RULES----------------------------------------------------- // These must follow all instruction definitions as they use the names // defined in the instructions definitions. --- old/src/cpu/sparc/vm/vm_version_sparc.cpp Sat Jun 2 20:03:56 2012 +++ new/src/cpu/sparc/vm/vm_version_sparc.cpp Sat Jun 2 20:03:56 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -217,6 +217,8 @@ // Currently not supported anywhere. FLAG_SET_DEFAULT(UseFPUForSpilling, false); + MaxVectorSize = 8; + assert((InteriorEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); #endif --- old/src/cpu/x86/vm/assembler_x86.cpp Sat Jun 2 20:03:57 2012 +++ new/src/cpu/x86/vm/assembler_x86.cpp Sat Jun 2 20:03:57 2012 @@ -1637,6 +1637,13 @@ emit_byte(0xC0 | encode); } +void Assembler::movlhps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE); + emit_byte(0x16); + emit_byte(0xC0 | encode); +} + void Assembler::movb(Register dst, Address src) { NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); InstructionMark im(this); @@ -1686,6 +1693,14 @@ emit_operand(dst, src); } +void Assembler::movdl(Address dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66); + emit_byte(0x7E); + emit_operand(src, dst); +} + void Assembler::movdqa(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); @@ -1716,6 +1731,35 @@ emit_operand(src, dst); } +// Move Unaligned 256bit Vector +void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { + assert(UseAVX, ""); + bool vector256 = true; + int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256); + emit_byte(0x6F); + emit_byte(0xC0 | encode); +} + +void Assembler::vmovdqu(XMMRegister dst, Address src) { + assert(UseAVX, ""); + InstructionMark im(this); + bool vector256 = true; + vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256); + emit_byte(0x6F); + emit_operand(dst, src); +} + +void Assembler::vmovdqu(Address dst, XMMRegister src) { + assert(UseAVX, ""); + InstructionMark im(this); + bool vector256 = true; + // swap src<->dst for encoding + assert(src != xnoreg, "sanity"); + vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256); + emit_byte(0x7F); + emit_operand(src, dst); +} + // Uses zero extension on 64bit void Assembler::movl(Register dst, int32_t imm32) { @@ -3112,6 +3156,13 @@ emit_operand(dst, src); } +void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256); + emit_byte(0x57); + emit_byte(0xC0 | encode); +} + void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this); @@ -3120,6 +3171,30 @@ emit_operand(dst, src); } +void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, vector256); + emit_byte(0x57); + emit_byte(0xC0 | encode); +} + +void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + bool vector256 = true; + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); + emit_byte(0x18); + emit_byte(0xC0 | encode); + // 0x00 - insert into lower 128 bits + // 0x01 - insert into upper 128 bits + emit_byte(0x01); +} + +void Assembler::vzeroupper() { + assert(VM_Version::supports_avx(), ""); + (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); + emit_byte(0x77); +} + #ifndef _LP64 // 32bit only pieces of the assembler --- old/src/cpu/x86/vm/assembler_x86.hpp Sat Jun 2 20:03:58 2012 +++ new/src/cpu/x86/vm/assembler_x86.hpp Sat Jun 2 20:03:58 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -591,8 +591,9 @@ void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, VexSimdPrefix pre, bool vector256 = false) { - vex_prefix(src, nds->encoding(), dst->encoding(), - pre, VEX_OPCODE_0F, false, vector256); + int dst_enc = dst->encoding(); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256); } int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, @@ -600,9 +601,12 @@ bool vex_w, bool vector256); int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, - VexSimdPrefix pre, bool vector256 = false) { - return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), - pre, VEX_OPCODE_0F, false, vector256); + VexSimdPrefix pre, bool vector256 = false, + VexOpcode opc = VEX_OPCODE_0F) { + int src_enc = src->encoding(); + int dst_enc = dst->encoding(); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256); } void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, @@ -1261,6 +1265,7 @@ void movdl(XMMRegister dst, Register src); void movdl(Register dst, XMMRegister src); void movdl(XMMRegister dst, Address src); + void movdl(Address dst, XMMRegister src); // Move Double Quadword void movdq(XMMRegister dst, Register src); @@ -1274,6 +1279,14 @@ void movdqu(XMMRegister dst, Address src); void movdqu(XMMRegister dst, XMMRegister src); + // Move Unaligned 256bit Vector + void vmovdqu(Address dst, XMMRegister src); + void vmovdqu(XMMRegister dst, Address src); + void vmovdqu(XMMRegister dst, XMMRegister src); + + // Move lower 64bit to high 64bit in 128bit register + void movlhps(XMMRegister dst, XMMRegister src); + void movl(Register dst, int32_t imm32); void movl(Address dst, int32_t imm32); void movl(Register dst, Register src); @@ -1615,7 +1628,18 @@ void vxorpd(XMMRegister dst, XMMRegister nds, Address src); void vxorps(XMMRegister dst, XMMRegister nds, Address src); + // AVX Vector instrucitons. + void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); + // AVX instruction which is used to clear upper 128 bits of YMM registers and + // to avoid transaction penalty between AVX and SSE states. There is no + // penalty if legacy SSE instructions are encoded using VEX prefix because + // they always clear upper 128 bits. It should be used before calling + // runtime code and native libraries. + void vzeroupper(); + protected: // Next instructions require address alignment 16 bytes SSE mode. // They should be called only from corresponding MacroAssembler instructions. @@ -2527,9 +2551,13 @@ void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + // AVX Vector instructions + + void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); } void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); } void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); } void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); } void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src); --- old/src/cpu/x86/vm/register_x86.cpp Sat Jun 2 20:03:58 2012 +++ new/src/cpu/x86/vm/register_x86.cpp Sat Jun 2 20:03:58 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,7 +35,7 @@ const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + 2 * FloatRegisterImpl::number_of_registers; const int ConcreteRegisterImpl::max_xmm = ConcreteRegisterImpl::max_fpr + - 2 * XMMRegisterImpl::number_of_registers; + 8 * XMMRegisterImpl::number_of_registers; const char* RegisterImpl::name() const { const char* names[number_of_registers] = { #ifndef AMD64 --- old/src/cpu/x86/vm/register_x86.hpp Sat Jun 2 20:03:59 2012 +++ new/src/cpu/x86/vm/register_x86.hpp Sat Jun 2 20:03:59 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -158,7 +158,7 @@ XMMRegister successor() const { return as_XMMRegister(encoding() + 1); } // accessors - int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + int encoding() const { assert(is_valid(), err_msg("invalid register (%d)", (int)(intptr_t)this )); return (intptr_t)this; } bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } const char* name() const; }; @@ -216,7 +216,7 @@ RegisterImpl::number_of_registers + // "H" half of a 64bit register #endif // AMD64 2 * FloatRegisterImpl::number_of_registers + - 2 * XMMRegisterImpl::number_of_registers + + 8 * XMMRegisterImpl::number_of_registers + 1 // eflags }; --- old/src/cpu/x86/vm/vm_version_x86.cpp Sat Jun 2 20:03:59 2012 +++ new/src/cpu/x86/vm/vm_version_x86.cpp Sat Jun 2 20:03:59 2012 @@ -467,6 +467,32 @@ if (!supports_avx ()) // Drop to 0 if no AVX support UseAVX = 0; +#ifdef COMPILER2 + if (UseFPUForSpilling) { + if (UseSSE < 2) { + // Only supported with SSE2+ + FLAG_SET_DEFAULT(UseFPUForSpilling, false); + } + } + if (MaxVectorSize > 0) { + if (!is_power_of_2(MaxVectorSize)) { + warning("MaxVectorSize must be a power of 2"); + FLAG_SET_DEFAULT(MaxVectorSize, 32); + } + if (MaxVectorSize > 32) { + FLAG_SET_DEFAULT(MaxVectorSize, 32); + } + if (MaxVectorSize > 16 && UseAVX == 0) { + // Only supported with AVX+ + FLAG_SET_DEFAULT(MaxVectorSize, 16); + } + if (UseSSE < 2) { + // Only supported with SSE2+ + FLAG_SET_DEFAULT(MaxVectorSize, 0); + } + } +#endif + // On new cpus instructions which update whole XMM register should be used // to prevent partial register stall due to dependencies on high half. // @@ -544,6 +570,12 @@ } } +#ifdef COMPILER2 + if (MaxVectorSize > 16) { + // Limit vectors size to 16 bytes on current AMD cpus. + FLAG_SET_DEFAULT(MaxVectorSize, 16); + } +#endif // COMPILER2 } if( is_intel() ) { // Intel cpus specific settings @@ -606,15 +638,6 @@ FLAG_SET_DEFAULT(UsePopCountInstruction, false); } -#ifdef COMPILER2 - if (UseFPUForSpilling) { - if (UseSSE < 2) { - // Only supported with SSE2+ - FLAG_SET_DEFAULT(UseFPUForSpilling, false); - } - } -#endif - assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); --- old/src/cpu/x86/vm/vmreg_x86.cpp Sat Jun 2 20:04:00 2012 +++ new/src/cpu/x86/vm/vmreg_x86.cpp Sat Jun 2 20:04:00 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -48,8 +48,9 @@ XMMRegister xreg = ::as_XMMRegister(0); for ( ; i < ConcreteRegisterImpl::max_xmm ; ) { - regName[i++] = xreg->name(); - regName[i++] = xreg->name(); + for (int j = 0 ; j < 8 ; j++) { + regName[i++] = xreg->name(); + } xreg = xreg->successor(); } for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { --- old/src/cpu/x86/vm/vmreg_x86.inline.hpp Sat Jun 2 20:04:00 2012 +++ new/src/cpu/x86/vm/vmreg_x86.inline.hpp Sat Jun 2 20:04:00 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,7 +39,7 @@ } inline VMReg XMMRegisterImpl::as_VMReg() { - return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_fpr); + return VMRegImpl::as_VMReg((encoding() << 3) + ConcreteRegisterImpl::max_fpr); } @@ -75,7 +75,7 @@ inline XMMRegister VMRegImpl::as_XMMRegister() { assert( is_XMMRegister() && is_even(value()), "must be" ); // Yuk - return ::as_XMMRegister((value() - ConcreteRegisterImpl::max_fpr) >> 1); + return ::as_XMMRegister((value() - ConcreteRegisterImpl::max_fpr) >> 3); } inline bool VMRegImpl::is_concrete() { --- old/src/cpu/x86/vm/x86.ad Sat Jun 2 20:04:01 2012 +++ new/src/cpu/x86/vm/x86.ad Sat Jun 2 20:04:01 2012 @@ -24,6 +24,456 @@ // X86 Common Architecture Description File +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// archtecture. + +register %{ +//----------Architecture Description Register Definitions---------------------- +// General Registers +// "reg_def" name ( register save type, C convention save type, +// ideal register type, encoding ); +// Register Save Types: +// +// NS = No-Save: The register allocator assumes that these registers +// can be used without saving upon entry to the method, & +// that they do not need to be saved at call sites. +// +// SOC = Save-On-Call: The register allocator assumes that these registers +// can be used without saving upon entry to the method, +// but that they must be saved at call sites. +// +// SOE = Save-On-Entry: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, but they do not need to be saved at call +// sites. +// +// AS = Always-Save: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, & that they must be saved at call sites. +// +// Ideal Register Type is used to determine how to save & restore a +// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get +// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. +// +// The encoding number is the actual bit-pattern placed into the opcodes. + +// XMM registers. 256-bit registers or 8 words each, labeled (a)-h. +// Word a in each register holds a Float, words ab hold a Double. +// The whole registers are used in SSE4.2 version intrinsics, +// array copy stubs and superword operations (see UseSSE42Intrinsics, +// UseXMMForArrayCopy and UseSuperword flags). +// XMM8-XMM15 must be encoded with REX (VEX for UseAVX). +// Linux ABI: No register preserved across function calls +// XMM0-XMM7 might hold parameters +// Windows ABI: XMM6-XMM15 preserved across function calls +// XMM0-XMM3 might hold parameters + +reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); +reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()); +reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()); +reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()); +reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()); +reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); +reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()); +reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()); +reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()); +reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()); +reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); +reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()); +reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()); +reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()); +reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()); +reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); +reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()); +reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()); +reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()); +reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()); +reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); +reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()); +reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()); +reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()); +reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()); +reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); +reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()); +reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()); +reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()); +reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()); +reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +#ifdef _WIN64 + +reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()); +reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()); +reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()); +reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()); +reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()); +reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()); +reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()); +reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()); +reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()); +reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()); +reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); +reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()); +reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()); +reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()); +reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()); +reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); +reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()); +reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()); +reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()); +reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()); +reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()); +reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()); +reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()); +reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()); +reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()); +reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); +reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()); +reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()); +reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()); +reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()); +reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); +reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()); +reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()); +reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()); +reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()); +reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); +reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()); +reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()); +reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()); +reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()); +reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()); +reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()); +reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()); +reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()); +reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()); +reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); +reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()); +reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()); +reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()); +reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()); +reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +#else // _WIN64 + +reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); +reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()); +reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()); +reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()); +reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()); +reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); +reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()); +reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()); +reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()); +reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()); +reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +#ifdef _LP64 + +reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); +reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()); +reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()); +reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()); +reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()); +reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); +reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()); +reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()); +reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()); +reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()); +reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); +reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()); +reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()); +reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()); +reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()); +reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); +reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()); +reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()); +reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()); +reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()); +reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); +reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()); +reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()); +reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()); +reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()); +reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); +reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()); +reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()); +reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()); +reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()); +reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); +reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()); +reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()); +reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()); +reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()); +reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); +reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()); +reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()); +reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()); +reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()); +reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +#endif // _LP64 + +#endif // _WIN64 + +#ifdef _LP64 +reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); +#else +reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); +#endif // _LP64 + +alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, + XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, + XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, + XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, + XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, + XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, + XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, + XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h +#ifdef _LP64 + ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, + XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, + XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, + XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, + XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, + XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, + XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, + XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h +#endif + ); + +// flags allocation class should be last. +alloc_class chunk2(RFLAGS); + +// Singleton class for condition codes +reg_class int_flags(RFLAGS); + +// Class for all float registers +reg_class float_reg(XMM0, + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7 +#ifdef _LP64 + ,XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15 +#endif + ); + +// Class for all double registers +reg_class double_reg(XMM0, XMM0b, + XMM1, XMM1b, + XMM2, XMM2b, + XMM3, XMM3b, + XMM4, XMM4b, + XMM5, XMM5b, + XMM6, XMM6b, + XMM7, XMM7b +#ifdef _LP64 + ,XMM8, XMM8b, + XMM9, XMM9b, + XMM10, XMM10b, + XMM11, XMM11b, + XMM12, XMM12b, + XMM13, XMM13b, + XMM14, XMM14b, + XMM15, XMM15b +#endif + ); + +// Class for all 32bit vector registers +reg_class vectors_reg(XMM0, + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7 +#ifdef _LP64 + ,XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15 +#endif + ); + +// Class for all 64bit vector registers +reg_class vectord_reg(XMM0, XMM0b, + XMM1, XMM1b, + XMM2, XMM2b, + XMM3, XMM3b, + XMM4, XMM4b, + XMM5, XMM5b, + XMM6, XMM6b, + XMM7, XMM7b +#ifdef _LP64 + ,XMM8, XMM8b, + XMM9, XMM9b, + XMM10, XMM10b, + XMM11, XMM11b, + XMM12, XMM12b, + XMM13, XMM13b, + XMM14, XMM14b, + XMM15, XMM15b +#endif + ); + +// Class for all 128bit vector registers +reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d, + XMM1, XMM1b, XMM1c, XMM1d, + XMM2, XMM2b, XMM2c, XMM2d, + XMM3, XMM3b, XMM3c, XMM3d, + XMM4, XMM4b, XMM4c, XMM4d, + XMM5, XMM5b, XMM5c, XMM5d, + XMM6, XMM6b, XMM6c, XMM6d, + XMM7, XMM7b, XMM7c, XMM7d +#ifdef _LP64 + ,XMM8, XMM8b, XMM8c, XMM8d, + XMM9, XMM9b, XMM9c, XMM9d, + XMM10, XMM10b, XMM10c, XMM10d, + XMM11, XMM11b, XMM11c, XMM11d, + XMM12, XMM12b, XMM12c, XMM12d, + XMM13, XMM13b, XMM13c, XMM13d, + XMM14, XMM14b, XMM14c, XMM14d, + XMM15, XMM15b, XMM15c, XMM15d +#endif + ); + +// Class for all 256bit vector registers +reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, + XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, + XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, + XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, + XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, + XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, + XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, + XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h +#ifdef _LP64 + ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, + XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, + XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, + XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, + XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, + XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, + XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, + XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h +#endif + ); + +%} + source %{ // Float masks come from different places depending on platform. #ifdef _LP64 @@ -38,7 +488,253 @@ static address double_signflip() { return (address)double_signflip_pool; } #endif +// Map Types to machine register types +const int Matcher::base2reg[Type::lastype] = { + Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN, + Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */ + Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */ + Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */ + 0, 0/*abio*/, + Op_RegP /* Return address */, 0, /* the memories */ + Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, + 0 /*bottom*/ +}; + +// Max vector size in bytes. 0 if not supported. +const int Matcher::vector_width_in_bytes(BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + if (UseSSE < 2) return 0; + // SSE2 supports 128bit vectors for all types. + // AVX2 supports 256bit vectors for all types. + int size = (UseAVX > 1) ? 32 : 16; + // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. + if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) + size = 32; + // Use flag to limit vector size. + size = MIN2(size,(int)MaxVectorSize); + // Minimum 2 values in vector (or 4 for bytes). + switch (bt) { + case T_DOUBLE: + case T_LONG: + if (size < 16) return 0; + case T_FLOAT: + case T_INT: + if (size < 8) return 0; + case T_BOOLEAN: + case T_BYTE: + case T_CHAR: + case T_SHORT: + if (size < 4) return 0; + break; + default: + ShouldNotReachHere(); + } + return size; +} + +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} +const int Matcher::min_vector_size(const BasicType bt) { + int max_size = max_vector_size(bt); + // Min size which can be loaded into vector is 4 bytes. + int size = (type2aelembytes(bt) == 1) ? 4 : 2; + return MIN2(size,max_size); +} + +// Vector ideal reg corresponding to specidied size in bytes +const int Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize >= size, ""); + switch(size) { + case 4: return Op_VecS; + case 8: return Op_VecD; + case 16: return Op_VecX; + case 32: return Op_VecY; + } + ShouldNotReachHere(); + return 0; +} + +// x86 supports misaligned vectors store/load. +const bool Matcher::misaligned_vectors_ok() { + return !AlignVector; // can be changed by flag +} + +// Helper methods for MachSpillCopyNode::implementation(). +static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, + int src_hi, int dst_hi, uint ireg, outputStream* st) { + // In 64-bit VM size calculation is very complex. Emitting instructions + // into scratch buffer is used to get size in 64-bit VM. + LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) + assert(ireg == Op_VecS || // 32bit vector + (src_lo & 1) == 0 && (src_lo + 1) == src_hi && + (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, + "no non-adjacent vector moves" ); + if (cbuf) { + MacroAssembler _masm(cbuf); + int offset = __ offset(); + switch (ireg) { + case Op_VecS: // copy whole register + case Op_VecD: + case Op_VecX: + __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); + break; + case Op_VecY: + __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); + break; + default: + ShouldNotReachHere(); + } + int size = __ offset() - offset; +#ifdef ASSERT + // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. + assert(!do_size || size == 4, "incorrect size calculattion"); +#endif + return size; #ifndef PRODUCT + } else if (!do_size) { + switch (ireg) { + case Op_VecS: + case Op_VecD: + case Op_VecX: + st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); + break; + case Op_VecY: + st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); + break; + default: + ShouldNotReachHere(); + } +#endif + } + // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. + return 4; +} + +static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st) { + // In 64-bit VM size calculation is very complex. Emitting instructions + // into scratch buffer is used to get size in 64-bit VM. + LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) + if (cbuf) { + MacroAssembler _masm(cbuf); + int offset = __ offset(); + if (is_load) { + switch (ireg) { + case Op_VecS: + __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); + break; + case Op_VecD: + __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); + break; + case Op_VecX: + __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); + break; + case Op_VecY: + __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); + break; + default: + ShouldNotReachHere(); + } + } else { // store + switch (ireg) { + case Op_VecS: + __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); + break; + case Op_VecD: + __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); + break; + case Op_VecX: + __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); + break; + case Op_VecY: + __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); + break; + default: + ShouldNotReachHere(); + } + } + int size = __ offset() - offset; +#ifdef ASSERT + int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); + // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. + assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); +#endif + return size; +#ifndef PRODUCT + } else if (!do_size) { + if (is_load) { + switch (ireg) { + case Op_VecS: + st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecD: + st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecX: + st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecY: + st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + default: + ShouldNotReachHere(); + } + } else { // store + switch (ireg) { + case Op_VecS: + st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); + break; + case Op_VecD: + st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); + break; + case Op_VecX: + st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); + break; + case Op_VecY: + st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); + break; + default: + ShouldNotReachHere(); + } + } +#endif + } + int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); + // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. + return 5+offset_size; +} + +static inline jfloat replicate4_imm(int con, int width) { + // Load a constant of "width" (in bytes) and replicate it to fill 32bit. + assert(width == 1 || width == 2, "only byte or short types here"); + int bit_width = width * 8; + jint val = con; + val &= (1 << bit_width) - 1; // mask off sign bits + while(bit_width < 32) { + val |= (val << bit_width); + bit_width <<= 1; + } + jfloat fval = *((jfloat*) &val); // coerce to float type + return fval; +} + +static inline jdouble replicate8_imm(int con, int width) { + // Load a constant of "width" (in bytes) and replicate it to fill 64bit. + assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); + int bit_width = width * 8; + jlong val = con; + val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits + while(bit_width < 64) { + val |= (val << bit_width); + bit_width <<= 1; + } + jdouble dval = *((jdouble*) &val); // coerce to double type + return dval; +} + +#ifndef PRODUCT void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { st->print("nop \t# %d bytes pad for loops and calls", _count); } @@ -103,6 +799,46 @@ %} + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +// Vectors +operand vecS() %{ + constraint(ALLOC_IN_RC(vectors_reg)); + match(VecS); + + format %{ %} + interface(REG_INTER); +%} + +operand vecD() %{ + constraint(ALLOC_IN_RC(vectord_reg)); + match(VecD); + + format %{ %} + interface(REG_INTER); +%} + +operand vecX() %{ + constraint(ALLOC_IN_RC(vectorx_reg)); + match(VecX); + + format %{ %} + interface(REG_INTER); +%} + +operand vecY() %{ + constraint(ALLOC_IN_RC(vectory_reg)); + match(VecY); + + format %{ %} + interface(REG_INTER); +%} + + // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) // ============================================================================ @@ -852,3 +1588,942 @@ ins_pipe(pipe_slow); %} + +// ====================VECTOR INSTRUCTIONS===================================== + +// Load vectors (4 bytes long) +instruct loadV4(vecS dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 4); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); +%} + +// Load vectors (8 bytes long) +instruct loadV8(vecD dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); +%} + +// Load vectors (16 bytes long) +instruct loadV16(vecX dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} + ins_encode %{ + __ movdqu($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); +%} + +// Load vectors (32 bytes long) +instruct loadV32(vecY dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 32); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} + ins_encode %{ + __ vmovdqu($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); +%} + +// Store vectors +instruct storeV4(memory mem, vecS src) %{ + predicate(n->as_StoreVector()->memory_size() == 4); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "movd $mem,$src\t! store vector (4 bytes)" %} + ins_encode %{ + __ movdl($mem$$Address, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct storeV8(memory mem, vecD src) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "movq $mem,$src\t! store vector (8 bytes)" %} + ins_encode %{ + __ movq($mem$$Address, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct storeV16(memory mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} + ins_encode %{ + __ movdqu($mem$$Address, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct storeV32(memory mem, vecY src) %{ + predicate(n->as_StoreVector()->memory_size() == 32); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} + ins_encode %{ + __ vmovdqu($mem$$Address, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate byte scalar to be vector +instruct Repl4B(vecS dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\t! replicate4B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8B(vecD dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\t! replicate8B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16B(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "movlhps $dst,$dst\t! replicate16B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32B(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate32B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate byte scalar immediate to be vector by loading from const table. +instruct Repl4B_imm(vecS dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateB con)); + format %{ "movss $dst,[$constantaddress]\t! replicate4B($con)" %} + ins_encode %{ + __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8B_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate8B($con)" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16B_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate16B($con)\n\t" + "movlhps $dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32B_imm(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB con)); + format %{ "movsd $dst,[$constantaddress]\t! lreplicate32B($con)\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate byte scalar zero to be vector +instruct Repl4B_zero(vecS dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateB zero)); + format %{ "pxor $dst,$dst\t! replicate4B zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8B_zero(vecD dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB zero)); + format %{ "pxor $dst,$dst\t! replicate8B zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl16B_zero(vecX dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB zero)); + format %{ "pxor $dst,$dst\t! replicate16B zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl32B_zero(vecY dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB zero)); + format %{ "vxorpd $dst,$dst,$dst\t! replicate32B zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). + bool vector256 = true; + __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate short (2 byte) scalar to be vector +instruct Repl2S(vecS dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateS src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\t! replicate2S" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4S(vecD dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\t! replicate4S" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8S(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "movlhps $dst,$dst\t! replicate8S" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16S(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate16S" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate short (2 byte) scalar immediate to be vector by loading from const table. +instruct Repl2S_imm(vecS dst, immI con) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateS con)); + format %{ "movss $dst,[$constantaddress]\t! replicate2S($con)" %} + ins_encode %{ + __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4S_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate4S($con)" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8S_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate8S($con)\n\t" + "movlhps $dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16S_imm(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate16S($con)\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate short (2 byte) scalar zero to be vector +instruct Repl2S_zero(vecS dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateS zero)); + format %{ "pxor $dst,$dst\t! replicate2S zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4S_zero(vecD dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS zero)); + format %{ "pxor $dst,$dst\t! replicate4S zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8S_zero(vecX dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS zero)); + format %{ "pxor $dst,$dst\t! replicate8S zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl16S_zero(vecY dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS zero)); + format %{ "vxorpd $dst,$dst,$dst\t! replicate16S zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). + bool vector256 = true; + __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate char (2 byte) scalar to be vector +instruct Repl2C(vecS dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateC src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\t! replicate2C" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4C(vecD dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateC src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\t! replicate4C" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8C(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateC src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "movlhps $dst,$dst\t! replicate8C" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16C(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateC src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate16C" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate char (2 byte) scalar immediate to be vector by loading from const table. +instruct Repl2C_imm(vecS dst, immI con) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateC con)); + format %{ "movss $dst,[$constantaddress]\t! replicate2C($con)" %} + ins_encode %{ + __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4C_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateC con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate4C($con)" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8C_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateC con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate8C($con)\n\t" + "movlhps $dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16C_imm(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateC con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate16C($con)\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate char (2 byte) scalar zero to be vector +instruct Repl2C_zero(vecS dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateC zero)); + format %{ "pxor $dst,$dst\t! replicate2C zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4C_zero(vecD dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateC zero)); + format %{ "pxor $dst,$dst\t! replicate4C zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8C_zero(vecX dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateC zero)); + format %{ "pxor $dst,$dst\t! replicate8C zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl16C_zero(vecY dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateC zero)); + format %{ "vxorpd $dst,$dst,$dst\t! replicate16C zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). + bool vector256 = true; + __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate integer (4 byte) scalar to be vector +instruct Repl2I(vecD dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + format %{ "movd $dst,$src\n\t" + "pshufd $dst,$dst,0x00\t! replicate2I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4I(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + format %{ "movd $dst,$src\n\t" + "pshufd $dst,$dst,0x00\t! replicate4I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI src)); + format %{ "movd $dst,$src\n\t" + "pshufd $dst,$dst,0x00\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate8I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. +instruct Repl2I_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate2I($con)" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4I_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate4I($con)\n\t" + "movlhps $dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I_imm(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate8I($con)\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Integer could be loaded into xmm register directly from memory. +instruct Repl2I_mem(vecD dst, memory mem) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI mem)); + format %{ "movd $dst,$mem\n\t" + "pshufd $dst,$dst,0x00\t! replicate2I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $mem$$Address); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4I_mem(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI mem)); + format %{ "movd $dst,$mem\n\t" + "pshufd $dst,$dst,0x00\t! replicate4I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $mem$$Address); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I_mem(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI mem)); + format %{ "movd $dst,$mem\n\t" + "pshufd $dst,$dst,0x00\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate8I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $mem$$Address); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate integer (4 byte) scalar zero to be vector +instruct Repl2I_zero(vecD dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI zero)); + format %{ "pxor $dst,$dst\t! replicate2I" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4I_zero(vecX dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI zero)); + format %{ "pxor $dst,$dst\t! replicate4I zero)" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8I_zero(vecY dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI zero)); + format %{ "vxorpd $dst,$dst,$dst\t! replicate8I zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). + bool vector256 = true; + __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate long (8 byte) scalar to be vector +#ifdef _LP64 +instruct Repl2L(vecX dst, rRegL src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + format %{ "movdq $dst,$src\n\t" + "movlhps $dst,$dst\t! replicate2L" %} + ins_encode %{ + __ movdq($dst$$XMMRegister, $src$$Register); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4L(vecY dst, rRegL src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL src)); + format %{ "movdq $dst,$src\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} + ins_encode %{ + __ movdq($dst$$XMMRegister, $src$$Register); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} +#else // _LP64 +instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + effect(TEMP dst, USE src, TEMP tmp); + format %{ "movdl $dst,$src.lo\n\t" + "movdl $tmp,$src.hi\n\t" + "punpckldq $dst,$tmp\n\t" + "movlhps $dst,$dst\t! replicate2L"%} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL src)); + effect(TEMP dst, USE src, TEMP tmp); + format %{ "movdl $dst,$src.lo\n\t" + "movdl $tmp,$src.hi\n\t" + "punpckldq $dst,$tmp\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} +#endif // _LP64 + +// Replicate long (8 byte) scalar immediate to be vector by loading from const table. +instruct Repl2L_imm(vecX dst, immL con) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate2L($con)\n\t" + "movlhps $dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress($con)); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4L_imm(vecY dst, immL con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate4L($con)\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress($con)); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Long could be loaded into xmm register directly from memory. +instruct Repl2L_mem(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL mem)); + format %{ "movq $dst,$mem\n\t" + "movlhps $dst,$dst\t! replicate2L" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4L_mem(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL mem)); + format %{ "movq $dst,$mem\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate long (8 byte) scalar zero to be vector +instruct Repl2L_zero(vecX dst, immL0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL zero)); + format %{ "pxor $dst,$dst\t! replicate2L zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4L_zero(vecY dst, immL0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL zero)); + format %{ "vxorpd $dst,$dst,$dst\t! replicate4L zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). + bool vector256 = true; + __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate float (4 byte) scalar to be vector +instruct Repl2F(vecD dst, regF src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4F(vecX dst, regF src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8F(vecY dst, regF src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateF src)); + format %{ "pshufd $dst,$src,0x00\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate float (4 byte) scalar zero to be vector +instruct Repl2F_zero(vecD dst, immF0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF zero)); + format %{ "xorps $dst,$dst\t! replicate2F zero" %} + ins_encode %{ + __ xorps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4F_zero(vecX dst, immF0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF zero)); + format %{ "xorps $dst,$dst\t! replicate4F zero" %} + ins_encode %{ + __ xorps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8F_zero(vecY dst, immF0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateF zero)); + format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} + ins_encode %{ + bool vector256 = true; + __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate double (8 bytes) scalar to be vector +instruct Repl2D(vecX dst, regD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4D(vecY dst, regD src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateD src)); + format %{ "pshufd $dst,$src,0x44\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate double (8 byte) scalar zero to be vector +instruct Repl2D_zero(vecX dst, immD0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD zero)); + format %{ "xorpd $dst,$dst\t! replicate2D zero" %} + ins_encode %{ + __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4D_zero(vecY dst, immD0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateD zero)); + format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} + ins_encode %{ + bool vector256 = true; + __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + --- old/src/cpu/x86/vm/x86_32.ad Sat Jun 2 20:04:02 2012 +++ new/src/cpu/x86/vm/x86_32.ad Sat Jun 2 20:04:01 2012 @@ -74,9 +74,6 @@ reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg()); reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg()); -// Special Registers -reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); - // Float registers. We treat TOS/FPR0 special. It is invisible to the // allocator, and only shows up in the encodings. reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); @@ -105,27 +102,6 @@ reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()); reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next()); -// XMM registers. 128-bit registers or 4 words each, labeled a-d. -// Word a in each register holds a Float, words ab hold a Double. -// We currently do not use the SIMD capabilities, so registers cd -// are unused at the moment. -reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); -reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()); -reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); -reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()); -reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); -reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()); -reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); -reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()); -reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); -reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()); -reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); -reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()); -reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); -reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()); -reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); -reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()); - // Specify priority of register selection within phases of register // allocation. Highest priority is first. A useful heuristic is to // give registers a low priority when they are required by machine @@ -138,16 +114,7 @@ FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H, FPR6L, FPR6H, FPR7L, FPR7H ); -alloc_class chunk1( XMM0a, XMM0b, - XMM1a, XMM1b, - XMM2a, XMM2b, - XMM3a, XMM3b, - XMM4a, XMM4b, - XMM5a, XMM5b, - XMM6a, XMM6b, - XMM7a, XMM7b, EFLAGS); - //----------Architecture Description Register Classes-------------------------- // Several register classes are automatically defined based upon information in // this architecture description. @@ -159,12 +126,12 @@ // Class for all registers reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); // Class for general registers -reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX); +reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX); // Class for general registers which may be used for implicit null checks on win95 // Also safe for use by tailjump. We don't want to allocate in rbp, -reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX); +reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX); // Class of "X" registers -reg_class x_reg(EBX, ECX, EDX, EAX); +reg_class int_x_reg(EBX, ECX, EDX, EAX); // Class of registers that can appear in an address with no offset. // EBP and ESP require an extra instruction byte for zero offset. // Used in fast-unlock @@ -193,8 +160,6 @@ reg_class sp_reg(ESP); // Singleton class for instruction pointer // reg_class ip_reg(EIP); -// Singleton class for condition codes -reg_class int_flags(EFLAGS); // Class of integer register pairs reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI ); // Class of integer register pairs that aligns with calling convention @@ -206,29 +171,18 @@ // Floating point registers. Notice FPR0 is not a choice. // FPR0 is not ever allocated; we use clever encodings to fake // a 2-address instructions out of Intels FP stack. -reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); +reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); -// make a register class for SSE registers -reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a); +reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, + FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, + FPR7L,FPR7H ); -// make a double register class for SSE2 registers -reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b, - XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b ); +reg_class fp_flt_reg0( FPR1L ); +reg_class fp_dbl_reg0( FPR1L,FPR1H ); +reg_class fp_dbl_reg1( FPR2L,FPR2H ); +reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, + FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); -reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, - FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, - FPR7L,FPR7H ); - -reg_class flt_reg0( FPR1L ); -reg_class dbl_reg0( FPR1L,FPR1H ); -reg_class dbl_reg1( FPR2L,FPR2H ); -reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, - FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); - -// XMM6 and XMM7 could be used as temporary registers for long, float and -// double values for SSE2. -reg_class xdb_reg6( XMM6a,XMM6b ); -reg_class xdb_reg7( XMM7a,XMM7b ); %} @@ -412,7 +366,7 @@ } } - // eRegI ereg, memory mem) %{ // emit_reg_mem + // rRegI ereg, memory mem) %{ // emit_reg_mem void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) { // There is no index & no scale, use form without SIB byte if ((index == 0x4) && @@ -787,7 +741,7 @@ #endif } int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); - // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes. + // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. return size+5+offset_size; } @@ -821,7 +775,7 @@ } #endif } - // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes. + // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. // Only MOVAPS SSE prefix uses 1 byte. int sz = 4; if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && @@ -903,6 +857,108 @@ return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); } +// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. +static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, + int src_hi, int dst_hi, uint ireg, outputStream* st); + +static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st); + +static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, + int dst_offset, uint ireg, outputStream* st) { + int calc_size = 0; + int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); + int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); + switch (ireg) { + case Op_VecS: + calc_size = 3+src_offset_size + 3+dst_offset_size; + break; + case Op_VecD: + calc_size = 3+src_offset_size + 3+dst_offset_size; + src_offset += 4; + dst_offset += 4; + src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); + dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); + calc_size += 3+src_offset_size + 3+dst_offset_size; + break; + case Op_VecX: + calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; + break; + case Op_VecY: + calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; + break; + default: + ShouldNotReachHere(); + } + if (cbuf) { + MacroAssembler _masm(cbuf); + int offset = __ offset(); + switch (ireg) { + case Op_VecS: + __ pushl(Address(rsp, src_offset)); + __ popl (Address(rsp, dst_offset)); + break; + case Op_VecD: + __ pushl(Address(rsp, src_offset)); + __ popl (Address(rsp, dst_offset)); + __ pushl(Address(rsp, src_offset+4)); + __ popl (Address(rsp, dst_offset+4)); + break; + case Op_VecX: + __ movdqu(Address(rsp, -16), xmm0); + __ movdqu(xmm0, Address(rsp, src_offset)); + __ movdqu(Address(rsp, dst_offset), xmm0); + __ movdqu(xmm0, Address(rsp, -16)); + break; + case Op_VecY: + __ vmovdqu(Address(rsp, -32), xmm0); + __ vmovdqu(xmm0, Address(rsp, src_offset)); + __ vmovdqu(Address(rsp, dst_offset), xmm0); + __ vmovdqu(xmm0, Address(rsp, -32)); + break; + default: + ShouldNotReachHere(); + } + int size = __ offset() - offset; + assert(size == calc_size, "incorrect size calculattion"); + return size; +#ifndef PRODUCT + } else if (!do_size) { + switch (ireg) { + case Op_VecS: + st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" + "popl [rsp + #%d]", + src_offset, dst_offset); + break; + case Op_VecD: + st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" + "popq [rsp + #%d]\n\t" + "pushl [rsp + #%d]\n\t" + "popq [rsp + #%d]", + src_offset, dst_offset, src_offset+4, dst_offset+4); + break; + case Op_VecX: + st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" + "movdqu xmm0, [rsp + #%d]\n\t" + "movdqu [rsp + #%d], xmm0\n\t" + "movdqu xmm0, [rsp - #16]", + src_offset, dst_offset); + break; + case Op_VecY: + st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" + "vmovdqu xmm0, [rsp + #%d]\n\t" + "vmovdqu [rsp + #%d], xmm0\n\t" + "vmovdqu xmm0, [rsp - #32]", + src_offset, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#endif + } + return calc_size; +} + uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { // Get registers to move OptoReg::Name src_second = ra_->get_reg_second(in(1)); @@ -923,6 +979,29 @@ if( src_first == dst_first && src_second == dst_second ) return size; // Self copy, no move + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); + assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); + assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity"); + if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { + // mem -> mem + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); + } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { + return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); + } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { + int stack_offset = ra_->reg2offset(dst_first); + return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); + } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { + int stack_offset = ra_->reg2offset(src_first); + return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); + } else { + ShouldNotReachHere(); + } + } + // -------------------------------------- // Check for mem-mem move. push/pop to move. if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { @@ -1313,16 +1392,6 @@ return true; } -// Vector width in bytes -const uint Matcher::vector_width_in_bytes(void) { - return UseSSE >= 2 ? 8 : 0; -} - -// Vector ideal reg -const uint Matcher::vector_ideal_reg(void) { - return Op_RegD; -} - // Is this branch offset short enough that a short branch can be used? // // NOTE: If the platform does not provide any short branch variants, then @@ -1452,7 +1521,7 @@ // arguments in those registers not be available to the callee. bool Matcher::can_be_java_arg( int reg ) { if( reg == ECX_num || reg == EDX_num ) return true; - if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true; + if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; return false; } @@ -1565,16 +1634,16 @@ emit_opcode(cbuf,0x66); %} - enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) + enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} - enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many) + enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) emit_opcode(cbuf,$opcode$$constant); emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} - enc_class mov_r32_imm0( eRegI dst ) %{ + enc_class mov_r32_imm0( rRegI dst ) %{ emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 %} @@ -1621,7 +1690,7 @@ %} // Dense encoding for older common ops - enc_class Opc_plus(immI opcode, eRegI reg) %{ + enc_class Opc_plus(immI opcode, rRegI reg) %{ emit_opcode(cbuf, $opcode$$constant + $reg$$reg); %} @@ -1637,7 +1706,7 @@ } %} - enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m + enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m // Emit primary opcode and set sign-extend bit // Check for 8-bit immediate, and set sign extend bit in opcode if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { @@ -1682,7 +1751,7 @@ else emit_d32(cbuf,con); %} - enc_class OpcSReg (eRegI dst) %{ // BSWAP + enc_class OpcSReg (rRegI dst) %{ // BSWAP emit_cc(cbuf, $secondary, $dst$$reg ); %} @@ -1700,7 +1769,7 @@ emit_rm(cbuf, 0x3, destlo, desthi); %} - enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ... + enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... emit_rm(cbuf, 0x3, $secondary, $div$$reg ); %} @@ -1891,13 +1960,13 @@ // runtime_call_Relocation::spec(), RELOC_IMM32 ); // %} - enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR + enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR $$$emit8$primary; emit_rm(cbuf, 0x3, $secondary, $dst$$reg); $$$emit8$shift$$constant; %} - enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate + enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate // Load immediate does not have a zero or sign extended version // for 8-bit immediates emit_opcode(cbuf, 0xB8 + $dst$$reg); @@ -1904,7 +1973,7 @@ $$$emit32$src$$constant; %} - enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate + enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate // Load immediate does not have a zero or sign extended version // for 8-bit immediates emit_opcode(cbuf, $primary + $dst$$reg); @@ -1943,15 +2012,15 @@ // Encode a reg-reg copy. If it is useless, then empty encoding. - enc_class enc_Copy( eRegI dst, eRegI src ) %{ + enc_class enc_Copy( rRegI dst, rRegI src ) %{ encode_Copy( cbuf, $dst$$reg, $src$$reg ); %} - enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{ + enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ encode_Copy( cbuf, $dst$$reg, $src$$reg ); %} - enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) + enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} @@ -1973,7 +2042,7 @@ emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); %} - enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{ + enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); %} @@ -2068,7 +2137,7 @@ cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand %} - enc_class RegMem (eRegI ereg, memory mem) %{ // emit_reg_mem + enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem int reg_encoding = $ereg$$reg; int base = $mem$$base; int index = $mem$$index; @@ -2132,7 +2201,7 @@ // Clone of RegMem but accepts an extra parameter to access each // half of a double in memory; it never needs relocation info. - enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{ + enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ emit_opcode(cbuf,$opcode$$constant); int reg_encoding = $rm_reg$$reg; int base = $mem$$base; @@ -2168,7 +2237,7 @@ encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); %} - enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{ // emit_reg_lea + enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea int reg_encoding = $dst$$reg; int base = $src0$$reg; // 0xFFFFFFFF indicates no base int index = 0x04; // 0x04 indicates no index @@ -2178,7 +2247,7 @@ encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); %} - enc_class min_enc (eRegI dst, eRegI src) %{ // MIN + enc_class min_enc (rRegI dst, rRegI src) %{ // MIN // Compare dst,src emit_opcode(cbuf,0x3B); emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); @@ -2190,7 +2259,7 @@ emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} - enc_class max_enc (eRegI dst, eRegI src) %{ // MAX + enc_class max_enc (rRegI dst, rRegI src) %{ // MAX // Compare dst,src emit_opcode(cbuf,0x3B); emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); @@ -2221,7 +2290,7 @@ encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); %} - enc_class neg_reg(eRegI dst) %{ + enc_class neg_reg(rRegI dst) %{ // NEG $dst emit_opcode(cbuf,0xF7); emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); @@ -2251,7 +2320,7 @@ emit_rm(cbuf, 0x3, $p$$reg, tmpReg); %} - enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT + enc_class enc_cmpLTP_mem(rRegI p, rRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT int tmpReg = $tmp$$reg; // SUB $p,$q @@ -2390,12 +2459,12 @@ %} // Special case for moving an integer register to a stack slot. - enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS + enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); %} // Special case for moving a register to a stack slot. - enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS + enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS // Opcode already emitted emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte @@ -2640,7 +2709,7 @@ // equal_result = 0; // nan_result = -1; - enc_class CmpF_Result(eRegI dst) %{ + enc_class CmpF_Result(rRegI dst) %{ // fnstsw_ax(); emit_opcode( cbuf, 0xDF); emit_opcode( cbuf, 0xE0); @@ -2685,7 +2754,7 @@ // done: %} - enc_class convert_int_long( regL dst, eRegI src ) %{ + enc_class convert_int_long( regL dst, rRegI src ) %{ // mov $dst.lo,$src int dst_encoding = $dst$$reg; int src_encoding = $src$$reg; @@ -2754,7 +2823,7 @@ emit_rm( cbuf, 0x3, 0x4, $src$$reg); %} - enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{ + enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ // Basic idea: lo(result) = lo(x_lo * y_lo) // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) // MOV $tmp,$src.lo @@ -2780,7 +2849,7 @@ emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); %} - enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{ + enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ // Basic idea: lo(result) = lo(src * y_lo) // hi(result) = hi(src * y_lo) + lo(src * y_hi) // IMUL $tmp,EDX,$src @@ -2836,7 +2905,7 @@ emit_d8(cbuf, 4*4); %} - enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{ + enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ // MOV $tmp,$src.lo emit_opcode(cbuf, 0x8B); emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); @@ -2857,7 +2926,7 @@ emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); %} - enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{ + enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits emit_opcode( cbuf, 0x3B ); emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); @@ -2869,7 +2938,7 @@ emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); %} - enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{ + enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ // XOR $tmp,$tmp emit_opcode(cbuf,0x33); // XOR emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); @@ -3762,9 +3831,9 @@ // in SSE2+ mode we want to keep the FPU stack clean so pretend // that C functions return float and double results in XMM0. if( ideal_reg == Op_RegD && UseSSE>=2 ) - return OptoRegPair(XMM0b_num,XMM0a_num); + return OptoRegPair(XMM0b_num,XMM0_num); if( ideal_reg == Op_RegF && UseSSE>=2 ) - return OptoRegPair(OptoReg::Bad,XMM0a_num); + return OptoRegPair(OptoReg::Bad,XMM0_num); return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); %} @@ -3775,9 +3844,9 @@ static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; if( ideal_reg == Op_RegD && UseSSE>=2 ) - return OptoRegPair(XMM0b_num,XMM0a_num); + return OptoRegPair(XMM0b_num,XMM0_num); if( ideal_reg == Op_RegF && UseSSE>=1 ) - return OptoRegPair(OptoReg::Bad,XMM0a_num); + return OptoRegPair(OptoReg::Bad,XMM0_num); return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); %} @@ -4147,8 +4216,8 @@ // Register Operands // Integer Register -operand eRegI() %{ - constraint(ALLOC_IN_RC(e_reg)); +operand rRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); match(RegI); match(xRegI); match(eAXRegI); @@ -4163,8 +4232,8 @@ %} // Subset of Integer Register -operand xRegI(eRegI reg) %{ - constraint(ALLOC_IN_RC(x_reg)); +operand xRegI(rRegI reg) %{ + constraint(ALLOC_IN_RC(int_x_reg)); match(reg); match(eAXRegI); match(eBXRegI); @@ -4179,7 +4248,7 @@ operand eAXRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(eax_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "EAX" %} interface(REG_INTER); @@ -4189,7 +4258,7 @@ operand eBXRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(ebx_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "EBX" %} interface(REG_INTER); @@ -4198,7 +4267,7 @@ operand eCXRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(ecx_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "ECX" %} interface(REG_INTER); @@ -4207,7 +4276,7 @@ operand eDXRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(edx_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "EDX" %} interface(REG_INTER); @@ -4216,7 +4285,7 @@ operand eDIRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(edi_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "EDI" %} interface(REG_INTER); @@ -4263,7 +4332,7 @@ operand eSIRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(esi_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "ESI" %} interface(REG_INTER); @@ -4284,7 +4353,7 @@ %} operand eRegP() %{ - constraint(ALLOC_IN_RC(e_reg)); + constraint(ALLOC_IN_RC(int_reg)); match(RegP); match(eAXRegP); match(eBXRegP); @@ -4297,7 +4366,7 @@ // On windows95, EBP is not safe to use for implicit null tests. operand eRegP_no_EBP() %{ - constraint(ALLOC_IN_RC(e_reg_no_rbp)); + constraint(ALLOC_IN_RC(int_reg_no_rbp)); match(RegP); match(eAXRegP); match(eBXRegP); @@ -4477,7 +4546,7 @@ // Float register operands operand regDPR() %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(dbl_reg)); + constraint(ALLOC_IN_RC(fp_dbl_reg)); match(RegD); match(regDPR1); match(regDPR2); @@ -4487,7 +4556,7 @@ operand regDPR1(regDPR reg) %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(dbl_reg0)); + constraint(ALLOC_IN_RC(fp_dbl_reg0)); match(reg); format %{ "FPR1" %} interface(REG_INTER); @@ -4495,7 +4564,7 @@ operand regDPR2(regDPR reg) %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(dbl_reg1)); + constraint(ALLOC_IN_RC(fp_dbl_reg1)); match(reg); format %{ "FPR2" %} interface(REG_INTER); @@ -4503,45 +4572,16 @@ operand regnotDPR1(regDPR reg) %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(dbl_notreg0)); + constraint(ALLOC_IN_RC(fp_dbl_notreg0)); match(reg); format %{ %} interface(REG_INTER); %} -// XMM Double register operands -operand regD() %{ - predicate( UseSSE>=2 ); - constraint(ALLOC_IN_RC(xdb_reg)); - match(RegD); - match(regD6); - match(regD7); - format %{ %} - interface(REG_INTER); -%} - -// XMM6 double register operands -operand regD6(regD reg) %{ - predicate( UseSSE>=2 ); - constraint(ALLOC_IN_RC(xdb_reg6)); - match(reg); - format %{ "XMM6" %} - interface(REG_INTER); -%} - -// XMM7 double register operands -operand regD7(regD reg) %{ - predicate( UseSSE>=2 ); - constraint(ALLOC_IN_RC(xdb_reg7)); - match(reg); - format %{ "XMM7" %} - interface(REG_INTER); -%} - // Float register operands operand regFPR() %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(flt_reg)); + constraint(ALLOC_IN_RC(fp_flt_reg)); match(RegF); match(regFPR1); format %{ %} @@ -4551,22 +4591,31 @@ // Float register operands operand regFPR1(regFPR reg) %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(flt_reg0)); + constraint(ALLOC_IN_RC(fp_flt_reg0)); match(reg); format %{ "FPR1" %} interface(REG_INTER); %} -// XMM register operands +// XMM Float register operands operand regF() %{ predicate( UseSSE>=1 ); - constraint(ALLOC_IN_RC(xmm_reg)); + constraint(ALLOC_IN_RC(float_reg)); match(RegF); format %{ %} interface(REG_INTER); %} +// XMM Double register operands +operand regD() %{ + predicate( UseSSE>=2 ); + constraint(ALLOC_IN_RC(double_reg)); + match(RegD); + format %{ %} + interface(REG_INTER); +%} + //----------Memory Operands---------------------------------------------------- // Direct Memory Operand operand direct(immP addr) %{ @@ -4583,7 +4632,7 @@ // Indirect Memory Operand operand indirect(eRegP reg) %{ - constraint(ALLOC_IN_RC(e_reg)); + constraint(ALLOC_IN_RC(int_reg)); match(reg); format %{ "[$reg]" %} @@ -4622,7 +4671,7 @@ %} // Indirect Memory Plus Long Offset Operand -operand indOffset32X(eRegI reg, immP off) %{ +operand indOffset32X(rRegI reg, immP off) %{ match(AddP off reg); format %{ "[$reg + $off]" %} @@ -4635,7 +4684,7 @@ %} // Indirect Memory Plus Index Register Plus Offset Operand -operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{ +operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ match(AddP (AddP reg ireg) off); op_cost(10); @@ -4649,7 +4698,7 @@ %} // Indirect Memory Plus Index Register Plus Offset Operand -operand indIndex(eRegP reg, eRegI ireg) %{ +operand indIndex(eRegP reg, rRegI ireg) %{ match(AddP reg ireg); op_cost(10); @@ -4667,7 +4716,7 @@ // // ------------------------------------------------------------------------- // // Scaled Memory Operands // // Indirect Memory Times Scale Plus Offset Operand -// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{ +// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ // match(AddP off (LShiftI ireg scale)); // // op_cost(10); @@ -4681,7 +4730,7 @@ // %} // Indirect Memory Times Scale Plus Index Register -operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{ +operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ match(AddP reg (LShiftI ireg scale)); op_cost(10); @@ -4695,7 +4744,7 @@ %} // Indirect Memory Times Scale Plus Index Register Plus Offset Operand -operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{ +operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ match(AddP (AddP reg (LShiftI ireg scale)) off); op_cost(10); @@ -4823,7 +4872,7 @@ // Indirect Memory Operand operand indirect_win95_safe(eRegP_no_EBP reg) %{ - constraint(ALLOC_IN_RC(e_reg)); + constraint(ALLOC_IN_RC(int_reg)); match(reg); op_cost(100); @@ -4867,7 +4916,7 @@ %} // Indirect Memory Plus Index Register Plus Offset Operand -operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off) +operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) %{ match(AddP (AddP reg ireg) off); @@ -4882,7 +4931,7 @@ %} // Indirect Memory Times Scale Plus Index Register -operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale) +operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) %{ match(AddP reg (LShiftI ireg scale)); @@ -4897,7 +4946,7 @@ %} // Indirect Memory Times Scale Plus Index Register Plus Offset Operand -operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale) +operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) %{ match(AddP (AddP reg (LShiftI ireg scale)) off); @@ -5086,7 +5135,7 @@ // Or: _mem if it requires the big decoder and a memory unit. // Integer ALU reg operation -pipe_class ialu_reg(eRegI dst) %{ +pipe_class ialu_reg(rRegI dst) %{ single_instruction; dst : S4(write); dst : S3(read); @@ -5104,7 +5153,7 @@ %} // Integer ALU reg operation using big decoder -pipe_class ialu_reg_fat(eRegI dst) %{ +pipe_class ialu_reg_fat(rRegI dst) %{ single_instruction; dst : S4(write); dst : S3(read); @@ -5122,7 +5171,7 @@ %} // Integer ALU reg-reg operation -pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{ +pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ single_instruction; dst : S4(write); src : S3(read); @@ -5140,7 +5189,7 @@ %} // Integer ALU reg-reg operation -pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{ +pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ single_instruction; dst : S4(write); src : S3(read); @@ -5158,7 +5207,7 @@ %} // Integer ALU reg-mem operation -pipe_class ialu_reg_mem(eRegI dst, memory mem) %{ +pipe_class ialu_reg_mem(rRegI dst, memory mem) %{ single_instruction; dst : S5(write); mem : S3(read); @@ -5187,7 +5236,7 @@ %} // Integer Store to Memory -pipe_class ialu_mem_reg(memory mem, eRegI src) %{ +pipe_class ialu_mem_reg(memory mem, rRegI src) %{ single_instruction; mem : S3(read); src : S5(read); @@ -5216,7 +5265,7 @@ %} // Integer ALU0 reg-reg operation -pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{ +pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ single_instruction; dst : S4(write); src : S3(read); @@ -5225,7 +5274,7 @@ %} // Integer ALU0 reg-mem operation -pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{ +pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ single_instruction; dst : S5(write); mem : S3(read); @@ -5235,7 +5284,7 @@ %} // Integer ALU reg-reg operation -pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{ +pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{ single_instruction; cr : S4(write); src1 : S3(read); @@ -5245,7 +5294,7 @@ %} // Integer ALU reg-imm operation -pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{ +pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ single_instruction; cr : S4(write); src1 : S3(read); @@ -5254,7 +5303,7 @@ %} // Integer ALU reg-mem operation -pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{ +pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ single_instruction; cr : S4(write); src1 : S3(read); @@ -5265,7 +5314,7 @@ %} // Conditional move reg-reg -pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{ +pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ instruction_count(4); y : S4(read); q : S3(read); @@ -5274,7 +5323,7 @@ %} // Conditional move reg-reg -pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{ +pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ single_instruction; dst : S4(write); src : S3(read); @@ -5283,7 +5332,7 @@ %} // Conditional move reg-mem -pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{ +pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ single_instruction; dst : S4(write); src : S3(read); @@ -5534,7 +5583,7 @@ // in the encode section of the architecture description. //----------BSWAP-Instruction-------------------------------------------------- -instruct bytes_reverse_int(eRegI dst) %{ +instruct bytes_reverse_int(rRegI dst) %{ match(Set dst (ReverseBytesI dst)); format %{ "BSWAP $dst" %} @@ -5555,7 +5604,7 @@ ins_pipe( ialu_reg_reg); %} -instruct bytes_reverse_unsigned_short(eRegI dst) %{ +instruct bytes_reverse_unsigned_short(rRegI dst) %{ match(Set dst (ReverseBytesUS dst)); format %{ "BSWAP $dst\n\t" @@ -5567,7 +5616,7 @@ ins_pipe( ialu_reg ); %} -instruct bytes_reverse_short(eRegI dst) %{ +instruct bytes_reverse_short(rRegI dst) %{ match(Set dst (ReverseBytesS dst)); format %{ "BSWAP $dst\n\t" @@ -5582,7 +5631,7 @@ //---------- Zeros Count Instructions ------------------------------------------ -instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ predicate(UseCountLeadingZerosInstruction); match(Set dst (CountLeadingZerosI src)); effect(KILL cr); @@ -5594,7 +5643,7 @@ ins_pipe(ialu_reg); %} -instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ predicate(!UseCountLeadingZerosInstruction); match(Set dst (CountLeadingZerosI src)); effect(KILL cr); @@ -5619,7 +5668,7 @@ ins_pipe(ialu_reg); %} -instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{ +instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ predicate(UseCountLeadingZerosInstruction); match(Set dst (CountLeadingZerosL src)); effect(TEMP dst, KILL cr); @@ -5642,7 +5691,7 @@ ins_pipe(ialu_reg); %} -instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{ +instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ predicate(!UseCountLeadingZerosInstruction); match(Set dst (CountLeadingZerosL src)); effect(TEMP dst, KILL cr); @@ -5678,7 +5727,7 @@ ins_pipe(ialu_reg); %} -instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (CountTrailingZerosI src)); effect(KILL cr); @@ -5697,7 +5746,7 @@ ins_pipe(ialu_reg); %} -instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{ +instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ match(Set dst (CountTrailingZerosL src)); effect(TEMP dst, KILL cr); @@ -5729,7 +5778,7 @@ //---------- Population Count Instructions ------------------------------------- -instruct popCountI(eRegI dst, eRegI src) %{ +instruct popCountI(rRegI dst, rRegI src) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountI src)); @@ -5740,7 +5789,7 @@ ins_pipe(ialu_reg); %} -instruct popCountI_mem(eRegI dst, memory mem) %{ +instruct popCountI_mem(rRegI dst, memory mem) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountI (LoadI mem))); @@ -5752,7 +5801,7 @@ %} // Note: Long.bitCount(long) returns an int. -instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ +instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountL src)); effect(KILL cr, TEMP tmp, TEMP dst); @@ -5769,7 +5818,7 @@ %} // Note: Long.bitCount(long) returns an int. -instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{ +instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountL (LoadL mem))); effect(KILL cr, TEMP tmp, TEMP dst); @@ -5873,7 +5922,7 @@ %} // Load Short (16bit signed) -instruct loadS(eRegI dst, memory mem) %{ +instruct loadS(rRegI dst, memory mem) %{ match(Set dst (LoadS mem)); ins_cost(125); @@ -5887,7 +5936,7 @@ %} // Load Short (16 bit signed) to Byte (8 bit signed) -instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{ +instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ins_cost(125); @@ -5918,7 +5967,7 @@ %} // Load Unsigned Short/Char (16bit unsigned) -instruct loadUS(eRegI dst, memory mem) %{ +instruct loadUS(rRegI dst, memory mem) %{ match(Set dst (LoadUS mem)); ins_cost(125); @@ -5932,7 +5981,7 @@ %} // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) -instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{ +instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); ins_cost(125); @@ -5993,7 +6042,7 @@ %} // Load Integer -instruct loadI(eRegI dst, memory mem) %{ +instruct loadI(rRegI dst, memory mem) %{ match(Set dst (LoadI mem)); ins_cost(125); @@ -6007,7 +6056,7 @@ %} // Load Integer (32 bit signed) to Byte (8 bit signed) -instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{ +instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ins_cost(125); @@ -6019,7 +6068,7 @@ %} // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) -instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{ +instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ match(Set dst (AndI (LoadI mem) mask)); ins_cost(125); @@ -6031,7 +6080,7 @@ %} // Load Integer (32 bit signed) to Short (16 bit signed) -instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{ +instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ins_cost(125); @@ -6043,7 +6092,7 @@ %} // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) -instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{ +instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ match(Set dst (AndI (LoadI mem) mask)); ins_cost(125); @@ -6204,7 +6253,7 @@ %} // Load Range -instruct loadRange(eRegI dst, memory mem) %{ +instruct loadRange(rRegI dst, memory mem) %{ match(Set dst (LoadRange mem)); ins_cost(125); @@ -6301,66 +6350,6 @@ ins_pipe( fpu_reg_mem ); %} -// Load Aligned Packed Byte to XMM register -instruct loadA8B(regD dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (Load8B mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed8B" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Short to XMM register -instruct loadA4S(regD dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (Load4S mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed4S" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Char to XMM register -instruct loadA4C(regD dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (Load4C mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed4C" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Integer to XMM register -instruct load2IU(regD dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (Load2I mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed2I" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Single to XMM -instruct loadA2F(regD dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (Load2F mem)); - ins_cost(145); - format %{ "MOVQ $dst,$mem\t! packed2F" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - // Load Effective Address instruct leaP8(eRegP dst, indOffset8 mem) %{ match(Set dst mem); @@ -6413,7 +6402,7 @@ %} // Load Constant -instruct loadConI(eRegI dst, immI src) %{ +instruct loadConI(rRegI dst, immI src) %{ match(Set dst src); format %{ "MOV $dst,$src" %} @@ -6422,7 +6411,7 @@ %} // Load Constant zero -instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{ +instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ match(Set dst src); effect(KILL cr); @@ -6590,7 +6579,7 @@ %} // Load Stack Slot -instruct loadSSI(eRegI dst, stackSlotI src) %{ +instruct loadSSI(rRegI dst, stackSlotI src) %{ match(Set dst src); ins_cost(125); @@ -6817,7 +6806,7 @@ %} // Store Char/Short -instruct storeC(memory mem, eRegI src) %{ +instruct storeC(memory mem, rRegI src) %{ match(Set mem (StoreC mem src)); ins_cost(125); @@ -6828,7 +6817,7 @@ %} // Store Integer -instruct storeI(memory mem, eRegI src) %{ +instruct storeI(memory mem, rRegI src) %{ match(Set mem (StoreI mem src)); ins_cost(125); @@ -6972,42 +6961,6 @@ ins_pipe( ialu_mem_imm ); %} -// Store Aligned Packed Byte XMM register to memory -instruct storeA8B(memory mem, regD src) %{ - predicate(UseSSE>=1); - match(Set mem (Store8B mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed8B" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store Aligned Packed Char/Short XMM register to memory -instruct storeA4C(memory mem, regD src) %{ - predicate(UseSSE>=1); - match(Set mem (Store4C mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed4C" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store Aligned Packed Integer XMM register to memory -instruct storeA2I(memory mem, regD src) %{ - predicate(UseSSE>=1); - match(Set mem (Store2I mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed2I" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Store CMS card-mark Immediate instruct storeImmCM(memory mem, immI8 src) %{ match(Set mem (StoreCM mem src)); @@ -7069,18 +7022,6 @@ ins_pipe( pipe_slow ); %} -// Store Aligned Packed Single Float XMM register to memory -instruct storeA2F(memory mem, regD src) %{ - predicate(UseSSE>=1); - match(Set mem (Store2F mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed2F" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Store Float instruct storeFPR( memory mem, regFPR1 src) %{ predicate(UseSSE==0); @@ -7142,7 +7083,7 @@ %} // Store Integer to stack slot -instruct storeSSI(stackSlotI dst, eRegI src) %{ +instruct storeSSI(stackSlotI dst, rRegI src) %{ match(Set dst src); ins_cost(100); @@ -7267,7 +7208,7 @@ ins_pipe(empty); %} -instruct castP2X(eRegI dst, eRegP src ) %{ +instruct castP2X(rRegI dst, eRegP src ) %{ match(Set dst (CastP2X src)); ins_cost(50); format %{ "MOV $dst, $src\t# CastP2X" %} @@ -7277,7 +7218,7 @@ //----------Conditional Move--------------------------------------------------- // Conditional move -instruct jmovI_reg(cmpOp cop, eFlagsReg cr, eRegI dst, eRegI src) %{ +instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ predicate(!VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -7294,7 +7235,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src) %{ +instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ predicate(!VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -7311,7 +7252,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{ +instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -7321,7 +7262,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{ +instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -7331,7 +7272,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{ +instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -7341,7 +7282,7 @@ %} // Conditional move -instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{ +instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); ins_cost(250); @@ -7352,7 +7293,7 @@ %} // Conditional move -instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{ +instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); ins_cost(250); @@ -7362,7 +7303,7 @@ ins_pipe( pipe_cmov_mem ); %} -instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{ +instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); ins_cost(250); @@ -7616,7 +7557,7 @@ //----------Arithmetic Instructions-------------------------------------------- //----------Addition Instructions---------------------------------------------- // Integer Addition Instructions -instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (AddI dst src)); effect(KILL cr); @@ -7627,7 +7568,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ +instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (AddI dst src)); effect(KILL cr); @@ -7637,7 +7578,7 @@ ins_pipe( ialu_reg ); %} -instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{ +instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ predicate(UseIncDec); match(Set dst (AddI dst src)); effect(KILL cr); @@ -7649,7 +7590,7 @@ ins_pipe( ialu_reg ); %} -instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{ +instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ match(Set dst (AddI src0 src1)); ins_cost(110); @@ -7669,7 +7610,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{ +instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ predicate(UseIncDec); match(Set dst (AddI dst src)); effect(KILL cr); @@ -7681,7 +7622,7 @@ ins_pipe( ialu_reg ); %} -instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{ +instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ match(Set dst (AddP dst src)); effect(KILL cr); @@ -7703,7 +7644,7 @@ ins_pipe( ialu_reg ); %} -instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ +instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (AddI dst (LoadI src))); effect(KILL cr); @@ -7714,7 +7655,7 @@ ins_pipe( ialu_reg_mem ); %} -instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ +instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (AddI (LoadI dst) src))); effect(KILL cr); @@ -7776,7 +7717,7 @@ ins_pipe( empty ); %} -instruct castII( eRegI dst ) %{ +instruct castII( rRegI dst ) %{ match(Set dst (CastII dst)); format %{ "#castII of $dst" %} ins_encode( /*empty encoding*/ ); @@ -7854,7 +7795,7 @@ // Conditional-store of an int value. // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. -instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{ +instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ match(Set cr (StoreIConditional mem (Binary oldval newval))); effect(KILL oldval); format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} @@ -7887,7 +7828,7 @@ // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them -instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ +instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" @@ -7900,7 +7841,7 @@ ins_pipe( pipe_cmpxchg ); %} -instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ +instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" @@ -7912,7 +7853,7 @@ ins_pipe( pipe_cmpxchg ); %} -instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ +instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" @@ -7926,7 +7867,7 @@ //----------Subtraction Instructions------------------------------------------- // Integer Subtraction Instructions -instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (SubI dst src)); effect(KILL cr); @@ -7937,7 +7878,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ +instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (SubI dst src)); effect(KILL cr); @@ -7948,7 +7889,7 @@ ins_pipe( ialu_reg ); %} -instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ +instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (SubI dst (LoadI src))); effect(KILL cr); @@ -7959,7 +7900,7 @@ ins_pipe( ialu_reg_mem ); %} -instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ +instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (SubI (LoadI dst) src))); effect(KILL cr); @@ -7971,7 +7912,7 @@ %} // Subtract from a pointer -instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{ +instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ match(Set dst (AddP dst (SubI zero src))); effect(KILL cr); @@ -7982,7 +7923,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{ +instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ match(Set dst (SubI zero dst)); effect(KILL cr); @@ -7997,7 +7938,7 @@ //----------Multiplication/Division Instructions------------------------------- // Integer Multiplication Instructions // Multiply Register -instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (MulI dst src)); effect(KILL cr); @@ -8010,7 +7951,7 @@ %} // Multiply 32-bit Immediate -instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{ +instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ match(Set dst (MulI src imm)); effect(KILL cr); @@ -8066,7 +8007,7 @@ %} // Multiply Memory 32-bit Immediate -instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{ +instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ match(Set dst (MulI (LoadI src) imm)); effect(KILL cr); @@ -8078,7 +8019,7 @@ %} // Multiply Memory -instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{ +instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (MulI dst (LoadI src))); effect(KILL cr); @@ -8115,7 +8056,7 @@ %} // Multiply Register Long -instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ +instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ match(Set dst (MulL dst src)); effect(KILL cr, TEMP tmp); ins_cost(4*100+3*400); @@ -8133,7 +8074,7 @@ %} // Multiply Register Long where the left operand's high 32 bits are zero -instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ +instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ predicate(is_operand_hi32_zero(n->in(1))); match(Set dst (MulL dst src)); effect(KILL cr, TEMP tmp); @@ -8154,7 +8095,7 @@ %} // Multiply Register Long where the right operand's high 32 bits are zero -instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ +instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ predicate(is_operand_hi32_zero(n->in(2))); match(Set dst (MulL dst src)); effect(KILL cr, TEMP tmp); @@ -8190,7 +8131,7 @@ %} // Multiply Register Long by small constant -instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{ +instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ match(Set dst (MulL dst src)); effect(KILL cr, TEMP tmp); ins_cost(2*100+2*400); @@ -8288,7 +8229,7 @@ %} // Divide Register Long (no special case since divisor != -1) -instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{ +instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ match(Set dst (DivL dst imm)); effect( TEMP tmp, TEMP tmp2, KILL cr ); ins_cost(1000); @@ -8359,7 +8300,7 @@ %} // Remainder Register Long (remainder fit into 32 bits) -instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{ +instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ match(Set dst (ModL dst imm)); effect( TEMP tmp, TEMP tmp2, KILL cr ); ins_cost(1000); @@ -8427,7 +8368,7 @@ // Integer Shift Instructions // Shift Left by one -instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{ +instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ match(Set dst (LShiftI dst shift)); effect(KILL cr); @@ -8439,7 +8380,7 @@ %} // Shift Left by 8-bit immediate -instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ +instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ match(Set dst (LShiftI dst shift)); effect(KILL cr); @@ -8451,7 +8392,7 @@ %} // Shift Left by variable -instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{ +instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ match(Set dst (LShiftI dst shift)); effect(KILL cr); @@ -8463,7 +8404,7 @@ %} // Arithmetic shift right by one -instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{ +instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ match(Set dst (RShiftI dst shift)); effect(KILL cr); @@ -8485,7 +8426,7 @@ %} // Arithmetic Shift Right by 8-bit immediate -instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ +instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ match(Set dst (RShiftI dst shift)); effect(KILL cr); @@ -8508,7 +8449,7 @@ %} // Arithmetic Shift Right by variable -instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{ +instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ match(Set dst (RShiftI dst shift)); effect(KILL cr); @@ -8520,7 +8461,7 @@ %} // Logical shift right by one -instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{ +instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ match(Set dst (URShiftI dst shift)); effect(KILL cr); @@ -8532,7 +8473,7 @@ %} // Logical Shift Right by 8-bit immediate -instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ +instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ match(Set dst (URShiftI dst shift)); effect(KILL cr); @@ -8546,7 +8487,7 @@ // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. // This idiom is used by the compiler for the i2b bytecode. -instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{ +instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); size(3); @@ -8559,7 +8500,7 @@ // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. // This idiom is used by the compiler the i2s bytecode. -instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{ +instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); size(3); @@ -8572,7 +8513,7 @@ // Logical Shift Right by variable -instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{ +instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ match(Set dst (URShiftI dst shift)); effect(KILL cr); @@ -8588,7 +8529,7 @@ //----------Integer Logical Instructions--------------------------------------- // And Instructions // And Register with Register -instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (AndI dst src)); effect(KILL cr); @@ -8600,7 +8541,7 @@ %} // And Register with Immediate -instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ +instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (AndI dst src)); effect(KILL cr); @@ -8612,7 +8553,7 @@ %} // And Register with Memory -instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ +instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (AndI dst (LoadI src))); effect(KILL cr); @@ -8624,7 +8565,7 @@ %} // And Memory with Register -instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ +instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (AndI (LoadI dst) src))); effect(KILL cr); @@ -8650,7 +8591,7 @@ // Or Instructions // Or Register with Register -instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (OrI dst src)); effect(KILL cr); @@ -8661,7 +8602,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{ +instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ match(Set dst (OrI dst (CastP2X src))); effect(KILL cr); @@ -8674,7 +8615,7 @@ // Or Register with Immediate -instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ +instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (OrI dst src)); effect(KILL cr); @@ -8686,7 +8627,7 @@ %} // Or Register with Memory -instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ +instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (OrI dst (LoadI src))); effect(KILL cr); @@ -8698,7 +8639,7 @@ %} // Or Memory with Register -instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ +instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (OrI (LoadI dst) src))); effect(KILL cr); @@ -8724,7 +8665,7 @@ // ROL/ROR // ROL expand -instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{ +instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ effect(USE_DEF dst, USE shift, KILL cr); format %{ "ROL $dst, $shift" %} @@ -8733,7 +8674,7 @@ ins_pipe( ialu_reg ); %} -instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{ +instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ effect(USE_DEF dst, USE shift, KILL cr); format %{ "ROL $dst, $shift" %} @@ -8753,7 +8694,7 @@ // end of ROL expand // ROL 32bit by one once -instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ +instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); expand %{ @@ -8762,7 +8703,7 @@ %} // ROL 32bit var by imm8 once -instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ +instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); @@ -8790,7 +8731,7 @@ %} // ROR expand -instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{ +instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ effect(USE_DEF dst, USE shift, KILL cr); format %{ "ROR $dst, $shift" %} @@ -8799,7 +8740,7 @@ ins_pipe( ialu_reg ); %} -instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{ +instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ effect (USE_DEF dst, USE shift, KILL cr); format %{ "ROR $dst, $shift" %} @@ -8819,7 +8760,7 @@ // end of ROR expand // ROR right once -instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ +instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); expand %{ @@ -8828,7 +8769,7 @@ %} // ROR 32bit by immI8 once -instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ +instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); @@ -8857,7 +8798,7 @@ // Xor Instructions // Xor Register with Register -instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (XorI dst src)); effect(KILL cr); @@ -8869,7 +8810,7 @@ %} // Xor Register with Immediate -1 -instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{ +instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ match(Set dst (XorI dst imm)); size(2); @@ -8881,7 +8822,7 @@ %} // Xor Register with Immediate -instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ +instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (XorI dst src)); effect(KILL cr); @@ -8893,7 +8834,7 @@ %} // Xor Register with Memory -instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ +instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (XorI dst (LoadI src))); effect(KILL cr); @@ -8905,7 +8846,7 @@ %} // Xor Memory with Register -instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ +instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (XorI (LoadI dst) src))); effect(KILL cr); @@ -8930,7 +8871,7 @@ //----------Convert Int to Boolean--------------------------------------------- -instruct movI_nocopy(eRegI dst, eRegI src) %{ +instruct movI_nocopy(rRegI dst, rRegI src) %{ effect( DEF dst, USE src ); format %{ "MOV $dst,$src" %} ins_encode( enc_Copy( dst, src) ); @@ -8937,7 +8878,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{ +instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ effect( USE_DEF dst, USE src, KILL cr ); size(4); @@ -8948,7 +8889,7 @@ ins_pipe( ialu_reg_reg_long ); %} -instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{ +instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ match(Set dst (Conv2B src)); expand %{ @@ -8957,7 +8898,7 @@ %} %} -instruct movP_nocopy(eRegI dst, eRegP src) %{ +instruct movP_nocopy(rRegI dst, eRegP src) %{ effect( DEF dst, USE src ); format %{ "MOV $dst,$src" %} ins_encode( enc_Copy( dst, src) ); @@ -8964,7 +8905,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{ +instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ effect( USE_DEF dst, USE src, KILL cr ); format %{ "NEG $dst\n\t" "ADC $dst,$src" %} @@ -8973,7 +8914,7 @@ ins_pipe( ialu_reg_reg_long ); %} -instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{ +instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ match(Set dst (Conv2B src)); expand %{ @@ -8998,7 +8939,7 @@ ins_pipe( pipe_slow ); %} -instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{ +instruct cmpLTMask0( rRegI dst, immI0 zero, eFlagsReg cr ) %{ match(Set dst (CmpLTMask dst zero)); effect( DEF dst, KILL cr ); ins_cost(100); @@ -9470,7 +9411,7 @@ %} // Compare vs zero into -1,0,1 -instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE<=1); match(Set dst (CmpD3 src1 zero)); effect(KILL cr, KILL rax); @@ -9484,7 +9425,7 @@ %} // Compare into -1,0,1 -instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE<=1); match(Set dst (CmpD3 src1 src2)); effect(KILL cr, KILL rax); @@ -10262,7 +10203,7 @@ %} // Compare vs zero into -1,0,1 -instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE == 0); match(Set dst (CmpF3 src1 zero)); effect(KILL cr, KILL rax); @@ -10276,7 +10217,7 @@ %} // Compare into -1,0,1 -instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE == 0); match(Set dst (CmpF3 src1 src2)); effect(KILL cr, KILL rax); @@ -11196,7 +11137,7 @@ ins_pipe( fpu_reg_mem ); %} -instruct convI2D_reg(regD dst, eRegI src) %{ +instruct convI2D_reg(regD dst, rRegI src) %{ predicate( UseSSE>=2 && !UseXmmI2D ); match(Set dst (ConvI2D src)); format %{ "CVTSI2SD $dst,$src" %} @@ -11216,7 +11157,7 @@ ins_pipe( pipe_slow ); %} -instruct convXI2D_reg(regD dst, eRegI src) +instruct convXI2D_reg(regD dst, rRegI src) %{ predicate( UseSSE>=2 && UseXmmI2D ); match(Set dst (ConvI2D src)); @@ -11304,7 +11245,7 @@ %} // Convert an int to a float in xmm; no rounding step needed. -instruct convI2F_reg(regF dst, eRegI src) %{ +instruct convI2F_reg(regF dst, rRegI src) %{ predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); match(Set dst (ConvI2F src)); format %{ "CVTSI2SS $dst, $src" %} @@ -11314,7 +11255,7 @@ ins_pipe( pipe_slow ); %} - instruct convXI2F_reg(regF dst, eRegI src) + instruct convXI2F_reg(regF dst, rRegI src) %{ predicate( UseSSE>=2 && UseXmmI2F ); match(Set dst (ConvI2F src)); @@ -11328,7 +11269,7 @@ ins_pipe(pipe_slow); // XXX %} -instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{ +instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ match(Set dst (ConvI2L src)); effect(KILL cr); ins_cost(375); @@ -11340,7 +11281,7 @@ %} // Zero-extend convert int to long -instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{ +instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ match(Set dst (AndL (ConvI2L src) mask) ); effect( KILL flags ); ins_cost(250); @@ -11420,7 +11361,7 @@ ins_pipe( pipe_slow ); %} -instruct convL2I_reg( eRegI dst, eRegL src ) %{ +instruct convL2I_reg( rRegI dst, eRegL src ) %{ match(Set dst (ConvL2I src)); effect( DEF dst, USE src ); format %{ "MOV $dst,$src.lo" %} @@ -11429,7 +11370,7 @@ %} -instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{ +instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ match(Set dst (MoveF2I src)); effect( DEF dst, USE src ); ins_cost(100); @@ -11464,7 +11405,7 @@ ins_pipe( pipe_slow ); %} -instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{ +instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ predicate(UseSSE>=2); match(Set dst (MoveF2I src)); effect( DEF dst, USE src ); @@ -11476,7 +11417,7 @@ ins_pipe( pipe_slow ); %} -instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{ +instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ match(Set dst (MoveI2F src)); effect( DEF dst, USE src ); @@ -11516,7 +11457,7 @@ ins_pipe( pipe_slow ); %} -instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{ +instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ predicate(UseSSE>=2); match(Set dst (MoveI2F src)); effect( DEF dst, USE src ); @@ -11650,187 +11591,7 @@ ins_pipe( pipe_slow ); %} -// Replicate scalar to packed byte (1 byte) values in xmm -instruct Repl8B_reg(regD dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate8B src)); - format %{ "MOVDQA $dst,$src\n\t" - "PUNPCKLBW $dst,$dst\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} - ins_encode %{ - if ($dst$$reg != $src$$reg) { - __ movdqa($dst$$XMMRegister, $src$$XMMRegister); - } - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( pipe_slow ); -%} -// Replicate scalar to packed byte (1 byte) values in xmm -instruct Repl8B_eRegI(regD dst, eRegI src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate8B src)); - format %{ "MOVD $dst,$src\n\t" - "PUNPCKLBW $dst,$dst\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( pipe_slow ); -%} - -// Replicate scalar zero to packed byte (1 byte) values in xmm -instruct Repl8B_immI0(regD dst, immI0 zero) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate8B zero)); - format %{ "PXOR $dst,$dst\t! replicate8B" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed shore (2 byte) values in xmm -instruct Repl4S_reg(regD dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4S src)); - format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} - ins_encode %{ - __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed shore (2 byte) values in xmm -instruct Repl4S_eRegI(regD dst, eRegI src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4S src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed short (2 byte) values in xmm -instruct Repl4S_immI0(regD dst, immI0 zero) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4S zero)); - format %{ "PXOR $dst,$dst\t! replicate4S" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed char (2 byte) values in xmm -instruct Repl4C_reg(regD dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4C src)); - format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} - ins_encode %{ - __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed char (2 byte) values in xmm -instruct Repl4C_eRegI(regD dst, eRegI src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4C src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed char (2 byte) values in xmm -instruct Repl4C_immI0(regD dst, immI0 zero) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4C zero)); - format %{ "PXOR $dst,$dst\t! replicate4C" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed integer (4 byte) values in xmm -instruct Repl2I_reg(regD dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2I src)); - format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed integer (4 byte) values in xmm -instruct Repl2I_eRegI(regD dst, eRegI src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2I src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFD $dst,$dst,0x00\t! replicate2I" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed integer (2 byte) values in xmm -instruct Repl2I_immI0(regD dst, immI0 zero) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2I zero)); - format %{ "PXOR $dst,$dst\t! replicate2I" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_reg(regD dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2F src)); - format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_regF(regD dst, regF src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2F src)); - format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_immF0(regD dst, immF0 zero) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2F zero)); - format %{ "PXOR $dst,$dst\t! replicate2F" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - // ======================================================================= // fast clearing of an array instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ @@ -11938,7 +11699,7 @@ //----------Control Flow Instructions------------------------------------------ // Signed compare Instructions -instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{ +instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ match(Set cr (CmpI op1 op2)); effect( DEF cr, USE op1, USE op2 ); format %{ "CMP $op1,$op2" %} @@ -11947,7 +11708,7 @@ ins_pipe( ialu_cr_reg_reg ); %} -instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{ +instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ match(Set cr (CmpI op1 op2)); effect( DEF cr, USE op1 ); format %{ "CMP $op1,$op2" %} @@ -11958,7 +11719,7 @@ %} // Cisc-spilled version of cmpI_eReg -instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{ +instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ match(Set cr (CmpI op1 (LoadI op2))); format %{ "CMP $op1,$op2" %} @@ -11968,7 +11729,7 @@ ins_pipe( ialu_cr_reg_mem ); %} -instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{ +instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ match(Set cr (CmpI src zero)); effect( DEF cr, USE src ); @@ -11978,7 +11739,7 @@ ins_pipe( ialu_cr_reg_imm ); %} -instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{ +instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ match(Set cr (CmpI (AndI src con) zero)); format %{ "TEST $src,$con" %} @@ -11987,7 +11748,7 @@ ins_pipe( ialu_cr_reg_imm ); %} -instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{ +instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ match(Set cr (CmpI (AndI src mem) zero)); format %{ "TEST $src,$mem" %} @@ -11998,7 +11759,7 @@ // Unsigned compare Instructions; really, same as signed except they // produce an eFlagsRegU instead of eFlagsReg. -instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{ +instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ match(Set cr (CmpU op1 op2)); format %{ "CMPu $op1,$op2" %} @@ -12007,7 +11768,7 @@ ins_pipe( ialu_cr_reg_reg ); %} -instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{ +instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ match(Set cr (CmpU op1 op2)); format %{ "CMPu $op1,$op2" %} @@ -12017,7 +11778,7 @@ %} // // Cisc-spilled version of cmpU_eReg -instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{ +instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ match(Set cr (CmpU op1 (LoadI op2))); format %{ "CMPu $op1,$op2" %} @@ -12028,7 +11789,7 @@ %} // // Cisc-spilled version of cmpU_eReg -//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{ +//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ // match(Set cr (CmpU (LoadI op1) op2)); // // format %{ "CMPu $op1,$op2" %} @@ -12037,7 +11798,7 @@ // ins_encode( OpcP, RegMem( op1, op2) ); //%} -instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{ +instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ match(Set cr (CmpU src zero)); format %{ "TESTu $src,$src" %} @@ -12133,7 +11894,7 @@ // *** Min and Max using the conditional move are slower than the // *** branch version on a Pentium III. // // Conditional move for min -//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ +//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ // effect( USE_DEF op2, USE op1, USE cr ); // format %{ "CMOVlt $op2,$op1\t! min" %} // opcode(0x4C,0x0F); @@ -12142,7 +11903,7 @@ //%} // //// Min Register with Register (P6 version) -//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ +//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ // predicate(VM_Version::supports_cmov() ); // match(Set op2 (MinI op1 op2)); // ins_cost(200); @@ -12154,7 +11915,7 @@ //%} // Min Register with Register (generic version) -instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{ +instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ match(Set dst (MinI dst src)); effect(KILL flags); ins_cost(300); @@ -12169,7 +11930,7 @@ // *** Min and Max using the conditional move are slower than the // *** branch version on a Pentium III. // // Conditional move for max -//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ +//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ // effect( USE_DEF op2, USE op1, USE cr ); // format %{ "CMOVgt $op2,$op1\t! max" %} // opcode(0x4F,0x0F); @@ -12178,7 +11939,7 @@ //%} // // // Max Register with Register (P6 version) -//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ +//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ // predicate(VM_Version::supports_cmov() ); // match(Set op2 (MaxI op1 op2)); // ins_cost(200); @@ -12190,7 +11951,7 @@ //%} // Max Register with Register (generic version) -instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{ +instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ match(Set dst (MaxI dst src)); effect(KILL flags); ins_cost(300); @@ -12251,7 +12012,7 @@ // ============================================================================ // Branch Instructions // Jump Table -instruct jumpXtnd(eRegI switch_val) %{ +instruct jumpXtnd(rRegI switch_val) %{ match(Jump switch_val); ins_cost(350); format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} @@ -12669,7 +12430,7 @@ // Manifest a CmpL result in the normal flags. Only good for LT or GE // compares. Can be used for LE or GT compares by reversing arguments. // NOT GOOD FOR EQ/NE tests. -instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{ +instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ match( Set flags (CmpL src1 src2 )); effect( TEMP tmp ); ins_cost(300); @@ -12715,7 +12476,7 @@ %} // Compare 2 longs and CMOVE ints. -instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{ +instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); ins_cost(200); @@ -12725,7 +12486,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{ +instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); ins_cost(250); @@ -12786,7 +12547,7 @@ //====== // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. -instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{ +instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{ match( Set flags (CmpL src zero )); effect(TEMP tmp); ins_cost(200); @@ -12843,7 +12604,7 @@ %} // Compare 2 longs and CMOVE ints. -instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{ +instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); ins_cost(200); @@ -12853,7 +12614,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{ +instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); ins_cost(250); @@ -12915,7 +12676,7 @@ //====== // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. // Same as cmpL_reg_flags_LEGT except must negate src -instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{ +instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ match( Set flags (CmpL src zero )); effect( TEMP tmp ); ins_cost(300); @@ -12929,7 +12690,7 @@ // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands // requires a commuted test to get the same result. -instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{ +instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ match( Set flags (CmpL src1 src2 )); effect( TEMP tmp ); ins_cost(300); @@ -12976,7 +12737,7 @@ %} // Compare 2 longs and CMOVE ints. -instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{ +instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); ins_cost(200); @@ -12986,7 +12747,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{ +instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); ins_cost(250); @@ -13315,11 +13076,11 @@ // ---------EXAMPLE---------------------------------------------------------- // // // pertinent parts of existing instructions in architecture description -// instruct movI(eRegI dst, eRegI src) %{ +// instruct movI(rRegI dst, rRegI src) %{ // match(Set dst (CopyI src)); // %} // -// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{ +// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ // match(Set dst (AddI dst src)); // effect(KILL cr); // %} @@ -13364,11 +13125,11 @@ // %} // // Change load of spilled value to only a spill -// instruct storeI(memory mem, eRegI src) %{ +// instruct storeI(memory mem, rRegI src) %{ // match(Set mem (StoreI mem src)); // %} // -// instruct loadI(eRegI dst, memory mem) %{ +// instruct loadI(rRegI dst, memory mem) %{ // match(Set dst (LoadI mem)); // %} // --- old/src/cpu/x86/vm/x86_64.ad Sat Jun 2 20:04:04 2012 +++ new/src/cpu/x86/vm/x86_64.ad Sat Jun 2 20:04:03 2012 @@ -131,102 +131,6 @@ // Floating Point Registers -// XMM registers. 128-bit registers or 4 words each, labeled (a)-d. -// Word a in each register holds a Float, words ab hold a Double. We -// currently do not use the SIMD capabilities, so registers cd are -// unused at the moment. -// XMM8-XMM15 must be encoded with REX. -// Linux ABI: No register preserved across function calls -// XMM0-XMM7 might hold parameters -// Windows ABI: XMM6-XMM15 preserved across function calls -// XMM0-XMM3 might hold parameters - -reg_def XMM0 (SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); -reg_def XMM0_H (SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()); - -reg_def XMM1 (SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); -reg_def XMM1_H (SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()); - -reg_def XMM2 (SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); -reg_def XMM2_H (SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()); - -reg_def XMM3 (SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); -reg_def XMM3_H (SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()); - -reg_def XMM4 (SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); -reg_def XMM4_H (SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()); - -reg_def XMM5 (SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); -reg_def XMM5_H (SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()); - -#ifdef _WIN64 - -reg_def XMM6 (SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()); -reg_def XMM6_H (SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()); - -reg_def XMM7 (SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()); -reg_def XMM7_H (SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()); - -reg_def XMM8 (SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); -reg_def XMM8_H (SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()); - -reg_def XMM9 (SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); -reg_def XMM9_H (SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()); - -reg_def XMM10 (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()); -reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()); - -reg_def XMM11 (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); -reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()); - -reg_def XMM12 (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); -reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()); - -reg_def XMM13 (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); -reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()); - -reg_def XMM14 (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()); -reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()); - -reg_def XMM15 (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); -reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()); - -#else - -reg_def XMM6 (SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); -reg_def XMM6_H (SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()); - -reg_def XMM7 (SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); -reg_def XMM7_H (SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()); - -reg_def XMM8 (SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); -reg_def XMM8_H (SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()); - -reg_def XMM9 (SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); -reg_def XMM9_H (SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()); - -reg_def XMM10 (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); -reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()); - -reg_def XMM11 (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); -reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()); - -reg_def XMM12 (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); -reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()); - -reg_def XMM13 (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); -reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()); - -reg_def XMM14 (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); -reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()); - -reg_def XMM15 (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); -reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()); - -#endif // _WIN64 - -reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); - // Specify priority of register selection within phases of register // allocation. Highest priority is first. A useful heuristic is to // give registers a low priority when they are required by machine @@ -252,27 +156,7 @@ R15, R15_H, RSP, RSP_H); -// XXX probably use 8-15 first on Linux -alloc_class chunk1(XMM0, XMM0_H, - XMM1, XMM1_H, - XMM2, XMM2_H, - XMM3, XMM3_H, - XMM4, XMM4_H, - XMM5, XMM5_H, - XMM6, XMM6_H, - XMM7, XMM7_H, - XMM8, XMM8_H, - XMM9, XMM9_H, - XMM10, XMM10_H, - XMM11, XMM11_H, - XMM12, XMM12_H, - XMM13, XMM13_H, - XMM14, XMM14_H, - XMM15, XMM15_H); -alloc_class chunk2(RFLAGS); - - //----------Architecture Description Register Classes-------------------------- // Several register classes are automatically defined based upon information in // this architecture description. @@ -501,47 +385,8 @@ // Singleton class for instruction pointer // reg_class ip_reg(RIP); -// Singleton class for condition codes -reg_class int_flags(RFLAGS); - -// Class for all float registers -reg_class float_reg(XMM0, - XMM1, - XMM2, - XMM3, - XMM4, - XMM5, - XMM6, - XMM7, - XMM8, - XMM9, - XMM10, - XMM11, - XMM12, - XMM13, - XMM14, - XMM15); - -// Class for all double registers -reg_class double_reg(XMM0, XMM0_H, - XMM1, XMM1_H, - XMM2, XMM2_H, - XMM3, XMM3_H, - XMM4, XMM4_H, - XMM5, XMM5_H, - XMM6, XMM6_H, - XMM7, XMM7_H, - XMM8, XMM8_H, - XMM9, XMM9_H, - XMM10, XMM10_H, - XMM11, XMM11_H, - XMM12, XMM12_H, - XMM13, XMM13_H, - XMM14, XMM14_H, - XMM15, XMM15_H); %} - //----------SOURCE BLOCK------------------------------------------------------- // This is a block of C++ code which provides values, functions, and // definitions necessary in the rest of the architecture description @@ -1027,12 +872,84 @@ return rc_float; } +// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. +static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, + int src_hi, int dst_hi, uint ireg, outputStream* st); + +static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st); + +static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, + int dst_offset, uint ireg, outputStream* st) { + if (cbuf) { + MacroAssembler _masm(cbuf); + switch (ireg) { + case Op_VecS: + __ movq(Address(rsp, -8), rax); + __ movl(rax, Address(rsp, src_offset)); + __ movl(Address(rsp, dst_offset), rax); + __ movq(rax, Address(rsp, -8)); + break; + case Op_VecD: + __ pushq(Address(rsp, src_offset)); + __ popq (Address(rsp, dst_offset)); + break; + case Op_VecX: + __ pushq(Address(rsp, src_offset)); + __ popq (Address(rsp, dst_offset)); + __ pushq(Address(rsp, src_offset+8)); + __ popq (Address(rsp, dst_offset+8)); + break; + case Op_VecY: + __ vmovdqu(Address(rsp, -32), xmm0); + __ vmovdqu(xmm0, Address(rsp, src_offset)); + __ vmovdqu(Address(rsp, dst_offset), xmm0); + __ vmovdqu(xmm0, Address(rsp, -32)); + break; + default: + ShouldNotReachHere(); + } +#ifndef PRODUCT + } else { + switch (ireg) { + case Op_VecS: + st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t" + "movl rax, [rsp + #%d]\n\t" + "movl [rsp + #%d], rax\n\t" + "movq rax, [rsp - #8]", + src_offset, dst_offset); + break; + case Op_VecD: + st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t" + "popq [rsp + #%d]", + src_offset, dst_offset); + break; + case Op_VecX: + st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t" + "popq [rsp + #%d]\n\t" + "pushq [rsp + #%d]\n\t" + "popq [rsp + #%d]", + src_offset, dst_offset, src_offset+8, dst_offset+8); + break; + case Op_VecY: + st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" + "vmovdqu xmm0, [rsp + #%d]\n\t" + "vmovdqu [rsp + #%d], xmm0\n\t" + "vmovdqu xmm0, [rsp - #32]", + src_offset, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#endif + } +} + uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, PhaseRegAlloc* ra_, bool do_size, - outputStream* st) const -{ - + outputStream* st) const { + assert(cbuf != NULL || st != NULL, "sanity"); // Get registers to move OptoReg::Name src_second = ra_->get_reg_second(in(1)); OptoReg::Name src_first = ra_->get_reg_first(in(1)); @@ -1050,7 +967,30 @@ if (src_first == dst_first && src_second == dst_second) { // Self copy, no move return 0; - } else if (src_first_rc == rc_stack) { + } + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); + assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity"); + if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { + // mem -> mem + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) { + vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) { + int stack_offset = ra_->reg2offset(dst_first); + vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st); + } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) { + int stack_offset = ra_->reg2offset(src_first); + vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st); + } else { + ShouldNotReachHere(); + } + return 0; + } + if (src_first_rc == rc_stack) { // mem -> if (dst_first_rc == rc_stack) { // mem -> mem @@ -1061,23 +1001,16 @@ int src_offset = ra_->reg2offset(src_first); int dst_offset = ra_->reg2offset(dst_first); if (cbuf) { - emit_opcode(*cbuf, 0xFF); - encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false); - - emit_opcode(*cbuf, 0x8F); - encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false); - + MacroAssembler _masm(cbuf); + __ pushq(Address(rsp, src_offset)); + __ popq (Address(rsp, dst_offset)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t" - "popq [rsp + #%d]", - src_offset, - dst_offset); + "popq [rsp + #%d]", + src_offset, dst_offset); #endif } - return - 3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + - 3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)); } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1086,46 +1019,22 @@ int src_offset = ra_->reg2offset(src_first); int dst_offset = ra_->reg2offset(dst_first); if (cbuf) { - emit_opcode(*cbuf, Assembler::REX_W); - emit_opcode(*cbuf, 0x89); - emit_opcode(*cbuf, 0x44); - emit_opcode(*cbuf, 0x24); - emit_opcode(*cbuf, 0xF8); - - emit_opcode(*cbuf, 0x8B); - encode_RegMem(*cbuf, - RAX_enc, - RSP_enc, 0x4, 0, src_offset, - false); - - emit_opcode(*cbuf, 0x89); - encode_RegMem(*cbuf, - RAX_enc, - RSP_enc, 0x4, 0, dst_offset, - false); - - emit_opcode(*cbuf, Assembler::REX_W); - emit_opcode(*cbuf, 0x8B); - emit_opcode(*cbuf, 0x44); - emit_opcode(*cbuf, 0x24); - emit_opcode(*cbuf, 0xF8); - + MacroAssembler _masm(cbuf); + __ movq(Address(rsp, -8), rax); + __ movl(rax, Address(rsp, src_offset)); + __ movl(Address(rsp, dst_offset), rax); + __ movq(rax, Address(rsp, -8)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t" - "movl rax, [rsp + #%d]\n\t" - "movl [rsp + #%d], rax\n\t" - "movq rax, [rsp - #8]", - src_offset, - dst_offset); + "movl rax, [rsp + #%d]\n\t" + "movl [rsp + #%d], rax\n\t" + "movq rax, [rsp - #8]", + src_offset, dst_offset); #endif } - return - 5 + // movq - 3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl - 3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl - 5; // movq } + return 0; } else if (dst_first_rc == rc_int) { // mem -> gpr if ((src_first & 1) == 0 && src_first + 1 == src_second && @@ -1133,25 +1042,15 @@ // 64-bit int offset = ra_->reg2offset(src_first); if (cbuf) { - if (Matcher::_regEncode[dst_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_W); - } else { - emit_opcode(*cbuf, Assembler::REX_WR); - } - emit_opcode(*cbuf, 0x8B); - encode_RegMem(*cbuf, - Matcher::_regEncode[dst_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movq %s, [rsp + #%d]\t# spill", Matcher::regName[dst_first], offset); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1158,27 +1057,17 @@ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); int offset = ra_->reg2offset(src_first); if (cbuf) { - if (Matcher::_regEncode[dst_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } - emit_opcode(*cbuf, 0x8B); - encode_RegMem(*cbuf, - Matcher::_regEncode[dst_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movl %s, [rsp + #%d]\t# spill", Matcher::regName[dst_first], offset); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[dst_first] < 8) - ? 3 - : 4); // REX } + return 0; } else if (dst_first_rc == rc_float) { // mem-> xmm if ((src_first & 1) == 0 && src_first + 1 == src_second && @@ -1189,7 +1078,7 @@ MacroAssembler _masm(cbuf); __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("%s %s, [rsp + #%d]\t# spill", UseXmmLoadAndClearUpper ? "movsd " : "movlpd", Matcher::regName[dst_first], @@ -1196,11 +1085,6 @@ offset); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[dst_first] >= 8) - ? 6 - : (5 + ((UseAVX>0)?1:0))); // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1210,18 +1094,14 @@ MacroAssembler _masm(cbuf); __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movss %s, [rsp + #%d]\t# spill", Matcher::regName[dst_first], offset); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[dst_first] >= 8) - ? 6 - : (5 + ((UseAVX>0)?1:0))); // REX } + return 0; } } else if (src_first_rc == rc_int) { // gpr -> @@ -1232,24 +1112,15 @@ // 64-bit int offset = ra_->reg2offset(dst_first); if (cbuf) { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_W); - } else { - emit_opcode(*cbuf, Assembler::REX_WR); - } - emit_opcode(*cbuf, 0x89); - encode_RegMem(*cbuf, - Matcher::_regEncode[src_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movq [rsp + #%d], %s\t# spill", offset, Matcher::regName[src_first]); #endif } - return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1256,27 +1127,17 @@ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); int offset = ra_->reg2offset(dst_first); if (cbuf) { - if (Matcher::_regEncode[src_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } - emit_opcode(*cbuf, 0x89); - encode_RegMem(*cbuf, - Matcher::_regEncode[src_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movl [rsp + #%d], %s\t# spill", offset, Matcher::regName[src_first]); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[src_first] < 8) - ? 3 - : 4); // REX } + return 0; } else if (dst_first_rc == rc_int) { // gpr -> gpr if ((src_first & 1) == 0 && src_first + 1 == src_second && @@ -1283,62 +1144,33 @@ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { // 64-bit if (cbuf) { - if (Matcher::_regEncode[dst_first] < 8) { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_W); - } else { - emit_opcode(*cbuf, Assembler::REX_WB); - } - } else { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_WR); - } else { - emit_opcode(*cbuf, Assembler::REX_WRB); - } - } - emit_opcode(*cbuf, 0x8B); - emit_rm(*cbuf, 0x3, - Matcher::_regEncode[dst_first] & 7, - Matcher::_regEncode[src_first] & 7); + MacroAssembler _masm(cbuf); + __ movq(as_Register(Matcher::_regEncode[dst_first]), + as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movq %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return 3; // REX + return 0; } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); if (cbuf) { - if (Matcher::_regEncode[dst_first] < 8) { - if (Matcher::_regEncode[src_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_B); - } - } else { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } else { - emit_opcode(*cbuf, Assembler::REX_RB); - } - } - emit_opcode(*cbuf, 0x8B); - emit_rm(*cbuf, 0x3, - Matcher::_regEncode[dst_first] & 7, - Matcher::_regEncode[src_first] & 7); + MacroAssembler _masm(cbuf); + __ movl(as_Register(Matcher::_regEncode[dst_first]), + as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movl %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return - (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) - ? 2 - : 3; // REX + return 0; } } else if (dst_first_rc == rc_float) { // gpr -> xmm @@ -1349,13 +1181,12 @@ MacroAssembler _masm(cbuf); __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movdq %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return 5; // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1364,17 +1195,14 @@ MacroAssembler _masm(cbuf); __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movdl %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return - (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) - ? 5 - : (4 + ((UseAVX>0)?1:0)); // REX } + return 0; } } else if (src_first_rc == rc_float) { // xmm -> @@ -1388,17 +1216,12 @@ MacroAssembler _masm(cbuf); __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movsd [rsp + #%d], %s\t# spill", offset, Matcher::regName[src_first]); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[src_first] >= 8) - ? 6 - : (5 + ((UseAVX>0)?1:0))); // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1408,18 +1231,14 @@ MacroAssembler _masm(cbuf); __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movss [rsp + #%d], %s\t# spill", offset, Matcher::regName[src_first]); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[src_first] >=8) - ? 6 - : (5 + ((UseAVX>0)?1:0))); // REX } + return 0; } else if (dst_first_rc == rc_int) { // xmm -> gpr if ((src_first & 1) == 0 && src_first + 1 == src_second && @@ -1429,13 +1248,12 @@ MacroAssembler _masm(cbuf); __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movdq %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return 5; // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1444,17 +1262,14 @@ MacroAssembler _masm(cbuf); __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movdl %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return - (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) - ? 5 - : (4 + ((UseAVX>0)?1:0)); // REX } + return 0; } else if (dst_first_rc == rc_float) { // xmm -> xmm if ((src_first & 1) == 0 && src_first + 1 == src_second && @@ -1464,7 +1279,7 @@ MacroAssembler _masm(cbuf); __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("%s %s, %s\t# spill", UseXmmRegToRegMoveAll ? "movapd" : "movsd ", Matcher::regName[dst_first], @@ -1471,10 +1286,6 @@ Matcher::regName[src_first]); #endif } - return - (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) - ? 5 - : (4 + ((UseAVX>0)?1:0)); // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1483,7 +1294,7 @@ MacroAssembler _masm(cbuf); __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("%s %s, %s\t# spill", UseXmmRegToRegMoveAll ? "movaps" : "movss ", Matcher::regName[dst_first], @@ -1490,35 +1301,28 @@ Matcher::regName[src_first]); #endif } - return ((UseAVX>0) ? 5: - ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) - ? (UseXmmRegToRegMoveAll ? 4 : 5) - : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX } + return 0; } } assert(0," foo "); Unimplemented(); - return 0; } #ifndef PRODUCT -void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const -{ +void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { implementation(NULL, ra_, false, st); } #endif -void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const -{ +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { implementation(&cbuf, ra_, false, NULL); } -uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const -{ - return implementation(NULL, ra_, true, NULL); +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); } //============================================================================= @@ -1735,16 +1539,6 @@ return true; } -// Vector width in bytes -const uint Matcher::vector_width_in_bytes(void) { - return 8; -} - -// Vector ideal reg -const uint Matcher::vector_ideal_reg(void) { - return Op_RegD; -} - // Is this branch offset short enough that a short branch can be used? // // NOTE: If the platform does not provide any short branch variants, then @@ -1831,21 +1625,21 @@ bool Matcher::can_be_java_arg(int reg) { return - reg == RDI_num || reg == RDI_H_num || - reg == RSI_num || reg == RSI_H_num || - reg == RDX_num || reg == RDX_H_num || - reg == RCX_num || reg == RCX_H_num || - reg == R8_num || reg == R8_H_num || - reg == R9_num || reg == R9_H_num || - reg == R12_num || reg == R12_H_num || - reg == XMM0_num || reg == XMM0_H_num || - reg == XMM1_num || reg == XMM1_H_num || - reg == XMM2_num || reg == XMM2_H_num || - reg == XMM3_num || reg == XMM3_H_num || - reg == XMM4_num || reg == XMM4_H_num || - reg == XMM5_num || reg == XMM5_H_num || - reg == XMM6_num || reg == XMM6_H_num || - reg == XMM7_num || reg == XMM7_H_num; + reg == RDI_num || reg == RDI_H_num || + reg == RSI_num || reg == RSI_H_num || + reg == RDX_num || reg == RDX_H_num || + reg == RCX_num || reg == RCX_H_num || + reg == R8_num || reg == R8_H_num || + reg == R9_num || reg == R9_H_num || + reg == R12_num || reg == R12_H_num || + reg == XMM0_num || reg == XMM0b_num || + reg == XMM1_num || reg == XMM1b_num || + reg == XMM2_num || reg == XMM2b_num || + reg == XMM3_num || reg == XMM3b_num || + reg == XMM4_num || reg == XMM4b_num || + reg == XMM5_num || reg == XMM5b_num || + reg == XMM6_num || reg == XMM6b_num || + reg == XMM7_num || reg == XMM7b_num; } bool Matcher::is_spillable_arg(int reg) @@ -3220,10 +3014,11 @@ OptoReg::Bad, // Op_RegI RAX_H_num, // Op_RegP OptoReg::Bad, // Op_RegF - XMM0_H_num, // Op_RegD + XMM0b_num, // Op_RegD RAX_H_num // Op_RegL }; - assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type"); + // Excluded flags and vector registers. + assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type"); return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); %} %} @@ -3985,7 +3780,6 @@ interface(REG_INTER); %} - //----------Memory Operands---------------------------------------------------- // Direct Memory Operand // operand direct(immP addr) @@ -5416,61 +5210,6 @@ ins_pipe(pipe_slow); // XXX %} -// Load Aligned Packed Byte to XMM register -instruct loadA8B(regD dst, memory mem) %{ - match(Set dst (Load8B mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed8B" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Short to XMM register -instruct loadA4S(regD dst, memory mem) %{ - match(Set dst (Load4S mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed4S" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Char to XMM register -instruct loadA4C(regD dst, memory mem) %{ - match(Set dst (Load4C mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed4C" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Integer to XMM register -instruct load2IU(regD dst, memory mem) %{ - match(Set dst (Load2I mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed2I" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Single to XMM -instruct loadA2F(regD dst, memory mem) %{ - match(Set dst (Load2F mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed2F" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - // Load Effective Address instruct leaP8(rRegP dst, indOffset8 mem) %{ @@ -6200,39 +5939,6 @@ ins_pipe(ialu_mem_imm); %} -// Store Aligned Packed Byte XMM register to memory -instruct storeA8B(memory mem, regD src) %{ - match(Set mem (Store8B mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed8B" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store Aligned Packed Char/Short XMM register to memory -instruct storeA4C(memory mem, regD src) %{ - match(Set mem (Store4C mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed4C" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store Aligned Packed Integer XMM register to memory -instruct storeA2I(memory mem, regD src) %{ - match(Set mem (Store2I mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed2I" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Store CMS card-mark Immediate instruct storeImmCM0_reg(memory mem, immI0 zero) %{ @@ -6258,17 +5964,6 @@ ins_pipe(ialu_mem_imm); %} -// Store Aligned Packed Single Float XMM register to memory -instruct storeA2F(memory mem, regD src) %{ - match(Set mem (Store2F mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed2F" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Store Float instruct storeF(memory mem, regF src) %{ @@ -10384,172 +10079,6 @@ ins_pipe( pipe_slow ); %} -// Replicate scalar to packed byte (1 byte) values in xmm -instruct Repl8B_reg(regD dst, regD src) %{ - match(Set dst (Replicate8B src)); - format %{ "MOVDQA $dst,$src\n\t" - "PUNPCKLBW $dst,$dst\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} - ins_encode %{ - if ($dst$$reg != $src$$reg) { - __ movdqa($dst$$XMMRegister, $src$$XMMRegister); - } - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( pipe_slow ); -%} - -// Replicate scalar to packed byte (1 byte) values in xmm -instruct Repl8B_rRegI(regD dst, rRegI src) %{ - match(Set dst (Replicate8B src)); - format %{ "MOVD $dst,$src\n\t" - "PUNPCKLBW $dst,$dst\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( pipe_slow ); -%} - -// Replicate scalar zero to packed byte (1 byte) values in xmm -instruct Repl8B_immI0(regD dst, immI0 zero) %{ - match(Set dst (Replicate8B zero)); - format %{ "PXOR $dst,$dst\t! replicate8B" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed shore (2 byte) values in xmm -instruct Repl4S_reg(regD dst, regD src) %{ - match(Set dst (Replicate4S src)); - format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} - ins_encode %{ - __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed shore (2 byte) values in xmm -instruct Repl4S_rRegI(regD dst, rRegI src) %{ - match(Set dst (Replicate4S src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed short (2 byte) values in xmm -instruct Repl4S_immI0(regD dst, immI0 zero) %{ - match(Set dst (Replicate4S zero)); - format %{ "PXOR $dst,$dst\t! replicate4S" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed char (2 byte) values in xmm -instruct Repl4C_reg(regD dst, regD src) %{ - match(Set dst (Replicate4C src)); - format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} - ins_encode %{ - __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed char (2 byte) values in xmm -instruct Repl4C_rRegI(regD dst, rRegI src) %{ - match(Set dst (Replicate4C src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed char (2 byte) values in xmm -instruct Repl4C_immI0(regD dst, immI0 zero) %{ - match(Set dst (Replicate4C zero)); - format %{ "PXOR $dst,$dst\t! replicate4C" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed integer (4 byte) values in xmm -instruct Repl2I_reg(regD dst, regD src) %{ - match(Set dst (Replicate2I src)); - format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed integer (4 byte) values in xmm -instruct Repl2I_rRegI(regD dst, rRegI src) %{ - match(Set dst (Replicate2I src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFD $dst,$dst,0x00\t! replicate2I" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed integer (2 byte) values in xmm -instruct Repl2I_immI0(regD dst, immI0 zero) %{ - match(Set dst (Replicate2I zero)); - format %{ "PXOR $dst,$dst\t! replicate2I" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_reg(regD dst, regD src) %{ - match(Set dst (Replicate2F src)); - format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_regF(regD dst, regF src) %{ - match(Set dst (Replicate2F src)); - format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_immF0(regD dst, immF0 zero) %{ - match(Set dst (Replicate2F zero)); - format %{ "PXOR $dst,$dst\t! replicate2F" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - // ======================================================================= // fast clearing of an array --- old/src/share/vm/adlc/adlparse.cpp Sat Jun 2 20:04:05 2012 +++ new/src/share/vm/adlc/adlparse.cpp Sat Jun 2 20:04:05 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -115,6 +115,12 @@ parse_err(SYNERR, "expected one of - instruct, operand, ins_attrib, op_attrib, source, register, pipeline, encode\n Found %s",ident); } } + // Add reg_class spill_regs after parsing. + RegisterForm *regBlock = _AD.get_registers(); + if (regBlock == NULL) { + parse_err(SEMERR, "Did not declare 'register' definitions"); + } + regBlock->addSpillRegClass(); // Done with parsing, check consistency. @@ -768,12 +774,13 @@ //------------------------------reg_parse-------------------------------------- void ADLParser::reg_parse(void) { + RegisterForm *regBlock = _AD.get_registers(); // Information about registers encoding + if (regBlock == NULL) { + // Create the RegisterForm for the architecture description. + regBlock = new RegisterForm(); // Build new Source object + _AD.addForm(regBlock); + } - // Create the RegisterForm for the architecture description. - RegisterForm *regBlock = new RegisterForm(); // Build new Source object - regBlock->_linenum = linenum(); - _AD.addForm(regBlock); - skipws(); // Skip leading whitespace if (_curchar == '%' && *(_ptr+1) == '{') { next_char(); next_char(); // Skip "%{" @@ -796,15 +803,11 @@ parse_err(SYNERR, "Missing %c{ ... %c} block after register keyword.\n",'%','%'); return; } - - // Add reg_class spill_regs - regBlock->addSpillRegClass(); } //------------------------------encode_parse----------------------------------- void ADLParser::encode_parse(void) { EncodeForm *encBlock; // Information about instruction/operand encoding - char *desc = NULL; // String representation of encode rule _AD.getForm(&encBlock); if ( encBlock == NULL) { --- old/src/share/vm/adlc/archDesc.cpp Sat Jun 2 20:04:06 2012 +++ new/src/share/vm/adlc/archDesc.cpp Sat Jun 2 20:04:05 2012 @@ -1,5 +1,5 @@ // -// Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -911,12 +911,24 @@ // Find last character in idealOp, it specifies the type char last_char = 0; const char *ptr = idealOp; - for( ; *ptr != '\0'; ++ptr) { + for (; *ptr != '\0'; ++ptr) { last_char = *ptr; } + // Match Vector types. + if (strncmp(idealOp, "Vec",3)==0) { + switch(last_char) { + case 'S': return "TypeVect::VECTS"; + case 'D': return "TypeVect::VECTD"; + case 'X': return "TypeVect::VECTX"; + case 'Y': return "TypeVect::VECTY"; + default: + internal_err("Vector type %s with unrecognized type\n",idealOp); + } + } + // !!!!! - switch( last_char ) { + switch(last_char) { case 'I': return "TypeInt::INT"; case 'P': return "TypePtr::BOTTOM"; case 'N': return "TypeNarrowOop::BOTTOM"; --- old/src/share/vm/adlc/forms.cpp Sat Jun 2 20:04:06 2012 +++ new/src/share/vm/adlc/forms.cpp Sat Jun 2 20:04:06 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -266,21 +266,7 @@ if( strcmp(opType,"LoadN")==0 ) return Form::idealN; if( strcmp(opType,"LoadRange")==0 ) return Form::idealI; if( strcmp(opType,"LoadS")==0 ) return Form::idealS; - if( strcmp(opType,"Load16B")==0 ) return Form::idealB; - if( strcmp(opType,"Load8B")==0 ) return Form::idealB; - if( strcmp(opType,"Load4B")==0 ) return Form::idealB; - if( strcmp(opType,"Load8C")==0 ) return Form::idealC; - if( strcmp(opType,"Load4C")==0 ) return Form::idealC; - if( strcmp(opType,"Load2C")==0 ) return Form::idealC; - if( strcmp(opType,"Load8S")==0 ) return Form::idealS; - if( strcmp(opType,"Load4S")==0 ) return Form::idealS; - if( strcmp(opType,"Load2S")==0 ) return Form::idealS; - if( strcmp(opType,"Load2D")==0 ) return Form::idealD; - if( strcmp(opType,"Load4F")==0 ) return Form::idealF; - if( strcmp(opType,"Load2F")==0 ) return Form::idealF; - if( strcmp(opType,"Load4I")==0 ) return Form::idealI; - if( strcmp(opType,"Load2I")==0 ) return Form::idealI; - if( strcmp(opType,"Load2L")==0 ) return Form::idealL; + if( strcmp(opType,"LoadVector")==0 ) return Form::idealV; assert( strcmp(opType,"Load") != 0, "Must type Loads" ); return Form::none; } @@ -287,7 +273,7 @@ Form::DataType Form::is_store_to_memory(const char *opType) const { if( strcmp(opType,"StoreB")==0) return Form::idealB; - if( strcmp(opType,"StoreCM")==0) return Form::idealB; + if( strcmp(opType,"StoreCM")==0) return Form::idealB; if( strcmp(opType,"StoreC")==0) return Form::idealC; if( strcmp(opType,"StoreD")==0) return Form::idealD; if( strcmp(opType,"StoreF")==0) return Form::idealF; @@ -294,19 +280,8 @@ if( strcmp(opType,"StoreI")==0) return Form::idealI; if( strcmp(opType,"StoreL")==0) return Form::idealL; if( strcmp(opType,"StoreP")==0) return Form::idealP; - if( strcmp(opType,"StoreN")==0) return Form::idealN; - if( strcmp(opType,"Store16B")==0) return Form::idealB; - if( strcmp(opType,"Store8B")==0) return Form::idealB; - if( strcmp(opType,"Store4B")==0) return Form::idealB; - if( strcmp(opType,"Store8C")==0) return Form::idealC; - if( strcmp(opType,"Store4C")==0) return Form::idealC; - if( strcmp(opType,"Store2C")==0) return Form::idealC; - if( strcmp(opType,"Store2D")==0) return Form::idealD; - if( strcmp(opType,"Store4F")==0) return Form::idealF; - if( strcmp(opType,"Store2F")==0) return Form::idealF; - if( strcmp(opType,"Store4I")==0) return Form::idealI; - if( strcmp(opType,"Store2I")==0) return Form::idealI; - if( strcmp(opType,"Store2L")==0) return Form::idealL; + if( strcmp(opType,"StoreN")==0) return Form::idealN; + if( strcmp(opType,"StoreVector")==0 ) return Form::idealV; assert( strcmp(opType,"Store") != 0, "Must type Stores" ); return Form::none; } --- old/src/share/vm/adlc/forms.hpp Sat Jun 2 20:04:06 2012 +++ new/src/share/vm/adlc/forms.hpp Sat Jun 2 20:04:06 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -172,7 +172,8 @@ idealB = 6, // Byte type idealC = 7, // Char type idealS = 8, // String type - idealN = 9 // Narrow oop types + idealN = 9, // Narrow oop types + idealV = 10 // Vector type }; // Convert ideal name to a DataType, return DataType::none if not a 'ConX' Form::DataType ideal_to_const_type(const char *ideal_type_name) const; --- old/src/share/vm/adlc/formsopt.cpp Sat Jun 2 20:04:07 2012 +++ new/src/share/vm/adlc/formsopt.cpp Sat Jun 2 20:04:07 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -66,7 +66,7 @@ // for spill-slots/regs. void RegisterForm::addSpillRegClass() { // Stack slots start at the next available even register number. - _reg_ctr = (_reg_ctr+1) & ~1; + _reg_ctr = (_reg_ctr+7) & ~7; const char *rc_name = "stack_slots"; RegClass *reg_class = new RegClass(rc_name); reg_class->_stack_or_reg = true; @@ -150,9 +150,14 @@ int RegisterForm::RegMask_Size() { // Need at least this many words int words_for_regs = (_reg_ctr + 31)>>5; - // Add a few for incoming & outgoing arguments to calls. + // The array of Register Mask bits should be large enough to cover + // all the machine registers and all parameters that need to be passed + // on the stack (stack registers) up to some interesting limit. Methods + // that need more parameters will NOT be compiled. On Intel, the limit + // is something like 90+ parameters. + // Add a few (3 words == 96 bits) for incoming & outgoing arguments to calls. // Round up to the next doubleword size. - return (words_for_regs + 2 + 1) & ~1; + return (words_for_regs + 3 + 1) & ~1; } void RegisterForm::dump() { // Debug printer --- old/src/share/vm/adlc/formssel.cpp Sat Jun 2 20:04:07 2012 +++ new/src/share/vm/adlc/formssel.cpp Sat Jun 2 20:04:07 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -432,6 +432,14 @@ return _matrule->is_ideal_store(); } +// Return 'true' if this instruction matches an ideal vector node +bool InstructForm::is_vector() const { + if( _matrule == NULL ) return false; + + return _matrule->is_vector(); +} + + // Return the input register that must match the output register // If this is not required, return 0 uint InstructForm::two_address(FormDict &globals) { @@ -751,6 +759,9 @@ if (needs_base_oop_edge(globals)) return true; + if (is_vector()) return true; + if (is_mach_constant()) return true; + return false; } @@ -3381,11 +3392,8 @@ "StoreI","StoreL","StoreP","StoreN","StoreD","StoreF" , "StoreB","StoreC","Store" ,"StoreFP", "LoadI", "LoadUI2L", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" , - "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" , - "Store4I","Store2I","Store2L","Store2D","Store4F","Store2F","Store16B", - "Store8B","Store4B","Store8C","Store4C","Store2C", - "Load4I" ,"Load2I" ,"Load2L" ,"Load2D" ,"Load4F" ,"Load2F" ,"Load16B" , - "Load8B" ,"Load4B" ,"Load8C" ,"Load4C" ,"Load2C" ,"Load8S", "Load4S","Load2S", + "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" , + "StoreVector", "LoadVector", "LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned", "LoadPLocked", "LoadLLocked", "StorePConditional", "StoreIConditional", "StoreLConditional", @@ -3822,6 +3830,10 @@ strcmp(opType,"RegL")==0 || strcmp(opType,"RegF")==0 || strcmp(opType,"RegD")==0 || + strcmp(opType,"VecS")==0 || + strcmp(opType,"VecD")==0 || + strcmp(opType,"VecX")==0 || + strcmp(opType,"VecY")==0 || strcmp(opType,"Reg" )==0) ) { return 1; } @@ -3938,19 +3950,13 @@ strcmp(opType,"ReverseBytesL")==0 || strcmp(opType,"ReverseBytesUS")==0 || strcmp(opType,"ReverseBytesS")==0 || - strcmp(opType,"Replicate16B")==0 || - strcmp(opType,"Replicate8B")==0 || - strcmp(opType,"Replicate4B")==0 || - strcmp(opType,"Replicate8C")==0 || - strcmp(opType,"Replicate4C")==0 || - strcmp(opType,"Replicate8S")==0 || - strcmp(opType,"Replicate4S")==0 || - strcmp(opType,"Replicate4I")==0 || - strcmp(opType,"Replicate2I")==0 || - strcmp(opType,"Replicate2L")==0 || - strcmp(opType,"Replicate4F")==0 || - strcmp(opType,"Replicate2F")==0 || - strcmp(opType,"Replicate2D")==0 || + strcmp(opType,"ReplicateB")==0 || + strcmp(opType,"ReplicateC")==0 || + strcmp(opType,"ReplicateS")==0 || + strcmp(opType,"ReplicateI")==0 || + strcmp(opType,"ReplicateL")==0 || + strcmp(opType,"ReplicateF")==0 || + strcmp(opType,"ReplicateD")==0 || 0 /* 0 to line up columns nicely */ ) return 1; } @@ -4034,6 +4040,23 @@ return ideal_load; } +bool MatchRule::is_vector() const { + if( _rChild ) { + const char *opType = _rChild->_opType; + if( strcmp(opType,"ReplicateB")==0 || + strcmp(opType,"ReplicateC")==0 || + strcmp(opType,"ReplicateS")==0 || + strcmp(opType,"ReplicateI")==0 || + strcmp(opType,"ReplicateL")==0 || + strcmp(opType,"ReplicateF")==0 || + strcmp(opType,"LoadVector")==0 || + strcmp(opType,"StoreVector")==0 || + 0 /* 0 to line up columns nicely */ ) + return true; + } + return false; +} + bool MatchRule::skip_antidep_check() const { // Some loads operate on what is effectively immutable memory so we --- old/src/share/vm/adlc/formssel.hpp Sat Jun 2 20:04:08 2012 +++ new/src/share/vm/adlc/formssel.hpp Sat Jun 2 20:04:08 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -160,6 +160,7 @@ virtual bool is_ideal_safepoint() const; // node matches 'SafePoint' virtual bool is_ideal_nop() const; // node matches 'Nop' virtual bool is_ideal_control() const; // control node + virtual bool is_vector() const; // vector instruction virtual Form::CallType is_ideal_call() const; // matches ideal 'Call' virtual Form::DataType is_ideal_load() const; // node matches ideal 'LoadXNode' @@ -1011,6 +1012,7 @@ bool is_ideal_goto() const; // node matches ideal 'Goto' bool is_ideal_loopEnd() const; // node matches ideal 'LoopEnd' bool is_ideal_bool() const; // node matches ideal 'Bool' + bool is_vector() const; // vector instruction Form::DataType is_ideal_load() const;// node matches ideal 'LoadXNode' // Should antidep checks be disabled for this rule // See definition of MatchRule::skip_antidep_check --- old/src/share/vm/adlc/main.cpp Sat Jun 2 20:04:08 2012 +++ new/src/share/vm/adlc/main.cpp Sat Jun 2 20:04:08 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -250,6 +250,7 @@ AD.addInclude(AD._HPP_file, "opto/node.hpp"); AD.addInclude(AD._HPP_file, "opto/regalloc.hpp"); AD.addInclude(AD._HPP_file, "opto/subnode.hpp"); + AD.addInclude(AD._HPP_file, "opto/vectornode.hpp"); AD.addInclude(AD._CPP_CLONE_file, "precompiled.hpp"); AD.addInclude(AD._CPP_CLONE_file, "adfiles", get_basename(AD._HPP_file._name)); AD.addInclude(AD._CPP_EXPAND_file, "precompiled.hpp"); --- old/src/share/vm/code/vmreg.cpp Sat Jun 2 20:04:09 2012 +++ new/src/share/vm/code/vmreg.cpp Sat Jun 2 20:04:09 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,7 +27,7 @@ #include "code/vmreg.hpp" // First VMReg value that could refer to a stack slot -VMReg VMRegImpl::stack0 = (VMReg)(intptr_t)((ConcreteRegisterImpl::number_of_registers + 1) & ~1); +VMReg VMRegImpl::stack0 = (VMReg)(intptr_t)((ConcreteRegisterImpl::number_of_registers + 7) & ~7); // VMRegs are 4 bytes wide on all platforms const int VMRegImpl::stack_slot_size = 4; --- old/src/share/vm/opto/c2_globals.hpp Sat Jun 2 20:04:09 2012 +++ new/src/share/vm/opto/c2_globals.hpp Sat Jun 2 20:04:09 2012 @@ -81,6 +81,13 @@ product(intx, MaxLoopPad, (OptoLoopAlignment-1), \ "Align a loop if padding size in bytes is less or equal to this value") \ \ + product(intx, MaxVectorSize, 32, \ + "Max vector size in bytes, " \ + "actual size could be less depending on elements type") \ + \ + product(bool, AlignVector, false, \ + "Perform vector store/load alignment in loop") \ + \ product(intx, NumberOfLoopInstrToAlign, 4, \ "Number of first instructions in a loop to align") \ \ --- old/src/share/vm/opto/chaitin.cpp Sat Jun 2 20:04:10 2012 +++ new/src/share/vm/opto/chaitin.cpp Sat Jun 2 20:04:10 2012 @@ -75,6 +75,7 @@ // Flags if( _is_oop ) tty->print("Oop "); if( _is_float ) tty->print("Float "); + if( _is_vector ) tty->print("Vector "); if( _was_spilled1 ) tty->print("Spilled "); if( _was_spilled2 ) tty->print("Spilled2 "); if( _direct_conflict ) tty->print("Direct_conflict "); @@ -479,16 +480,18 @@ // Move important info out of the live_arena to longer lasting storage. alloc_node_regs(_names.Size()); - for( uint i=0; i < _names.Size(); i++ ) { - if( _names[i] ) { // Live range associated with Node? - LRG &lrg = lrgs( _names[i] ); - if( lrg.num_regs() == 1 ) { - _node_regs[i].set1( lrg.reg() ); + for (uint i=0; i < _names.Size(); i++) { + if (_names[i]) { // Live range associated with Node? + LRG &lrg = lrgs(_names[i]); + if (!lrg.alive()) { + _node_regs[i].set_bad(); + } else if (lrg.num_regs() == 1) { + _node_regs[i].set1(lrg.reg()); } else { // Must be a register-pair - if( !lrg._fat_proj ) { // Must be aligned adjacent register pair + if (!lrg._fat_proj) { // Must be aligned adjacent register pair // Live ranges record the highest register in their mask. // We want the low register for the AD file writer's convenience. - _node_regs[i].set2( OptoReg::add(lrg.reg(),-1) ); + _node_regs[i].set2( OptoReg::add(lrg.reg(),(1-lrg.num_regs())) ); } else { // Misaligned; extract 2 bits OptoReg::Name hi = lrg.reg(); // Get hi register lrg.Remove(hi); // Yank from mask @@ -568,7 +571,7 @@ // Check for float-vs-int live range (used in register-pressure // calculations) const Type *n_type = n->bottom_type(); - if( n_type->is_floatingpoint() ) + if (n_type->is_floatingpoint()) lrg._is_float = 1; // Check for twice prior spilling. Once prior spilling might have @@ -599,18 +602,28 @@ // Limit result register mask to acceptable registers const RegMask &rm = n->out_RegMask(); lrg.AND( rm ); + + int ireg = n->ideal_reg(); + assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP, + "oops must be in Op_RegP's" ); + + // Check for vector live range (only if vector register is used). + // On SPARC vector uses RegD which could be misaligned so it is not + // processes as vector in RA. + if (RegMask::is_vector(ireg)) + lrg._is_vector = 1; + assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD, + "vector must be in vector registers"); + // Check for bound register masks const RegMask &lrgmask = lrg.mask(); - if( lrgmask.is_bound1() || lrgmask.is_bound2() ) + if (lrgmask.is_bound(ireg)) lrg._is_bound = 1; // Check for maximum frequency value - if( lrg._maxfreq < b->_freq ) + if (lrg._maxfreq < b->_freq) lrg._maxfreq = b->_freq; - int ireg = n->ideal_reg(); - assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP, - "oops must be in Op_RegP's" ); // Check for oop-iness, or long/double // Check for multi-kill projection switch( ireg ) { @@ -689,7 +702,7 @@ // AND changes how we count interferences. A mis-aligned // double can interfere with TWO aligned pairs, or effectively // FOUR registers! - if( rm.is_misaligned_Pair() ) { + if (rm.is_misaligned_pair()) { lrg._fat_proj = 1; lrg._is_bound = 1; } @@ -706,6 +719,33 @@ lrg.set_reg_pressure(1); #endif break; + case Op_VecS: + assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); + assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); + lrg.set_num_regs(RegMask::SlotsPerVecS); + lrg.set_reg_pressure(1); + break; + case Op_VecD: + assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecD), "sanity"); + assert(RegMask::num_registers(Op_VecD) == RegMask::SlotsPerVecD, "sanity"); + assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecD), "vector should be aligned"); + lrg.set_num_regs(RegMask::SlotsPerVecD); + lrg.set_reg_pressure(1); + break; + case Op_VecX: + assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecX), "sanity"); + assert(RegMask::num_registers(Op_VecX) == RegMask::SlotsPerVecX, "sanity"); + assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecX), "vector should be aligned"); + lrg.set_num_regs(RegMask::SlotsPerVecX); + lrg.set_reg_pressure(1); + break; + case Op_VecY: + assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecY), "sanity"); + assert(RegMask::num_registers(Op_VecY) == RegMask::SlotsPerVecY, "sanity"); + assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecY), "vector should be aligned"); + lrg.set_num_regs(RegMask::SlotsPerVecY); + lrg.set_reg_pressure(1); + break; default: ShouldNotReachHere(); } @@ -763,24 +803,38 @@ } else { lrg.AND( rm ); } + // Check for bound register masks const RegMask &lrgmask = lrg.mask(); - if( lrgmask.is_bound1() || lrgmask.is_bound2() ) + int kreg = n->in(k)->ideal_reg(); + bool is_vect = RegMask::is_vector(kreg); + assert(n->in(k)->bottom_type()->isa_vect() == NULL || + is_vect || kreg == Op_RegD, + "vector must be in vector registers"); + if (lrgmask.is_bound(kreg)) lrg._is_bound = 1; + // If this use of a double forces a mis-aligned double, // flag as '_fat_proj' - really flag as allowing misalignment // AND changes how we count interferences. A mis-aligned // double can interfere with TWO aligned pairs, or effectively // FOUR registers! - if( lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_Pair() ) { +#ifdef ASSERT + if (is_vect) { + assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned"); + assert(!lrg._fat_proj, "sanity"); + assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity"); + } +#endif + if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) { lrg._fat_proj = 1; lrg._is_bound = 1; } // if the LRG is an unaligned pair, we will have to spill // so clear the LRG's register mask if it is not already spilled - if ( !n->is_SpillCopy() && - (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) && - lrgmask.is_misaligned_Pair()) { + if (!is_vect && !n->is_SpillCopy() && + (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) && + lrgmask.is_misaligned_pair()) { lrg.Clear(); } @@ -793,12 +847,14 @@ } // end for all blocks // Final per-liverange setup - for( uint i2=0; i2<_maxlrg; i2++ ) { + for (uint i2=0; i2<_maxlrg; i2++) { LRG &lrg = lrgs(i2); - if( lrg.num_regs() == 2 && !lrg._fat_proj ) - lrg.ClearToPairs(); + assert(!lrg._is_vector || !lrg._fat_proj, "sanity"); + if (lrg.num_regs() > 1 && !lrg._fat_proj) { + lrg.clear_to_sets(); + } lrg.compute_set_mask_size(); - if( lrg.not_free() ) { // Handle case where we lose from the start + if (lrg.not_free()) { // Handle case where we lose from the start lrg.set_reg(OptoReg::Name(LRG::SPILL_REG)); lrg._direct_conflict = 1; } @@ -1104,22 +1160,17 @@ // Choose a color which is legal for him RegMask tempmask = lrg.mask(); tempmask.AND(lrgs(copy_lrg).mask()); - OptoReg::Name reg; - if( lrg.num_regs() == 1 ) { - reg = tempmask.find_first_elem(); - } else { - tempmask.ClearToPairs(); - reg = tempmask.find_first_pair(); - } - if( OptoReg::is_valid(reg) ) + tempmask.clear_to_sets(lrg.num_regs()); + OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); + if (OptoReg::is_valid(reg)) return reg; } } // If no bias info exists, just go with the register selection ordering - if( lrg.num_regs() == 2 ) { - // Find an aligned pair - return OptoReg::add(lrg.mask().find_first_pair(),chunk); + if (lrg._is_vector || lrg.num_regs() == 2) { + // Find an aligned set + return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); } // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate @@ -1149,6 +1200,7 @@ // Use a heuristic to "bias" the color choice return bias_color(lrg, chunk); + assert(!lrg._is_vector, "should be not vector here" ); assert( lrg.num_regs() >= 2, "dead live ranges do not color" ); // Fat-proj case or misaligned double argument. @@ -1238,14 +1290,16 @@ } //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness"); // Aligned pairs need aligned masks - if( lrg->num_regs() == 2 && !lrg->_fat_proj ) - lrg->ClearToPairs(); + assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); + if (lrg->num_regs() > 1 && !lrg->_fat_proj) { + lrg->clear_to_sets(); + } // Check if a color is available and if so pick the color OptoReg::Name reg = choose_color( *lrg, chunk ); #ifdef SPARC debug_only(lrg->compute_set_mask_size()); - assert(lrg->num_regs() != 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned"); + assert(lrg->num_regs() < 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned"); #endif //--------------- @@ -1277,17 +1331,16 @@ // If the live range is not bound, then we actually had some choices // to make. In this case, the mask has more bits in it than the colors // chosen. Restrict the mask to just what was picked. - if( lrg->num_regs() == 1 ) { // Size 1 live range + int n_regs = lrg->num_regs(); + assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); + if (n_regs == 1 || !lrg->_fat_proj) { + assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecY, "sanity"); lrg->Clear(); // Clear the mask lrg->Insert(reg); // Set regmask to match selected reg - lrg->set_mask_size(1); - } else if( !lrg->_fat_proj ) { - // For pairs, also insert the low bit of the pair - assert( lrg->num_regs() == 2, "unbound fatproj???" ); - lrg->Clear(); // Clear the mask - lrg->Insert(reg); // Set regmask to match selected reg - lrg->Insert(OptoReg::add(reg,-1)); - lrg->set_mask_size(2); + // For vectors and pairs, also insert the low bit of the pair + for (int i = 1; i < n_regs; i++) + lrg->Insert(OptoReg::add(reg,-i)); + lrg->set_mask_size(n_regs); } else { // Else fatproj // mask must be equal to fatproj bits, by definition } @@ -1860,12 +1913,20 @@ sprintf(buf,"L%d",lidx); // No register binding yet } else if( !lidx ) { // Special, not allocated value strcpy(buf,"Special"); - } else if( (lrgs(lidx).num_regs() == 1) - ? !lrgs(lidx).mask().is_bound1() - : !lrgs(lidx).mask().is_bound2() ) { - sprintf(buf,"L%d",lidx); // No register binding yet - } else { // Hah! We have a bound machine register - print_reg( lrgs(lidx).reg(), this, buf ); + } else { + if (lrgs(lidx)._is_vector) { + if (lrgs(lidx).mask().is_bound_set(lrgs(lidx).num_regs())) + print_reg( lrgs(lidx).reg(), this, buf ); // a bound machine register + else + sprintf(buf,"L%d",lidx); // No register binding yet + } else if( (lrgs(lidx).num_regs() == 1) + ? lrgs(lidx).mask().is_bound1() + : lrgs(lidx).mask().is_bound_pair() ) { + // Hah! We have a bound machine register + print_reg( lrgs(lidx).reg(), this, buf ); + } else { + sprintf(buf,"L%d",lidx); // No register binding yet + } } } return buf+strlen(buf); --- old/src/share/vm/opto/chaitin.hpp Sat Jun 2 20:04:10 2012 +++ new/src/share/vm/opto/chaitin.hpp Sat Jun 2 20:04:10 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -99,8 +99,15 @@ void set_mask_size( int size ) { assert((size == 65535) || (size == (int)_mask.Size()), ""); _mask_size = size; - debug_only(_msize_valid=1;) - debug_only( if( _num_regs == 2 && !_fat_proj ) _mask.VerifyPairs(); ) +#ifdef ASSERT + _msize_valid=1; + if (_is_vector) { + assert(!_fat_proj, "sanity"); + _mask.verify_sets(_num_regs); + } else if (_num_regs == 2 && !_fat_proj) { + _mask.verify_pairs(); + } +#endif } void compute_set_mask_size() { set_mask_size(compute_mask_size()); } int mask_size() const { assert( _msize_valid, "mask size not valid" ); @@ -116,7 +123,8 @@ void Set_All() { _mask.Set_All(); debug_only(_msize_valid=1); _mask_size = RegMask::CHUNK_SIZE; } void Insert( OptoReg::Name reg ) { _mask.Insert(reg); debug_only(_msize_valid=0;) } void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) } - void ClearToPairs() { _mask.ClearToPairs(); debug_only(_msize_valid=0;) } + void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) } + void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) } // Number of registers this live range uses when it colors private: @@ -150,6 +158,7 @@ uint _is_oop:1, // Live-range holds an oop _is_float:1, // True if in float registers + _is_vector:1, // True if in vector registers _was_spilled1:1, // True if prior spilling on def _was_spilled2:1, // True if twice prior spilling on def _is_bound:1, // live range starts life with no --- old/src/share/vm/opto/classes.hpp Sat Jun 2 20:04:11 2012 +++ new/src/share/vm/opto/classes.hpp Sat Jun 2 20:04:11 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -267,42 +267,15 @@ macro(LShiftVC) macro(LShiftVS) macro(LShiftVI) -macro(URShiftVB) -macro(URShiftVC) -macro(URShiftVS) -macro(URShiftVI) +macro(RShiftVB) +macro(RShiftVC) +macro(RShiftVS) +macro(RShiftVI) macro(AndV) macro(OrV) macro(XorV) -macro(VectorLoad) -macro(Load16B) -macro(Load8B) -macro(Load4B) -macro(Load8C) -macro(Load4C) -macro(Load2C) -macro(Load8S) -macro(Load4S) -macro(Load2S) -macro(Load4I) -macro(Load2I) -macro(Load2L) -macro(Load4F) -macro(Load2F) -macro(Load2D) -macro(VectorStore) -macro(Store16B) -macro(Store8B) -macro(Store4B) -macro(Store8C) -macro(Store4C) -macro(Store2C) -macro(Store4I) -macro(Store2I) -macro(Store2L) -macro(Store4F) -macro(Store2F) -macro(Store2D) +macro(LoadVector) +macro(StoreVector) macro(Pack) macro(PackB) macro(PackS) @@ -311,23 +284,15 @@ macro(PackL) macro(PackF) macro(PackD) -macro(Pack2x1B) -macro(Pack2x2B) -macro(Replicate16B) -macro(Replicate8B) -macro(Replicate4B) -macro(Replicate8S) -macro(Replicate4S) -macro(Replicate2S) -macro(Replicate8C) -macro(Replicate4C) -macro(Replicate2C) -macro(Replicate4I) -macro(Replicate2I) -macro(Replicate2L) -macro(Replicate4F) -macro(Replicate2F) -macro(Replicate2D) +macro(Pack2L) +macro(Pack2D) +macro(ReplicateB) +macro(ReplicateS) +macro(ReplicateC) +macro(ReplicateI) +macro(ReplicateL) +macro(ReplicateF) +macro(ReplicateD) macro(Extract) macro(ExtractB) macro(ExtractS) --- old/src/share/vm/opto/compile.cpp Sat Jun 2 20:04:11 2012 +++ new/src/share/vm/opto/compile.cpp Sat Jun 2 20:04:11 2012 @@ -2592,33 +2592,8 @@ } break; - case Op_Load16B: - case Op_Load8B: - case Op_Load4B: - case Op_Load8S: - case Op_Load4S: - case Op_Load2S: - case Op_Load8C: - case Op_Load4C: - case Op_Load2C: - case Op_Load4I: - case Op_Load2I: - case Op_Load2L: - case Op_Load4F: - case Op_Load2F: - case Op_Load2D: - case Op_Store16B: - case Op_Store8B: - case Op_Store4B: - case Op_Store8C: - case Op_Store4C: - case Op_Store2C: - case Op_Store4I: - case Op_Store2I: - case Op_Store2L: - case Op_Store4F: - case Op_Store2F: - case Op_Store2D: + case Op_LoadVector: + case Op_StoreVector: break; case Op_PackB: --- old/src/share/vm/opto/ifg.cpp Sat Jun 2 20:04:12 2012 +++ new/src/share/vm/opto/ifg.cpp Sat Jun 2 20:04:12 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -416,6 +416,7 @@ if( lrgs(lidx).mask().is_UP() && lrgs(lidx).mask_size() && !lrgs(lidx)._is_float && + !lrgs(lidx)._is_vector && lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) cnt += lrgs(lidx).reg_pressure(); } @@ -430,7 +431,7 @@ while ((lidx = elements.next()) != 0) { if( lrgs(lidx).mask().is_UP() && lrgs(lidx).mask_size() && - lrgs(lidx)._is_float ) + (lrgs(lidx)._is_float || lrgs(lidx)._is_vector)) cnt += lrgs(lidx).reg_pressure(); } return cnt; @@ -439,8 +440,8 @@ //------------------------------lower_pressure--------------------------------- // Adjust register pressure down by 1. Capture last hi-to-low transition, static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) { - if( lrg->mask().is_UP() && lrg->mask_size() ) { - if( lrg->_is_float ) { + if (lrg->mask().is_UP() && lrg->mask_size()) { + if (lrg->_is_float || lrg->_is_vector) { pressure[1] -= lrg->reg_pressure(); if( pressure[1] == (uint)FLOATPRESSURE ) { hrp_index[1] = where; @@ -522,8 +523,8 @@ LRG &lrg = lrgs(lidx); lrg._area += cost; // Compute initial register pressure - if( lrg.mask().is_UP() && lrg.mask_size() ) { - if( lrg._is_float ) { // Count float pressure + if (lrg.mask().is_UP() && lrg.mask_size()) { + if (lrg._is_float || lrg._is_vector) { // Count float pressure pressure[1] += lrg.reg_pressure(); #ifdef EXACT_PRESSURE if( pressure[1] > b->_freg_pressure ) @@ -681,13 +682,10 @@ // according to its bindings. const RegMask &rmask = lrgs(r).mask(); if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) { - // Smear odd bits; leave only aligned pairs of bits. - RegMask r2mask = rmask; - r2mask.SmearToPairs(); // Check for common case int r_size = lrgs(r).num_regs(); OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical; - + // Smear odd bits IndexSetIterator elements(&liveout); uint l; while ((l = elements.next()) != 0) { @@ -701,10 +699,15 @@ // Remove the bits from LRG 'r' from LRG 'l' so 'l' no // longer interferes with 'r'. If 'l' requires aligned // adjacent pairs, subtract out bit pairs. - if( lrg.num_regs() == 2 && !lrg._fat_proj ) { + assert(!lrg._is_vector || !lrg._fat_proj, "sanity"); + if (lrg.num_regs() > 1 && !lrg._fat_proj) { + RegMask r2mask = rmask; + // Leave only aligned set of bits. + r2mask.smear_to_sets(lrg.num_regs()); + // It includes vector case. lrg.SUBTRACT( r2mask ); lrg.compute_set_mask_size(); - } else if( r_size != 1 ) { + } else if( r_size != 1 ) { // fat proj lrg.SUBTRACT( rmask ); lrg.compute_set_mask_size(); } else { // Common case: size 1 bound removal @@ -763,8 +766,8 @@ // Newly live things assumed live from here to top of block lrg._area += cost; // Adjust register pressure - if( lrg.mask().is_UP() && lrg.mask_size() ) { - if( lrg._is_float ) { + if (lrg.mask().is_UP() && lrg.mask_size()) { + if (lrg._is_float || lrg._is_vector) { pressure[1] += lrg.reg_pressure(); #ifdef EXACT_PRESSURE if( pressure[1] > b->_freg_pressure ) --- old/src/share/vm/opto/lcm.cpp Sat Jun 2 20:04:12 2012 +++ new/src/share/vm/opto/lcm.cpp Sat Jun 2 20:04:12 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -445,6 +445,11 @@ if( e->is_MachNullCheck() && e->in(1) == n ) continue; + // Schedule IV increment last. + if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd && + e->in(1)->in(1) == n && n->is_iteratively_computed()) + continue; + uint n_choice = 2; // See if this instruction is consumed by a branch. If so, then (as the --- old/src/share/vm/opto/loopnode.cpp Sat Jun 2 20:04:13 2012 +++ new/src/share/vm/opto/loopnode.cpp Sat Jun 2 20:04:13 2012 @@ -2756,7 +2756,8 @@ // Do not count uncommon calls if( !n->is_CallStaticJava() || !n->as_CallStaticJava()->_name ) { Node *iff = n->in(0)->in(0); - if( !iff->is_If() || + // No any calls for vectorized loops. + if( UseSuperWord || !iff->is_If() || (n->in(0)->Opcode() == Op_IfFalse && (1.0 - iff->as_If()->_prob) >= 0.01) || (iff->as_If()->_prob >= 0.01) ) --- old/src/share/vm/opto/machnode.cpp Sat Jun 2 20:04:13 2012 +++ new/src/share/vm/opto/machnode.cpp Sat Jun 2 20:04:13 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -439,9 +439,9 @@ // Don't remateralize somebody with bound inputs - it stretches a // fixed register lifetime. uint idx = oper_input_base(); - if( req() > idx ) { + if (req() > idx) { const RegMask &rm = in_RegMask(idx); - if( rm.is_bound1() || rm.is_bound2() ) + if (rm.is_bound(ideal_reg())) return false; } --- old/src/share/vm/opto/machnode.hpp Sat Jun 2 20:04:14 2012 +++ new/src/share/vm/opto/machnode.hpp Sat Jun 2 20:04:14 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -319,6 +319,7 @@ class MachTypeNode : public MachNode { virtual uint size_of() const { return sizeof(*this); } // Size is bigger public: + MachTypeNode( ) {} const Type *_bottom_type; virtual const class Type *bottom_type() const { return _bottom_type; } @@ -370,12 +371,12 @@ //------------------------------MachConstantNode------------------------------- // Machine node that holds a constant which is stored in the constant table. -class MachConstantNode : public MachNode { +class MachConstantNode : public MachTypeNode { protected: Compile::Constant _constant; // This node's constant. public: - MachConstantNode() : MachNode() { + MachConstantNode() : MachTypeNode() { init_class_id(Class_MachConstant); } --- old/src/share/vm/opto/matcher.cpp Sat Jun 2 20:04:14 2012 +++ new/src/share/vm/opto/matcher.cpp Sat Jun 2 20:04:14 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,6 +35,7 @@ #include "opto/rootnode.hpp" #include "opto/runtime.hpp" #include "opto/type.hpp" +#include "opto/vectornode.hpp" #include "runtime/atomic.hpp" #include "runtime/os.hpp" #ifdef TARGET_ARCH_MODEL_x86_32 @@ -58,18 +59,6 @@ OptoReg::Name OptoReg::c_frame_pointer; - - -const int Matcher::base2reg[Type::lastype] = { - Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN, - Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */ - Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */ - 0, 0/*abio*/, - Op_RegP /* Return address */, 0, /* the memories */ - Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, - 0 /*bottom*/ -}; - const RegMask *Matcher::idealreg2regmask[_last_machine_leaf]; RegMask Matcher::mreg2regmask[_last_Mach_Reg]; RegMask Matcher::STACK_ONLY_mask; @@ -107,6 +96,10 @@ idealreg2spillmask [Op_RegF] = NULL; idealreg2spillmask [Op_RegD] = NULL; idealreg2spillmask [Op_RegP] = NULL; + idealreg2spillmask [Op_VecS] = NULL; + idealreg2spillmask [Op_VecD] = NULL; + idealreg2spillmask [Op_VecX] = NULL; + idealreg2spillmask [Op_VecY] = NULL; idealreg2debugmask [Op_RegI] = NULL; idealreg2debugmask [Op_RegN] = NULL; @@ -114,6 +107,10 @@ idealreg2debugmask [Op_RegF] = NULL; idealreg2debugmask [Op_RegD] = NULL; idealreg2debugmask [Op_RegP] = NULL; + idealreg2debugmask [Op_VecS] = NULL; + idealreg2debugmask [Op_VecD] = NULL; + idealreg2debugmask [Op_VecX] = NULL; + idealreg2debugmask [Op_VecY] = NULL; idealreg2mhdebugmask[Op_RegI] = NULL; idealreg2mhdebugmask[Op_RegN] = NULL; @@ -121,6 +118,10 @@ idealreg2mhdebugmask[Op_RegF] = NULL; idealreg2mhdebugmask[Op_RegD] = NULL; idealreg2mhdebugmask[Op_RegP] = NULL; + idealreg2mhdebugmask[Op_VecS] = NULL; + idealreg2mhdebugmask[Op_VecD] = NULL; + idealreg2mhdebugmask[Op_VecX] = NULL; + idealreg2mhdebugmask[Op_VecY] = NULL; debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node } @@ -134,7 +135,7 @@ warped = OptoReg::add(warped, C->out_preserve_stack_slots()); if( warped >= _in_arg_limit ) _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen - if (!RegMask::can_represent(warped)) { + if (!RegMask::can_represent_arg(warped)) { // the compiler cannot represent this method's calling sequence C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence"); return OptoReg::Bad; @@ -302,7 +303,7 @@ _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots()); assert( is_even(_out_arg_limit), "out_preserve must be even" ); - if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) { + if (!RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1))) { // the compiler cannot represent this method's calling sequence C->record_method_not_compilable("must be able to represent all call arguments in reg mask"); } @@ -428,7 +429,7 @@ void Matcher::init_first_stack_mask() { // Allocate storage for spill masks as masks for the appropriate load type. - RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * 3*6); + RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+4)); idealreg2spillmask [Op_RegN] = &rms[0]; idealreg2spillmask [Op_RegI] = &rms[1]; @@ -451,6 +452,11 @@ idealreg2mhdebugmask[Op_RegD] = &rms[16]; idealreg2mhdebugmask[Op_RegP] = &rms[17]; + idealreg2spillmask [Op_VecS] = &rms[18]; + idealreg2spillmask [Op_VecD] = &rms[19]; + idealreg2spillmask [Op_VecX] = &rms[20]; + idealreg2spillmask [Op_VecY] = &rms[21]; + OptoReg::Name i; // At first, start with the empty mask @@ -462,7 +468,7 @@ C->FIRST_STACK_mask().Insert(i); // Add in all bits past the outgoing argument area - guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)), + guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)), "must be able to represent all call arguments in reg mask"); init = _out_arg_limit; for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1)) @@ -472,21 +478,48 @@ C->FIRST_STACK_mask().set_AllStack(); // Make spill masks. Registers for their class, plus FIRST_STACK_mask. + RegMask aligned_stack_mask = C->FIRST_STACK_mask(); + // Keep spill masks aligned. + aligned_stack_mask.clear_to_pairs(); + assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); + + *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; #ifdef _LP64 *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN]; idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask()); + idealreg2spillmask[Op_RegP]->OR(aligned_stack_mask); +#else + idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask()); #endif *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI]; idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask()); *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL]; - idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask()); + idealreg2spillmask[Op_RegL]->OR(aligned_stack_mask); *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF]; idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask()); *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD]; - idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask()); - *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; - idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask()); + idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask); + if (Matcher::vector_size_supported(T_BYTE,4)) { + *idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS]; + idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask()); + } + if (Matcher::vector_size_supported(T_FLOAT,2)) { + *idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD]; + idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask); + } + if (Matcher::vector_size_supported(T_FLOAT,4)) { + aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX); + assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); + *idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX]; + idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask); + } + if (Matcher::vector_size_supported(T_FLOAT,8)) { + aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY); + assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); + *idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY]; + idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask); + } if (UseFPUForSpilling) { // This mask logic assumes that the spill operations are // symmetric and that the registers involved are the same size. @@ -807,6 +840,25 @@ idealreg2regmask[Op_RegF] = &spillF->out_RegMask(); idealreg2regmask[Op_RegD] = &spillD->out_RegMask(); idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); + + // Vector regmasks. + if (Matcher::vector_size_supported(T_BYTE,4)) { + TypeVect::VECTS = TypeVect::make(T_BYTE, 4); + MachNode *spillVectS = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); + idealreg2regmask[Op_VecS] = &spillVectS->out_RegMask(); + } + if (Matcher::vector_size_supported(T_FLOAT,2)) { + MachNode *spillVectD = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTD)); + idealreg2regmask[Op_VecD] = &spillVectD->out_RegMask(); + } + if (Matcher::vector_size_supported(T_FLOAT,4)) { + MachNode *spillVectX = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTX)); + idealreg2regmask[Op_VecX] = &spillVectX->out_RegMask(); + } + if (Matcher::vector_size_supported(T_FLOAT,8)) { + MachNode *spillVectY = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTY)); + idealreg2regmask[Op_VecY] = &spillVectY->out_RegMask(); + } } #ifdef ASSERT @@ -1063,7 +1115,7 @@ // that is killed by the call. if( warped >= out_arg_limit_per_call ) out_arg_limit_per_call = OptoReg::add(warped,1); - if (!RegMask::can_represent(warped)) { + if (!RegMask::can_represent_arg(warped)) { C->record_method_not_compilable_all_tiers("unsupported calling sequence"); return OptoReg::Bad; } @@ -1251,7 +1303,7 @@ // this killed area. uint r_cnt = mcall->tf()->range()->cnt(); MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj ); - if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) { + if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) { C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence"); } else { for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++) --- old/src/share/vm/opto/matcher.hpp Sat Jun 2 20:04:15 2012 +++ new/src/share/vm/opto/matcher.hpp Sat Jun 2 20:04:15 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -250,11 +250,22 @@ static const bool convL2FSupported(void); // Vector width in bytes - static const uint vector_width_in_bytes(void); + static const int vector_width_in_bytes(BasicType bt); + // Limits on vector size (number of elements). + static const int max_vector_size(const BasicType bt); + static const int min_vector_size(const BasicType bt); + static const bool vector_size_supported(const BasicType bt, int size) { + return (Matcher::max_vector_size(bt) >= size && + Matcher::min_vector_size(bt) <= size); + } + // Vector ideal reg - static const uint vector_ideal_reg(void); + static const int vector_ideal_reg(int len); + // CPU supports misaligned vectors store/load. + static const bool misaligned_vectors_ok(); + // Used to determine a "low complexity" 64-bit constant. (Zero is simple.) // The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI). // Depends on the details of 64-bit constant generation on the CPU. --- old/src/share/vm/opto/memnode.cpp Sat Jun 2 20:04:15 2012 +++ new/src/share/vm/opto/memnode.cpp Sat Jun 2 20:04:15 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1526,7 +1526,8 @@ // In fact, that could have been the original type of p1, and p1 could have // had an original form like p1:(AddP x x (LShiftL quux 3)), where the // expression (LShiftL quux 3) independently optimized to the constant 8. - if ((t->isa_int() == NULL) && (t->isa_long() == NULL) + if ((t->isa_int() == NULL) && (t->isa_long() == NULL) + && (_type->isa_vect() == NULL) && Opcode() != Op_LoadKlass && Opcode() != Op_LoadNKlass) { // t might actually be lower than _type, if _type is a unique // concrete subclass of abstract class t. --- old/src/share/vm/opto/mulnode.hpp Sat Jun 2 20:04:16 2012 +++ new/src/share/vm/opto/mulnode.hpp Sat Jun 2 20:04:16 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,7 +41,9 @@ class MulNode : public Node { virtual uint hash() const; public: - MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {} + MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) { + init_class_id(Class_Mul); + } // Handle algebraic identities here. If we have an identity, return the Node // we are equivalent to. We look for "add of zero" as an identity. --- old/src/share/vm/opto/node.cpp Sat Jun 2 20:04:16 2012 +++ new/src/share/vm/opto/node.cpp Sat Jun 2 20:04:16 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1576,6 +1576,9 @@ } else { tty->print("no type"); } + } else if (t->isa_vect() && this->is_MachSpillCopy()) { + // Dump MachSpillcopy vector type. + t->dump(); } if (is_new) { debug_only(dump_orig(debug_orig())); --- old/src/share/vm/opto/node.hpp Sat Jun 2 20:04:17 2012 +++ new/src/share/vm/opto/node.hpp Sat Jun 2 20:04:17 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -100,6 +100,7 @@ class MemBarStoreStoreNode; class MemNode; class MergeMemNode; +class MulNode; class MultiNode; class MultiBranchNode; class NeverBranchNode; @@ -133,8 +134,8 @@ class TypeNode; class UnlockNode; class VectorNode; -class VectorLoadNode; -class VectorStoreNode; +class LoadVectorNode; +class StoreVectorNode; class VectorSet; typedef void (*NFunc)(Node&,void*); extern "C" { @@ -609,9 +610,9 @@ DEFINE_CLASS_ID(Mem, Node, 4) DEFINE_CLASS_ID(Load, Mem, 0) - DEFINE_CLASS_ID(VectorLoad, Load, 0) + DEFINE_CLASS_ID(LoadVector, Load, 0) DEFINE_CLASS_ID(Store, Mem, 1) - DEFINE_CLASS_ID(VectorStore, Store, 0) + DEFINE_CLASS_ID(StoreVector, Store, 0) DEFINE_CLASS_ID(LoadStore, Mem, 2) DEFINE_CLASS_ID(Region, Node, 5) @@ -629,8 +630,9 @@ DEFINE_CLASS_ID(AddP, Node, 9) DEFINE_CLASS_ID(BoxLock, Node, 10) DEFINE_CLASS_ID(Add, Node, 11) - DEFINE_CLASS_ID(Vector, Node, 12) - DEFINE_CLASS_ID(ClearArray, Node, 13) + DEFINE_CLASS_ID(Mul, Node, 12) + DEFINE_CLASS_ID(Vector, Node, 13) + DEFINE_CLASS_ID(ClearArray, Node, 14) _max_classes = ClassMask_ClearArray }; @@ -752,6 +754,7 @@ DEFINE_CLASS_QUERY(MemBar) DEFINE_CLASS_QUERY(MemBarStoreStore) DEFINE_CLASS_QUERY(MergeMem) + DEFINE_CLASS_QUERY(Mul) DEFINE_CLASS_QUERY(Multi) DEFINE_CLASS_QUERY(MultiBranch) DEFINE_CLASS_QUERY(Parm) @@ -767,8 +770,8 @@ DEFINE_CLASS_QUERY(Sub) DEFINE_CLASS_QUERY(Type) DEFINE_CLASS_QUERY(Vector) - DEFINE_CLASS_QUERY(VectorLoad) - DEFINE_CLASS_QUERY(VectorStore) + DEFINE_CLASS_QUERY(LoadVector) + DEFINE_CLASS_QUERY(StoreVector) DEFINE_CLASS_QUERY(Unlock) #undef DEFINE_CLASS_QUERY --- old/src/share/vm/opto/opcodes.cpp Sat Jun 2 20:04:17 2012 +++ new/src/share/vm/opto/opcodes.cpp Sat Jun 2 20:04:17 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,10 @@ "RegD", "RegL", "RegFlags", + "VecS", + "VecD", + "VecX", + "VecY", "_last_machine_leaf", #include "classes.hpp" "_last_class_name", --- old/src/share/vm/opto/opcodes.hpp Sat Jun 2 20:04:18 2012 +++ new/src/share/vm/opto/opcodes.hpp Sat Jun 2 20:04:18 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,6 +36,10 @@ macro(RegF) // Machine float register macro(RegD) // Machine double register macro(RegL) // Machine long register + macro(VecS) // Machine vectors register + macro(VecD) // Machine vectord register + macro(VecX) // Machine vectorx register + macro(VecY) // Machine vectory register macro(RegFlags) // Machine flags register _last_machine_leaf, // Split between regular opcodes and machine #include "classes.hpp" --- old/src/share/vm/opto/postaloc.cpp Sat Jun 2 20:04:18 2012 +++ new/src/share/vm/opto/postaloc.cpp Sat Jun 2 20:04:18 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,13 +27,15 @@ #include "opto/chaitin.hpp" #include "opto/machnode.hpp" -// see if this register kind does not requires two registers -static bool is_single_register(uint x) { -#ifdef _LP64 - return (x != Op_RegD && x != Op_RegL && x != Op_RegP); -#else - return (x != Op_RegD && x != Op_RegL); -#endif +// See if this register (or pairs, or vector) already contains the value. +static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs, + Node_List& value) { + for (int i = 0; i < n_regs; i++) { + OptoReg::Name nreg = OptoReg::add(reg,-i); + if (value[nreg] != val) + return false; + } + return true; } //---------------------------may_be_copy_of_callee----------------------------- @@ -167,9 +169,11 @@ const RegMask &use_mask = n->in_RegMask(idx); bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0) : (use_mask.is_AllStack() != 0)); - // Check for a copy to or from a misaligned pair. - can_use = can_use && !use_mask.is_misaligned_Pair() && !def_lrg.mask().is_misaligned_Pair(); - + if (!RegMask::is_vector(def->ideal_reg())) { + // Check for a copy to or from a misaligned pair. + // It is workaround for a sparc with misaligned pairs. + can_use = can_use && !use_mask.is_misaligned_pair() && !def_lrg.mask().is_misaligned_pair(); + } if (!can_use) return 0; @@ -263,9 +267,9 @@ val = skip_copies(n->in(k)); } - if( val == x ) return blk_adjust; // No progress? + if (val == x) return blk_adjust; // No progress? - bool single = is_single_register(val->ideal_reg()); + int n_regs = RegMask::num_registers(val->ideal_reg()); uint val_idx = n2lidx(val); OptoReg::Name val_reg = lrgs(val_idx).reg(); @@ -272,9 +276,7 @@ // See if it happens to already be in the correct register! // (either Phi's direct register, or the common case of the name // never-clobbered original-def register) - if( value[val_reg] == val && - // Doubles check both halves - ( single || value[val_reg-1] == val ) ) { + if (register_contains_value(val, val_reg, n_regs, value)) { blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd); if( n->in(k) == regnd[val_reg] ) // Success! Quit trying return blk_adjust; @@ -306,7 +308,7 @@ } Node *vv = value[reg]; - if( !single ) { // Doubles check for aligned-adjacent pair + if (n_regs > 1) { // Doubles check for aligned-adjacent pair if( (reg&1)==0 ) continue; // Wrong half of a pair if( vv != value[reg-1] ) continue; // Not a complete pair } @@ -526,8 +528,9 @@ if( pidx ) { value.map(preg,phi); regnd.map(preg,phi); - OptoReg::Name preg_lo = OptoReg::add(preg,-1); - if( !is_single_register(phi->ideal_reg()) ) { + int n_regs = RegMask::num_registers(phi->ideal_reg()); + for (int l = 1; l < n_regs; l++) { + OptoReg::Name preg_lo = OptoReg::add(preg,-l); value.map(preg_lo,phi); regnd.map(preg_lo,phi); } @@ -568,13 +571,17 @@ value.map(ureg,valdef); // record improved reaching-def info regnd.map(ureg, def); // Record other half of doubles - OptoReg::Name ureg_lo = OptoReg::add(ureg,-1); - if( !is_single_register(def->ideal_reg()) && - ( !RegMask::can_represent(ureg_lo) || - lrgs(useidx).mask().Member(ureg_lo) ) && // Nearly always adjacent - !value[ureg_lo] ) { - value.map(ureg_lo,valdef); // record improved reaching-def info - regnd.map(ureg_lo, def); + uint def_ideal_reg = def->ideal_reg(); + int n_regs = RegMask::num_registers(def_ideal_reg); + bool is_vec = RegMask::is_vector(def_ideal_reg); + for (int l = 1; l < n_regs; l++) { + OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); + if (!value[ureg_lo] && + (!RegMask::can_represent(ureg_lo) || + lrgs(useidx).mask().Member(ureg_lo))) { // Nearly always adjacent + value.map(ureg_lo,valdef); // record improved reaching-def info + regnd.map(ureg_lo, def); + } } } } @@ -607,7 +614,8 @@ } uint n_ideal_reg = n->ideal_reg(); - if( is_single_register(n_ideal_reg) ) { + int n_regs = RegMask::num_registers(n_ideal_reg); + if (n_regs == 1) { // If Node 'n' does not change the value mapped by the register, // then 'n' is a useless copy. Do not update the register->node // mapping so 'n' will go dead. @@ -625,6 +633,25 @@ assert( n->is_Copy(), "" ); j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); } + } else if (RegMask::is_vector(n_ideal_reg)) { + // If Node 'n' does not change the value mapped by the register, + // then 'n' is a useless copy. Do not update the register->node + // mapping so 'n' will go dead. + if (!register_contains_value(val, nreg, n_regs, value)) { + // Update the mapping: record new Node defined by the register + regnd.map(nreg,n); + // Update mapping for defined *value*, which is the defined + // Node after skipping all copies. + value.map(nreg,val); + for (int l = 1; l < n_regs; l++) { + OptoReg::Name nreg_lo = OptoReg::add(nreg,-l); + regnd.map(nreg_lo, n ); + value.map(nreg_lo,val); + } + } else if (n->is_Copy()) { + // Note: vector can't be constant and can't be copy of calee. + j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + } } else { // If the value occupies a register pair, record same info // in both registers. --- old/src/share/vm/opto/reg_split.cpp Sat Jun 2 20:04:19 2012 +++ new/src/share/vm/opto/reg_split.cpp Sat Jun 2 20:04:19 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -74,12 +74,13 @@ const RegMask *w_i_mask = w_mask->overlap( *i_mask ) ? w_mask : i_mask; const RegMask *w_o_mask; + int num_regs = RegMask::num_registers(ireg); + bool is_vect = RegMask::is_vector(ireg); if( w_mask->overlap( *o_mask ) && // Overlap AND - ((ireg != Op_RegL && ireg != Op_RegD // Single use or aligned -#ifdef _LP64 - && ireg != Op_RegP -#endif - ) || o_mask->is_aligned_Pairs()) ) { + ((num_regs == 1) // Single use or aligned + || is_vect // or vector + || !is_vect && o_mask->is_aligned_pairs()) ) { + assert(!is_vect || o_mask->is_aligned_sets(num_regs), "vectors are aligned"); // Don't come here for mis-aligned doubles w_o_mask = w_mask; } else { // wide ideal mask does not overlap with o_mask @@ -400,15 +401,17 @@ // CNC - Turned off 7/8/99, causes too much spilling // if( lrg->_is_bound ) return false; + // Use float pressure numbers for vectors. + bool is_float_or_vector = lrg->_is_float || lrg->_is_vector; // Not yet reached the high-pressure cutoff point, so low pressure - uint hrp_idx = lrg->_is_float ? b->_fhrp_index : b->_ihrp_index; + uint hrp_idx = is_float_or_vector ? b->_fhrp_index : b->_ihrp_index; if( insidx < hrp_idx ) return false; // Register pressure for the block as a whole depends on reg class - int block_pres = lrg->_is_float ? b->_freg_pressure : b->_reg_pressure; + int block_pres = is_float_or_vector ? b->_freg_pressure : b->_reg_pressure; // Bound live ranges will split at the binding points first; // Intermediate splits should assume the live range's register set // got "freed up" and that num_regs will become INT_PRESSURE. - int bound_pres = lrg->_is_float ? FLOATPRESSURE : INTPRESSURE; + int bound_pres = is_float_or_vector ? FLOATPRESSURE : INTPRESSURE; // Effective register pressure limit. int lrg_pres = (lrg->get_invalid_mask_size() > lrg->num_regs()) ? (lrg->get_invalid_mask_size() >> (lrg->num_regs()-1)) : bound_pres; @@ -794,12 +797,15 @@ if( i < n->req() ) break; insert_point--; } + uint orig_eidx = b->end_idx(); maxlrg = split_DEF( n1, b, insert_point, maxlrg, Reachblock, debug_defs, splits, slidx); // If it wasn't split bail if (!maxlrg) { return 0; } - insidx++; + // Spill of NULL check mem op goes into the following block. + if (b->end_idx() > orig_eidx) + insidx++; } // This is a new DEF, so update UP UPblock[slidx] = false; @@ -960,7 +966,7 @@ // Grab register mask info const RegMask &dmask = def->out_RegMask(); const RegMask &umask = n->in_RegMask(inpidx); - + bool is_vect = RegMask::is_vector(def->ideal_reg()); assert(inpidx < oopoff, "cannot use-split oop map info"); bool dup = UPblock[slidx]; @@ -972,7 +978,7 @@ if( !umask.is_AllStack() && (int)umask.Size() <= lrgs(useidx).num_regs() && (!def->rematerialize() || - umask.is_misaligned_Pair())) { + !is_vect && umask.is_misaligned_pair())) { // These need a Split regardless of overlap or pressure // SPLIT - NO DEF - NO CISC SPILL maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx); @@ -1123,10 +1129,12 @@ // Grab UP info for DEF const RegMask &dmask = n->out_RegMask(); bool defup = dmask.is_UP(); + int ireg = n->ideal_reg(); + bool is_vect = RegMask::is_vector(ireg); // Only split at Def if this is a HRP block or bound (and spilled once) if( !n->rematerialize() && - (((dmask.is_bound1() || dmask.is_bound2() || dmask.is_misaligned_Pair()) && - (deflrg._direct_conflict || deflrg._must_spill)) || + (((dmask.is_bound(ireg) || !is_vect && dmask.is_misaligned_pair()) && + (deflrg._direct_conflict || deflrg._must_spill)) || // Check for LRG being up in a register and we are inside a high // pressure area. Spill it down immediately. (defup && is_high_pressure(b,&deflrg,insidx))) ) { --- old/src/share/vm/opto/regmask.cpp Sat Jun 2 20:04:19 2012 +++ new/src/share/vm/opto/regmask.cpp Sat Jun 2 20:04:19 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -129,11 +129,34 @@ 0 ); +//============================================================================= +bool RegMask::is_vector(uint ireg) { + return (ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY); +} + +int RegMask::num_registers(uint ireg) { + switch(ireg) { + case Op_VecY: + return 8; + case Op_VecX: + return 4; + case Op_VecD: + case Op_RegD: + case Op_RegL: +#ifdef _LP64 + case Op_RegP: +#endif + return 2; + } + // Op_VecS and the rest ideal registers. + return 1; +} + //------------------------------find_first_pair-------------------------------- // Find the lowest-numbered register pair in the mask. Return the // HIGHEST register number in the pair, or BAD if no pairs. OptoReg::Name RegMask::find_first_pair() const { - VerifyPairs(); + verify_pairs(); for( int i = 0; i < RM_SIZE; i++ ) { if( _A[i] ) { // Found some bits int bit = _A[i] & -_A[i]; // Extract low bit @@ -146,7 +169,7 @@ //------------------------------ClearToPairs----------------------------------- // Clear out partial bits; leave only bit pairs -void RegMask::ClearToPairs() { +void RegMask::clear_to_pairs() { for( int i = 0; i < RM_SIZE; i++ ) { int bits = _A[i]; bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair @@ -153,12 +176,12 @@ bits |= (bits>>1); // Smear 1 hi-bit into a pair _A[i] = bits; } - VerifyPairs(); + verify_pairs(); } //------------------------------SmearToPairs----------------------------------- // Smear out partial bits; leave only bit pairs -void RegMask::SmearToPairs() { +void RegMask::smear_to_pairs() { for( int i = 0; i < RM_SIZE; i++ ) { int bits = _A[i]; bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair @@ -165,11 +188,11 @@ bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair _A[i] = bits; } - VerifyPairs(); + verify_pairs(); } //------------------------------is_aligned_pairs------------------------------- -bool RegMask::is_aligned_Pairs() const { +bool RegMask::is_aligned_pairs() const { // Assert that the register mask contains only bit pairs. for( int i = 0; i < RM_SIZE; i++ ) { int bits = _A[i]; @@ -204,7 +227,7 @@ //------------------------------is_bound2-------------------------------------- // Return TRUE if the mask contains an adjacent pair of bits and no other bits. -int RegMask::is_bound2() const { +int RegMask::is_bound_pair() const { if( is_AllStack() ) return false; int bit = -1; // Set to hold the one bit allowed @@ -225,6 +248,132 @@ // True for both the empty mask and for a bit pair return true; } + +static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 }; +//------------------------------find_first_set--------------------------------- +// Find the lowest-numbered register set in the mask. Return the +// HIGHEST register number in the set, or BAD if no sets. +// Works also for size 1. +OptoReg::Name RegMask::find_first_set(int size) const { + verify_sets(size); + for (int i = 0; i < RM_SIZE; i++) { + if (_A[i]) { // Found some bits + int bit = _A[i] & -_A[i]; // Extract low bit + // Convert to bit number, return hi bit in pair + return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1)); + } + } + return OptoReg::Bad; +} + +//------------------------------clear_to_sets---------------------------------- +// Clear out partial bits; leave only aligned adjacent bit pairs +void RegMask::clear_to_sets(int size) { + if (size == 1) return; + assert(2 <= size && size <= 8, "update low bits table"); + assert(is_power_of_2(size), "sanity"); + int low_bits_mask = low_bits[size>>2]; + for (int i = 0; i < RM_SIZE; i++) { + int bits = _A[i]; + int sets = (bits & low_bits_mask); + for (int j = 1; j < size; j++) { + sets = (bits & (sets<<1)); // filter bits which produce whole sets + } + sets |= (sets>>1); // Smear 1 hi-bit into a set + if (size > 2) { + sets |= (sets>>2); // Smear 2 hi-bits into a set + if (size > 4) { + sets |= (sets>>4); // Smear 4 hi-bits into a set + } + } + _A[i] = sets; + } + verify_sets(size); +} + +//------------------------------smear_to_sets---------------------------------- +// Smear out partial bits to aligned adjacent bit sets +void RegMask::smear_to_sets(int size) { + if (size == 1) return; + assert(2 <= size && size <= 8, "update low bits table"); + assert(is_power_of_2(size), "sanity"); + int low_bits_mask = low_bits[size>>2]; + for (int i = 0; i < RM_SIZE; i++) { + int bits = _A[i]; + int sets = 0; + for (int j = 0; j < size; j++) { + sets |= (bits & low_bits_mask); // collect partial bits + bits = bits>>1; + } + sets |= (sets<<1); // Smear 1 lo-bit into a set + if (size > 2) { + sets |= (sets<<2); // Smear 2 lo-bits into a set + if (size > 4) { + sets |= (sets<<4); // Smear 4 lo-bits into a set + } + } + _A[i] = sets; + } + verify_sets(size); +} + +//------------------------------is_aligned_set-------------------------------- +bool RegMask::is_aligned_sets(int size) const { + if (size == 1) return true; + assert(2 <= size && size <= 8, "update low bits table"); + assert(is_power_of_2(size), "sanity"); + int low_bits_mask = low_bits[size>>2]; + // Assert that the register mask contains only bit sets. + for (int i = 0; i < RM_SIZE; i++) { + int bits = _A[i]; + while (bits) { // Check bits for pairing + int bit = bits & -bits; // Extract low bit + // Low bit is not odd means its mis-aligned. + if ((bit & low_bits_mask) == 0) return false; + // Do extra work since (bit << size) may overflow. + int hi_bit = bit << (size-1); // high bit + int set = hi_bit + ((hi_bit-1) & ~(bit-1)); + // Check for aligned adjacent bits in this set + if ((bits & set) != set) return false; + bits -= set; // Remove this set + } + } + return true; +} + +//------------------------------is_bound_set----------------------------------- +// Return TRUE if the mask contains one adjacent set of bits and no other bits. +// Works also for size 1. +int RegMask::is_bound_set(int size) const { + if( is_AllStack() ) return false; + assert(1 <= size && size <= 8, "update low bits table"); + int bit = -1; // Set to hold the one bit allowed + for (int i = 0; i < RM_SIZE; i++) { + if (_A[i] ) { // Found some bits + if (bit != -1) + return false; // Already had bits, so fail + bit = _A[i] & -_A[i]; // Extract 1 bit from mask + int hi_bit = bit << (size-1); // high bit + if (hi_bit != 0) { // Bit set stays in same word? + int set = hi_bit + ((hi_bit-1) & ~(bit-1)); + if (set != _A[i]) + return false; // Require adjacent bit set and no more bits + } else { // Else its a split-set case + if (((-1) & ~(bit-1)) != _A[i]) + return false; // Found many bits, so fail + i++; // Skip iteration forward and check high part + assert(size <= 8, "update next code"); + // The lower 24 bits should be 0 since it is split case and size <= 8. + int set = bit>>24; + set = set & -set; // Remove sign extension. + set = (((set << size) - 1) >> 8); + if (_A[i] != set) return false; // Require 1 lo bit in next word + } + } + } + // True for both the empty mask and for a bit set + return true; +} //------------------------------is_UP------------------------------------------ // UP means register only, Register plus stack, or stack only is DOWN --- old/src/share/vm/opto/regmask.hpp Sat Jun 2 20:04:20 2012 +++ new/src/share/vm/opto/regmask.hpp Sat Jun 2 20:04:20 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -113,7 +113,11 @@ // the controlling alignment constraint. Note that this alignment // requirement is internal to the allocator, and independent of any // particular platform. - enum { SlotsPerLong = 2 }; + enum { SlotsPerLong = 2, + SlotsPerVecS = 1, + SlotsPerVecD = 2, + SlotsPerVecX = 4, + SlotsPerVecY = 8 }; // A constructor only used by the ADLC output. All mask fields are filled // in directly. Calls to this look something like RM(1,2,3,4); @@ -193,21 +197,54 @@ OptoReg::Name find_first_pair() const; // Clear out partial bits; leave only aligned adjacent bit pairs. - void ClearToPairs(); + void clear_to_pairs(); // Smear out partial bits; leave only aligned adjacent bit pairs. - void SmearToPairs(); + void smear_to_pairs(); // Verify that the mask contains only aligned adjacent bit pairs - void VerifyPairs() const { assert( is_aligned_Pairs(), "mask is not aligned, adjacent pairs" ); } + void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); } // Test that the mask contains only aligned adjacent bit pairs - bool is_aligned_Pairs() const; + bool is_aligned_pairs() const; // mask is a pair of misaligned registers - bool is_misaligned_Pair() const { return Size()==2 && !is_aligned_Pairs();} + bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); } // Test for single register int is_bound1() const; // Test for a single adjacent pair - int is_bound2() const; + int is_bound_pair() const; + // Test for a single adjacent set of ideal register's size. + int is_bound(uint ireg) const { + if (is_vector(ireg)) { + if (is_bound_set(num_registers(ireg))) + return true; + } else if (is_bound1() || is_bound_pair()) { + return true; + } + return false; + } + // Find the lowest-numbered register set in the mask. Return the + // HIGHEST register number in the set, or BAD if no sets. + // Assert that the mask contains only bit sets. + OptoReg::Name find_first_set(int size) const; + + // Clear out partial bits; leave only aligned adjacent bit sets of size. + void clear_to_sets(int size); + // Smear out partial bits to aligned adjacent bit sets. + void smear_to_sets(int size); + // Verify that the mask contains only aligned adjacent bit sets + void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); } + // Test that the mask contains only aligned adjacent bit sets + bool is_aligned_sets(int size) const; + + // mask is a set of misaligned registers + bool is_misaligned_set(int size) const { return (int)Size()==size && !is_aligned_sets(size);} + + // Test for a single adjacent set + int is_bound_set(int size) const; + + static bool is_vector(uint ireg); + static int num_registers(uint ireg); + // Fast overlap test. Non-zero if any registers in common. int overlap( const RegMask &rm ) const { return @@ -280,9 +317,15 @@ static bool can_represent(OptoReg::Name reg) { // NOTE: -1 in computation reflects the usage of the last - // bit of the regmask as an infinite stack flag. + // bit of the regmask as an infinite stack flag and + // -7 is to keep mask aligned for largest value (VecY). return (int)reg < (int)(CHUNK_SIZE-1); } + static bool can_represent_arg(OptoReg::Name reg) { + // NOTE: -SlotsPerVecY in computation reflects the need + // to keep mask aligned for largest value (VecY). + return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecY); + } }; // Do not use this constant directly in client code! --- old/src/share/vm/opto/superword.cpp Sat Jun 2 20:04:20 2012 +++ new/src/share/vm/opto/superword.cpp Sat Jun 2 20:04:20 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -67,6 +67,10 @@ //------------------------------transform_loop--------------------------- void SuperWord::transform_loop(IdealLoopTree* lpt) { + assert(UseSuperWord, "should be"); + // Do vectors exist on this architecture? + if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; + assert(lpt->_head->is_CountedLoop(), "must be"); CountedLoopNode *cl = lpt->_head->as_CountedLoop(); @@ -89,15 +93,12 @@ Node *pre_opaq1 = pre_end->limit(); if (pre_opaq1->Opcode() != Op_Opaque1) return; - // Do vectors exist on this architecture? - if (vector_width_in_bytes() == 0) return; - init(); // initialize data structures set_lpt(lpt); set_lp(cl); - // For now, define one block which is the entire loop body + // For now, define one block which is the entire loop body set_bb(cl); assert(_packset.length() == 0, "packset must be empty"); @@ -177,7 +178,7 @@ Node_List memops; for (int i = 0; i < _block.length(); i++) { Node* n = _block.at(i); - if (n->is_Mem() && in_bb(n) && + if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) && is_java_primitive(n->as_Mem()->memory_type())) { int align = memory_alignment(n->as_Mem(), 0); if (align != bottom_align) { @@ -185,55 +186,134 @@ } } } - if (memops.size() == 0) return; - // Find a memory reference to align to. The pre-loop trip count - // is modified to align this reference to a vector-aligned address - find_align_to_ref(memops); - if (align_to_ref() == NULL) return; + Node_List align_to_refs; + const Type* best_vt = NULL; + int best_iv_adjustment = 0; + MemNode* best_align_to_mem_ref = NULL; - SWPointer align_to_ref_p(align_to_ref(), this); - int offset = align_to_ref_p.offset_in_bytes(); - int scale = align_to_ref_p.scale_in_bytes(); - int vw = vector_width_in_bytes(); - int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; - int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; + while (memops.size() != 0) { + // Find a memory reference to align to. + MemNode* mem_ref = find_align_to_ref(memops); + if (mem_ref == NULL) break; + align_to_refs.push(mem_ref); + const Type* vt = velt_type(mem_ref); + int iv_adjustment = get_iv_adjustment(mem_ref); -#ifndef PRODUCT - if (TraceSuperWord) - tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d scale = %d iv_stride = %d", - offset, iv_adjustment, align_to_ref_p.memory_size(), align_to_ref_p.scale_in_bytes(), iv_stride()); -#endif + if (best_align_to_mem_ref == NULL) { + // Set memory reference which is the best from all memory operations + // to be used for alignment. The pre-loop trip count is modified to align + // this reference to a vector-aligned address. + best_vt = vt; + best_align_to_mem_ref = mem_ref; + best_iv_adjustment = iv_adjustment; + } - // Set alignment relative to "align_to_ref" - for (int i = memops.size() - 1; i >= 0; i--) { - MemNode* s = memops.at(i)->as_Mem(); - SWPointer p2(s, this); - if (p2.comparable(align_to_ref_p)) { - int align = memory_alignment(s, iv_adjustment); - set_alignment(s, align); - } else { - memops.remove(i); + SWPointer align_to_ref_p(mem_ref, this); + // Set alignment relative to "align_to_ref" for all related memory operations. + for (int i = memops.size() - 1; i >= 0; i--) { + MemNode* s = memops.at(i)->as_Mem(); + if (isomorphic(s, mem_ref)) { + SWPointer p2(s, this); + if (p2.comparable(align_to_ref_p)) { + int align = memory_alignment(s, iv_adjustment); + set_alignment(s, align); + } + } } - } - // Create initial pack pairs of memory operations - for (uint i = 0; i < memops.size(); i++) { - Node* s1 = memops.at(i); - for (uint j = 0; j < memops.size(); j++) { - Node* s2 = memops.at(j); - if (s1 != s2 && are_adjacent_refs(s1, s2)) { + // Create initial pack pairs of memory operations for which + // alignment is set and vectors will be aligned. + bool create_pack = true; + if (memory_alignment(mem_ref, best_iv_adjustment) != 0) { + if (vt == best_vt) { + // Can't allow vectorization of unaligned memory accesses with the + // same type since it could be overlapped accesses to the same array. + create_pack = false; + } else { + // Allow independent (different type) unaligned memory operations + // if HW supports them. + if (!Matcher::misaligned_vectors_ok()) { + create_pack = false; + } else { + // Check if packs of the same memory type but + // with a different alignment were created before. + for (uint i = 0; i < align_to_refs.size(); i++) { + MemNode* mr = align_to_refs.at(i)->as_Mem(); + if (velt_type(mr) == vt && memory_alignment(mr, iv_adjustment) != 0) + create_pack = false; + } + } + } + } + if (create_pack) { + for (uint i = 0; i < memops.size(); i++) { + Node* s1 = memops.at(i); int align = alignment(s1); - if (stmts_can_pack(s1, s2, align)) { - Node_List* pair = new Node_List(); - pair->push(s1); - pair->push(s2); - _packset.append(pair); + if (align == top_align) continue; + for (uint j = 0; j < memops.size(); j++) { + Node* s2 = memops.at(j); + if (alignment(s2) == top_align) continue; + if (s1 != s2 && are_adjacent_refs(s1, s2)) { + if (stmts_can_pack(s1, s2, align)) { + Node_List* pair = new Node_List(); + pair->push(s1); + pair->push(s2); + _packset.append(pair); + } + } } } + } else { // Don't create unaligned pack + // First, remove remaining memory ops of the same type from the list. + for (int i = memops.size() - 1; i >= 0; i--) { + MemNode* s = memops.at(i)->as_Mem(); + if (velt_type(s) == vt) { + memops.remove(i); + } + } + + // Second, removed already constructed packs of the same type. + for (int i = _packset.length() - 1; i >= 0; i--) { + Node_List* p = _packset.at(i); + MemNode* s = p->at(0)->as_Mem(); + if (velt_type(s) == vt) { + remove_pack_at(i); + } + } + + // If needed find the best memory reference for loop alignment again. + if (best_vt == vt) { + // Put memory ops from remaining packs back on memops list for + // the best alignment search. + uint orig_msize = memops.size(); + for (int i = 0; i < _packset.length(); i++) { + Node_List* p = _packset.at(i); + MemNode* s = p->at(0)->as_Mem(); + assert(velt_type(s) != vt, "sanity"); + memops.push(s); + } + MemNode* best_align_to_mem_ref = find_align_to_ref(memops); + if (best_align_to_mem_ref == NULL) break; + best_vt = velt_type(best_align_to_mem_ref); + best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); + // Restore list. + while (memops.size() > orig_msize) + (void)memops.pop(); + } + } // unaligned memory accesses + + // Remove used mem nodes + for (int i = memops.size() - 1; i >= 0; i--) { + MemNode* m = memops.at(i)->as_Mem(); + if (alignment(m) != top_align) { + memops.remove(i); + } } - } + } // while (memops.size() != 0 + set_align_to_ref(best_align_to_mem_ref); + #ifndef PRODUCT if (TraceSuperWord) { tty->print_cr("\nAfter find_adjacent_refs"); @@ -246,7 +326,7 @@ // Find a memory reference to align the loop induction variable to. // Looks first at stores then at loads, looking for a memory reference // with the largest number of references similar to it. -void SuperWord::find_align_to_ref(Node_List &memops) { +MemNode* SuperWord::find_align_to_ref(Node_List &memops) { GrowableArray cmp_ct(arena(), memops.size(), memops.size(), 0); // Count number of comparable memory ops @@ -270,8 +350,10 @@ } } - // Find Store (or Load) with the greatest number of "comparable" references + // Find Store (or Load) with the greatest number of "comparable" references, + // biggest vector size, smallest data size and smallest iv offset. int max_ct = 0; + int max_vw = 0; int max_idx = -1; int min_size = max_jint; int min_iv_offset = max_jint; @@ -278,12 +360,18 @@ for (uint j = 0; j < memops.size(); j++) { MemNode* s = memops.at(j)->as_Mem(); if (s->is_Store()) { + int vw = vector_width_in_bytes(velt_basic_type(s)); + assert(vw > 1, "sanity"); SWPointer p(s, this); - if (cmp_ct.at(j) > max_ct || - cmp_ct.at(j) == max_ct && (data_size(s) < min_size || - data_size(s) == min_size && - p.offset_in_bytes() < min_iv_offset)) { + if (cmp_ct.at(j) > max_ct || + cmp_ct.at(j) == max_ct && + (vw > max_vw || + vw == max_vw && + (data_size(s) < min_size || + data_size(s) == min_size && + (p.offset_in_bytes() < min_iv_offset)))) { max_ct = cmp_ct.at(j); + max_vw = vw; max_idx = j; min_size = data_size(s); min_iv_offset = p.offset_in_bytes(); @@ -295,12 +383,18 @@ for (uint j = 0; j < memops.size(); j++) { MemNode* s = memops.at(j)->as_Mem(); if (s->is_Load()) { + int vw = vector_width_in_bytes(velt_basic_type(s)); + assert(vw > 1, "sanity"); SWPointer p(s, this); - if (cmp_ct.at(j) > max_ct || - cmp_ct.at(j) == max_ct && (data_size(s) < min_size || - data_size(s) == min_size && - p.offset_in_bytes() < min_iv_offset)) { + if (cmp_ct.at(j) > max_ct || + cmp_ct.at(j) == max_ct && + (vw > max_vw || + vw == max_vw && + (data_size(s) < min_size || + data_size(s) == min_size && + (p.offset_in_bytes() < min_iv_offset)))) { max_ct = cmp_ct.at(j); + max_vw = vw; max_idx = j; min_size = data_size(s); min_iv_offset = p.offset_in_bytes(); @@ -309,10 +403,7 @@ } } - if (max_ct > 0) - set_align_to_ref(memops.at(max_idx)->as_Mem()); - -#ifndef PRODUCT +#ifdef ASSERT if (TraceSuperWord && Verbose) { tty->print_cr("\nVector memops after find_align_to_refs"); for (uint i = 0; i < memops.size(); i++) { @@ -321,6 +412,17 @@ } } #endif + + if (max_ct > 0) { +#ifdef ASSERT + if (TraceSuperWord) { + tty->print("\nVector align to node: "); + memops.at(max_idx)->as_Mem()->dump(); + } +#endif + return memops.at(max_idx)->as_Mem(); + } + return NULL; } //------------------------------ref_is_alignable--------------------------- @@ -341,7 +443,9 @@ // If initial offset from start of object is computable, // compute alignment within the vector. - int vw = vector_width_in_bytes(); + BasicType bt = velt_basic_type(p.mem()); + int vw = vector_width_in_bytes(bt); + assert(vw > 1, "sanity"); if (vw % span == 0) { Node* init_nd = pre_end->init_trip(); if (init_nd->is_Con() && p.invar() == NULL) { @@ -361,6 +465,26 @@ return false; } +//---------------------------get_iv_adjustment--------------------------- +// Calculate loop's iv adjustment for this memory ops. +int SuperWord::get_iv_adjustment(MemNode* mem_ref) { + SWPointer align_to_ref_p(mem_ref, this); + int offset = align_to_ref_p.offset_in_bytes(); + int scale = align_to_ref_p.scale_in_bytes(); + BasicType bt = velt_basic_type(mem_ref); + int vw = vector_width_in_bytes(bt); + assert(vw > 1, "sanity"); + int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; + int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; + +#ifndef PRODUCT + if (TraceSuperWord) + tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d", + offset, iv_adjustment, align_to_ref_p.memory_size(), scale, iv_stride(), vw); +#endif + return iv_adjustment; +} + //---------------------------dependence_graph--------------------------- // Construct dependency graph. // Add dependence edges to load/store nodes for memory dependence @@ -488,9 +612,13 @@ bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { // Do not use superword for non-primitives - if((s1->is_Mem() && !is_java_primitive(s1->as_Mem()->memory_type())) || - (s2->is_Mem() && !is_java_primitive(s2->as_Mem()->memory_type()))) + BasicType bt1 = velt_basic_type(s1); + BasicType bt2 = velt_basic_type(s2); + if(!is_java_primitive(bt1) || !is_java_primitive(bt2)) return false; + if (Matcher::max_vector_size(bt1) < 2) { + return false; // No vectors for this type + } if (isomorphic(s1, s2)) { if (independent(s1, s2)) { @@ -595,14 +723,16 @@ //------------------------------set_alignment--------------------------- void SuperWord::set_alignment(Node* s1, Node* s2, int align) { set_alignment(s1, align); - set_alignment(s2, align + data_size(s1)); + if (align == top_align || align == bottom_align) { + set_alignment(s2, align); + } else { + set_alignment(s2, align + data_size(s1)); + } } //------------------------------data_size--------------------------- int SuperWord::data_size(Node* s) { - const Type* t = velt_type(s); - BasicType bt = t->array_element_basic_type(); - int bsize = type2aelembytes(bt); + int bsize = type2aelembytes(velt_basic_type(s)); assert(bsize != 0, "valid size"); return bsize; } @@ -631,9 +761,9 @@ //------------------------------follow_use_defs--------------------------- // Extend the packset by visiting operand definitions of nodes in pack p bool SuperWord::follow_use_defs(Node_List* p) { + assert(p->size() == 2, "just checking"); Node* s1 = p->at(0); Node* s2 = p->at(1); - assert(p->size() == 2, "just checking"); assert(s1->req() == s2->req(), "just checking"); assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); @@ -718,7 +848,12 @@ for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; if (i1 != i2) { - return false; + if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) { + // Further analysis relies on operands position matching. + u2->swap_edges(i1, i2); + } else { + return false; + } } } while (i1 < ct); return true; @@ -727,7 +862,7 @@ //------------------------------est_savings--------------------------- // Estimate the savings from executing s1 and s2 as a pack int SuperWord::est_savings(Node* s1, Node* s2) { - int save = 2 - 1; // 2 operations per instruction in packed form + int save_in = 2 - 1; // 2 operations per instruction in packed form // inputs for (uint i = 1; i < s1->req(); i++) { @@ -735,11 +870,11 @@ Node* x2 = s2->in(i); if (x1 != x2) { if (are_adjacent_refs(x1, x2)) { - save += adjacent_profit(x1, x2); + save_in += adjacent_profit(x1, x2); } else if (!in_packset(x1, x2)) { - save -= pack_cost(2); + save_in -= pack_cost(2); } else { - save += unpack_cost(2); + save_in += unpack_cost(2); } } } @@ -746,6 +881,7 @@ // uses of result uint ct = 0; + int save_use = 0; for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { Node* s1_use = s1->fast_out(i); for (int j = 0; j < _packset.length(); j++) { @@ -756,7 +892,7 @@ if (p->at(p->size()-1) == s2_use) { ct++; if (are_adjacent_refs(s1_use, s2_use)) { - save += adjacent_profit(s1_use, s2_use); + save_use += adjacent_profit(s1_use, s2_use); } } } @@ -764,10 +900,10 @@ } } - if (ct < s1->outcnt()) save += unpack_cost(1); - if (ct < s2->outcnt()) save += unpack_cost(1); + if (ct < s1->outcnt()) save_use += unpack_cost(1); + if (ct < s2->outcnt()) save_use += unpack_cost(1); - return save; + return MAX2(save_in, save_use); } //------------------------------costs--------------------------- @@ -778,8 +914,9 @@ //------------------------------combine_packs--------------------------- // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last void SuperWord::combine_packs() { - bool changed; - do { + bool changed = true; + // Combine packs regardless max vector size. + while (changed) { changed = false; for (int i = 0; i < _packset.length(); i++) { Node_List* p1 = _packset.at(i); @@ -787,6 +924,7 @@ for (int j = 0; j < _packset.length(); j++) { Node_List* p2 = _packset.at(j); if (p2 == NULL) continue; + if (i == j) continue; if (p1->at(p1->size()-1) == p2->at(0)) { for (uint k = 1; k < p2->size(); k++) { p1->push(p2->at(k)); @@ -796,8 +934,39 @@ } } } - } while (changed); + } + // Split packs which have size greater then max vector size. + for (int i = 0; i < _packset.length(); i++) { + Node_List* p1 = _packset.at(i); + if (p1 != NULL) { + BasicType bt = velt_basic_type(p1->at(0)); + uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector + assert(is_power_of_2(max_vlen), "sanity"); + uint psize = p1->size(); + if (!is_power_of_2(psize)) { + // Skip pack which can't be vector. + // case1: for(...) { a[i] = i; } elements values are different (i+x) + // case2: for(...) { a[i] = b[i+1]; } can't align both, load and store + _packset.at_put(i, NULL); + continue; + } + if (psize > max_vlen) { + Node_List* pack = new Node_List(); + for (uint j = 0; j < psize; j++) { + pack->push(p1->at(j)); + if (pack->size() >= max_vlen) { + assert(is_power_of_2(pack->size()), "sanity"); + _packset.append(pack); + pack = new Node_List(); + } + } + _packset.at_put(i, NULL); + } + } + } + + // Compress list. for (int i = _packset.length() - 1; i >= 0; i--) { Node_List* p1 = _packset.at(i); if (p1 == NULL) { @@ -880,8 +1049,7 @@ // Can code be generated for pack p? bool SuperWord::implemented(Node_List* p) { Node* p0 = p->at(0); - int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0)); - return vopc > 0 && Matcher::has_match_rule(vopc); + return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0)); } //------------------------------profitable--------------------------- @@ -939,37 +1107,42 @@ } //-------------------------------remove_and_insert------------------- -//remove "current" from its current position in the memory graph and insert -//it after the appropriate insertion point (lip or uip) +// Remove "current" from its current position in the memory graph and insert +// it after the appropriate insertion point (lip or uip). void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, Node *uip, Unique_Node_List &sched_before) { Node* my_mem = current->in(MemNode::Memory); - _igvn.hash_delete(current); - _igvn.hash_delete(my_mem); + bool sched_up = sched_before.member(current); - //remove current_store from its current position in the memmory graph + // remove current_store from its current position in the memmory graph for (DUIterator i = current->outs(); current->has_out(i); i++) { Node* use = current->out(i); if (use->is_Mem()) { assert(use->in(MemNode::Memory) == current, "must be"); - _igvn.hash_delete(use); if (use == prev) { // connect prev to my_mem - use->set_req(MemNode::Memory, my_mem); + _igvn.hash_delete(use); + use->set_req(MemNode::Memory, my_mem); + _igvn._worklist.push(use); + --i; //deleted this edge; rescan position } else if (sched_before.member(use)) { - _igvn.hash_delete(uip); - use->set_req(MemNode::Memory, uip); + if (!sched_up) { // Will be moved together with current + _igvn.hash_delete(use); + use->set_req(MemNode::Memory, uip); + _igvn._worklist.push(use); + --i; //deleted this edge; rescan position + } } else { - _igvn.hash_delete(lip); - use->set_req(MemNode::Memory, lip); + if (sched_up) { // Will be moved together with current + _igvn.hash_delete(use); + use->set_req(MemNode::Memory, lip); + _igvn._worklist.push(use); + --i; //deleted this edge; rescan position + } } - _igvn._worklist.push(use); - --i; //deleted this edge; rescan position } } - bool sched_up = sched_before.member(current); Node *insert_pt = sched_up ? uip : lip; - _igvn.hash_delete(insert_pt); // all uses of insert_pt's memory state should use current's instead for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) { @@ -982,10 +1155,10 @@ --i; //deleted this edge; rescan position } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) { uint pos; //lip (lower insert point) must be the last one in the memory slice - _igvn.hash_delete(use); for (pos=1; pos < use->req(); pos++) { if (use->in(pos) == insert_pt) break; } + _igvn.hash_delete(use); use->set_req(pos, current); _igvn._worklist.push(use); --i; @@ -993,6 +1166,7 @@ } //connect current to insert_pt + _igvn.hash_delete(current); current->set_req(MemNode::Memory, insert_pt); _igvn._worklist.push(current); } @@ -1031,7 +1205,7 @@ if (use->is_Mem() && use != previous) memops.push(use); } - if(current == first) break; + if (current == first) break; previous = current; current = current->in(MemNode::Memory)->as_Mem(); } @@ -1044,15 +1218,16 @@ Node *s2 = memops.at(j); if (!independent(s1, s2)) { if (in_pack(s2, pk) || schedule_before_pack.member(s2)) { - schedule_before_pack.push(s1); //s1 must be scheduled before + schedule_before_pack.push(s1); // s1 must be scheduled before Node_List* mem_pk = my_pack(s1); if (mem_pk != NULL) { for (uint ii = 0; ii < mem_pk->size(); ii++) { - Node* s = mem_pk->at(ii); // follow partner + Node* s = mem_pk->at(ii); // follow partner if (memops.member(s) && !schedule_before_pack.member(s)) schedule_before_pack.push(s); } } + break; } } } @@ -1059,12 +1234,21 @@ } } - MemNode* lower_insert_pt = last; Node* upper_insert_pt = first->in(MemNode::Memory); + // Following code moves loads connected to upper_insert_pt below aliased stores. + // Collect such loads here and reconnect them back to upper_insert_pt later. + memops.clear(); + for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) { + Node* use = upper_insert_pt->out(i); + if (!use->is_Store()) + memops.push(use); + } + + MemNode* lower_insert_pt = last; previous = last; //previous store in pk current = last->in(MemNode::Memory)->as_Mem(); - //start scheduling from "last" to "first" + // start scheduling from "last" to "first" while (true) { assert(in_bb(current), "stay in block"); assert(in_pack(previous, pk), "previous stays in pack"); @@ -1072,7 +1256,6 @@ if (in_pack(current, pk)) { // Forward users of my memory state (except "previous) to my input memory state - _igvn.hash_delete(current); for (DUIterator i = current->outs(); current->has_out(i); i++) { Node* use = current->out(i); if (use->is_Mem() && use != previous) { @@ -1079,10 +1262,8 @@ assert(use->in(MemNode::Memory) == current, "must be"); _igvn.hash_delete(use); if (schedule_before_pack.member(use)) { - _igvn.hash_delete(upper_insert_pt); use->set_req(MemNode::Memory, upper_insert_pt); } else { - _igvn.hash_delete(lower_insert_pt); use->set_req(MemNode::Memory, lower_insert_pt); } _igvn._worklist.push(use); @@ -1097,6 +1278,16 @@ if (current == first) break; current = my_mem->as_Mem(); } // end while + + // Reconect loads back to upper_insert_pt. + for (uint i = 0; i < memops.size(); i++) { + Node *ld = memops.at(i); + if (ld->in(MemNode::Memory) != upper_insert_pt) { + _igvn.hash_delete(ld); + ld->set_req(MemNode::Memory, upper_insert_pt); + _igvn._worklist.push(ld); + } + } } else if (pk->at(0)->is_Load()) { //load // all loads in the pack should have the same memory state. By default, // we use the memory state of the last load. However, if any load could @@ -1159,35 +1350,30 @@ Node* vn = NULL; Node* low_adr = p->at(0); Node* first = executed_first(p); + int opc = n->Opcode(); if (n->is_Load()) { - int opc = n->Opcode(); Node* ctl = n->in(MemNode::Control); Node* mem = first->in(MemNode::Memory); Node* adr = low_adr->in(MemNode::Address); const TypePtr* atyp = n->adr_type(); - vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen); - + vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n)); } else if (n->is_Store()) { // Promote value to be stored to vector Node* val = vector_opd(p, MemNode::ValueIn); - - int opc = n->Opcode(); Node* ctl = n->in(MemNode::Control); Node* mem = first->in(MemNode::Memory); Node* adr = low_adr->in(MemNode::Address); const TypePtr* atyp = n->adr_type(); - vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen); - + vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen); } else if (n->req() == 3) { // Promote operands to vector Node* in1 = vector_opd(p, 1); Node* in2 = vector_opd(p, 2); - vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n)); - + vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n)); } else { ShouldNotReachHere(); } - + assert(vn != NULL, "sanity"); _phase->_igvn.register_new_node_with_optimizer(vn); _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); for (uint j = 0; j < p->size(); j++) { @@ -1195,6 +1381,12 @@ _igvn.replace_node(pm, vn); } _igvn._worklist.push(vn); +#ifdef ASSERT + if (TraceSuperWord) { + tty->print("\nnew Vector node: "); + vn->dump(); + } +#endif } } } @@ -1217,10 +1409,10 @@ } if (same_opd) { - if (opd->is_Vector() || opd->is_VectorLoad()) { + if (opd->is_Vector() || opd->is_LoadVector()) { return opd; // input is matching vector } - assert(!opd->is_VectorStore(), "such vector is not expected here"); + assert(!opd->is_StoreVector(), "such vector is not expected here"); // Convert scalar input to vector with the same number of elements as // p0's vector. Use p0's type because size of operand's container in // vector should match p0's size regardless operand's size. @@ -1233,8 +1425,8 @@ } // Insert pack operation - const Type* p0_t = velt_type(p0); - PackNode* pk = PackNode::make(_phase->C, opd, p0_t); + BasicType bt = velt_basic_type(p0); + PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt); DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); ) for (uint i = 1; i < vlen; i++) { @@ -1242,7 +1434,7 @@ Node* in = pi->in(opd_idx); assert(my_pack(in) == NULL, "Should already have been unpacked"); assert(opd_bt == in->bottom_type()->basic_type(), "all same type"); - pk->add_opd(in); + pk->add_opd(i, in); } _phase->_igvn.register_new_node_with_optimizer(pk); _phase->set_ctrl(pk, _phase->get_ctrl(opd)); @@ -1284,9 +1476,8 @@ _igvn.hash_delete(def); _igvn.hash_delete(use); int def_pos = alignment(def) / data_size(def); - const Type* def_t = velt_type(def); - Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t); + Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def)); _phase->_igvn.register_new_node_with_optimizer(ex); _phase->set_ctrl(ex, _phase->get_ctrl(def)); use->set_req(idx, ex); @@ -1294,7 +1485,7 @@ _igvn._worklist.push(use); bb_insert_after(ex, bb_idx(def)); - set_velt_type(ex, def_t); + set_velt_type(ex, velt_type(def)); } } @@ -1587,10 +1778,14 @@ if (!p.valid()) { return bottom_align; } + int vw = vector_width_in_bytes(velt_basic_type(s)); + if (vw < 2) { + return bottom_align; // No vectors for this type + } int offset = p.offset_in_bytes(); offset += iv_adjust_in_bytes; - int off_rem = offset % vector_width_in_bytes(); - int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes(); + int off_rem = offset % vw; + int off_mod = off_rem >= 0 ? off_rem : off_rem + vw; return off_mod; } @@ -1615,7 +1810,8 @@ // (Start, end] half-open range defining which operands are vector void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) { switch (n->Opcode()) { - case Op_LoadB: case Op_LoadUS: + case Op_LoadB: case Op_LoadUB: + case Op_LoadS: case Op_LoadUS: case Op_LoadI: case Op_LoadL: case Op_LoadF: case Op_LoadD: case Op_LoadP: @@ -1733,6 +1929,7 @@ assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, ""); SWPointer align_to_ref_p(align_to_ref, this); + assert(align_to_ref_p.valid(), "sanity"); // Given: // lim0 == original pre loop limit @@ -1785,10 +1982,12 @@ // N = (V - (e - lim0)) % V // lim = lim0 - (V - (e - lim0)) % V + int vw = vector_width_in_bytes(velt_basic_type(align_to_ref)); + assert(vw > 1, "sanity"); int stride = iv_stride(); int scale = align_to_ref_p.scale_in_bytes(); int elt_size = align_to_ref_p.memory_size(); - int v_align = vector_width_in_bytes() / elt_size; + int v_align = vw / elt_size; int k = align_to_ref_p.offset_in_bytes() / elt_size; Node *kn = _igvn.intcon(k); @@ -1807,6 +2006,25 @@ } _phase->_igvn.register_new_node_with_optimizer(e); _phase->set_ctrl(e, pre_ctrl); + } + if (vw > ObjectAlignmentInBytes) { + // incorporate base e +/- base && Mask >>> log2(elt) + Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw))); + Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base()); + _phase->_igvn.register_new_node_with_optimizer(xbase); + Node* masked_xbase = new (_phase->C, 3) AndXNode(xbase, mask); + _phase->_igvn.register_new_node_with_optimizer(masked_xbase); +#ifdef _LP64 + masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase); + _phase->_igvn.register_new_node_with_optimizer(masked_xbase); +#endif + Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); + Node* bref = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt); + _phase->_igvn.register_new_node_with_optimizer(bref); + _phase->set_ctrl(bref, pre_ctrl); + e = new (_phase->C, 3) AddINode(e, bref); + _phase->_igvn.register_new_node_with_optimizer(e); + _phase->set_ctrl(e, pre_ctrl); } // compute e +/- lim0 --- old/src/share/vm/opto/superword.hpp Sat Jun 2 20:04:21 2012 +++ new/src/share/vm/opto/superword.hpp Sat Jun 2 20:04:21 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -264,7 +264,10 @@ _iv = lp->as_CountedLoop()->phi()->as_Phi(); } int iv_stride() { return lp()->as_CountedLoop()->stride_con(); } - int vector_width_in_bytes() { return Matcher::vector_width_in_bytes(); } + int vector_width_in_bytes(BasicType bt) { + return MIN2(ABS(iv_stride())*type2aelembytes(bt), + Matcher::vector_width_in_bytes(bt)); + } MemNode* align_to_ref() { return _align_to_ref; } void set_align_to_ref(MemNode* m) { _align_to_ref = m; } @@ -298,6 +301,7 @@ // vector element type const Type* velt_type(Node* n) { return _node_info.adr_at(bb_idx(n))->_velt_type; } + BasicType velt_basic_type(Node* n) { return velt_type(n)->array_element_basic_type(); } void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; } // my_pack @@ -311,7 +315,9 @@ // Find the adjacent memory references and create pack pairs for them. void find_adjacent_refs(); // Find a memory reference to align the loop induction variable to. - void find_align_to_ref(Node_List &memops); + MemNode* find_align_to_ref(Node_List &memops); + // Calculate loop's iv adjustment for this memory ops. + int get_iv_adjustment(MemNode* mem); // Can the preloop align the reference to position zero in the vector? bool ref_is_alignable(SWPointer& p); // Construct dependency graph. @@ -462,6 +468,7 @@ Node* base() { return _base; } Node* adr() { return _adr; } + MemNode* mem() { return _mem; } int scale_in_bytes() { return _scale; } Node* invar() { return _invar; } bool negate_invar() { return _negate_invar; } --- old/src/share/vm/opto/type.cpp Sat Jun 2 20:04:21 2012 +++ new/src/share/vm/opto/type.cpp Sat Jun 2 20:04:21 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -60,6 +60,10 @@ T_ILLEGAL, // Tuple T_ARRAY, // Array + T_ILLEGAL, // VectorS + T_ILLEGAL, // VectorD + T_ILLEGAL, // VectorX + T_ILLEGAL, // VectorY T_ADDRESS, // AnyPtr // shows up in factory methods for NULL_PTR T_ADDRESS, // RawPtr @@ -414,6 +418,24 @@ // get_zero_type() should not happen for T_CONFLICT _zero_type[T_CONFLICT]= NULL; + // Vector predefined types, it needs initialized _const_basic_type[]. + if (Matcher::vector_size_supported(T_BYTE,4)) { + TypeVect::VECTS = TypeVect::make(T_BYTE,4); + } + if (Matcher::vector_size_supported(T_FLOAT,2)) { + TypeVect::VECTD = TypeVect::make(T_FLOAT,2); + } + if (Matcher::vector_size_supported(T_FLOAT,4)) { + TypeVect::VECTX = TypeVect::make(T_FLOAT,4); + } + if (Matcher::vector_size_supported(T_FLOAT,8)) { + TypeVect::VECTY = TypeVect::make(T_FLOAT,8); + } + mreg2type[Op_VecS] = TypeVect::VECTS; + mreg2type[Op_VecD] = TypeVect::VECTD; + mreg2type[Op_VecX] = TypeVect::VECTX; + mreg2type[Op_VecY] = TypeVect::VECTY; + // Restore working type arena. current->set_type_arena(save); current->set_type_dict(NULL); @@ -668,6 +690,10 @@ Bad, // Tuple - handled in v-call Bad, // Array - handled in v-call + Bad, // VectorS - handled in v-call + Bad, // VectorD - handled in v-call + Bad, // VectorX - handled in v-call + Bad, // VectorY - handled in v-call Bad, // AnyPtr - handled in v-call Bad, // RawPtr - handled in v-call @@ -728,8 +754,8 @@ //------------------------------data------------------------------------------- const char * const Type::msg[Type::lastype] = { "bad","control","top","int:","long:","half", "narrowoop:", - "tuple:", "aryptr", - "anyptr:", "rawptr:", "java:", "inst:", "ary:", "klass:", + "tuple:", "array:", "vectors:", "vectord:", "vectorx:", "vectory:", + "anyptr:", "rawptr:", "java:", "inst:", "aryptr:", "klass:", "func", "abIO", "return_address", "memory", "float_top", "ftcon:", "float", "double_top", "dblcon:", "double", @@ -790,7 +816,7 @@ //------------------------------isa_oop_ptr------------------------------------ // Return true if type is an oop pointer type. False for raw pointers. static char isa_oop_ptr_tbl[Type::lastype] = { - 0,0,0,0,0,0,0/*narrowoop*/,0/*tuple*/, 0/*ary*/, + 0,0,0,0,0,0,0/*narrowoop*/,0/*tuple*/, 0/*array*/, 0, 0, 0, 0/*vector*/, 0/*anyptr*/,0/*rawptr*/,1/*OopPtr*/,1/*InstPtr*/,1/*AryPtr*/,1/*KlassPtr*/, 0/*func*/,0,0/*return_address*/,0, /*floats*/0,0,0, /*doubles*/0,0,0, @@ -1926,6 +1952,121 @@ return false; } +//==============================TypeVect======================================= +// Convenience common pre-built types. +const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors +const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors +const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors +const TypeVect *TypeVect::VECTY = NULL; // 256-bit vectors + +//------------------------------make------------------------------------------- +const TypeVect* TypeVect::make(const Type *elem, uint length) { + BasicType elem_bt = elem->array_element_basic_type(); + assert(is_java_primitive(elem_bt), "only primitive types in vector"); + assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); + assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); + int size = length * type2aelembytes(elem_bt); + switch (Matcher::vector_ideal_reg(size)) { + case Op_VecS: + return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); + case Op_VecD: + case Op_RegD: + return (TypeVect*)(new TypeVectD(elem, length))->hashcons(); + case Op_VecX: + return (TypeVect*)(new TypeVectX(elem, length))->hashcons(); + case Op_VecY: + return (TypeVect*)(new TypeVectY(elem, length))->hashcons(); + } + ShouldNotReachHere(); + return NULL; +} + +//------------------------------meet------------------------------------------- +// Compute the MEET of two types. It returns a new Type object. +const Type *TypeVect::xmeet( const Type *t ) const { + // Perform a fast test for common case; meeting the same types together. + if( this == t ) return this; // Meeting same type-rep? + + // Current "this->_base" is Vector + switch (t->base()) { // switch on original type + + case Bottom: // Ye Olde Default + return t; + + default: // All else is a mistake + typerr(t); + + case VectorS: + case VectorD: + case VectorX: + case VectorY: { // Meeting 2 vectors? + const TypeVect* v = t->is_vect(); + assert( base() == v->base(), ""); + assert(length() == v->length(), ""); + assert(element_basic_type() == v->element_basic_type(), ""); + return TypeVect::make(_elem->xmeet(v->_elem), _length); + } + case Top: + break; + } + return this; +} + +//------------------------------xdual------------------------------------------ +// Dual: compute field-by-field dual +const Type *TypeVect::xdual() const { + return new TypeVect(base(), _elem->dual(), _length); +} + +//------------------------------eq--------------------------------------------- +// Structural equality check for Type representations +bool TypeVect::eq(const Type *t) const { + const TypeVect *v = t->is_vect(); + return (_elem == v->_elem) && (_length == v->_length); +} + +//------------------------------hash------------------------------------------- +// Type-specific hashing function. +int TypeVect::hash(void) const { + return (intptr_t)_elem + (intptr_t)_length; +} + +//------------------------------singleton-------------------------------------- +// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple +// constants (Ldi nodes). Vector is singleton if all elements are the same +// constant value (when vector is created with Replicate code). +bool TypeVect::singleton(void) const { +// There is no Con node for vectors yet. +// return _elem->singleton(); + return false; +} + +bool TypeVect::empty(void) const { + return _elem->empty(); +} + +//------------------------------dump2------------------------------------------ +#ifndef PRODUCT +void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { + switch (base()) { + case VectorS: + st->print("vectors["); break; + case VectorD: + st->print("vectord["); break; + case VectorX: + st->print("vectorx["); break; + case VectorY: + st->print("vectory["); break; + default: + ShouldNotReachHere(); + } + st->print("%d]:{", _length); + _elem->dump2(d, depth, st); + st->print("}"); +} +#endif + + //============================================================================= // Convenience common pre-built types. const TypePtr *TypePtr::NULL_PTR; @@ -4140,7 +4281,7 @@ // Print a 'flattened' signature static const char * const flat_type_msg[Type::lastype] = { "bad","control","top","int","long","_", "narrowoop", - "tuple:", "array:", + "tuple:", "array:", "vectors:", "vectord:", "vectorx:", "vectory:", "ptr", "rawptr", "ptr", "ptr", "ptr", "ptr", "func", "abIO", "return_address", "mem", "float_top", "ftcon:", "flt", --- old/src/share/vm/opto/type.hpp Sat Jun 2 20:04:22 2012 +++ new/src/share/vm/opto/type.hpp Sat Jun 2 20:04:22 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -51,6 +51,11 @@ class TypeNarrowOop; class TypeAry; class TypeTuple; +class TypeVect; +class TypeVectS; +class TypeVectD; +class TypeVectX; +class TypeVectY; class TypePtr; class TypeRawPtr; class TypeOopPtr; @@ -78,6 +83,10 @@ Tuple, // Method signature or object layout Array, // Array types + VectorS, // 32bit Vector types + VectorD, // 64bit Vector types + VectorX, // 128bit Vector types + VectorY, // 256bit Vector types AnyPtr, // Any old raw, klass, inst, or array pointer RawPtr, // Raw (non-oop) pointers @@ -222,6 +231,8 @@ const TypeF *isa_float_constant() const; // Returns NULL if not a FloatCon const TypeTuple *is_tuple() const; // Collection of fields, NOT a pointer const TypeAry *is_ary() const; // Array, NOT array pointer + const TypeVect *is_vect() const; // Vector + const TypeVect *isa_vect() const; // Returns NULL if not a Vector const TypePtr *is_ptr() const; // Asserts it is a ptr type const TypePtr *isa_ptr() const; // Returns NULL if not ptr type const TypeRawPtr *isa_rawptr() const; // NOT Java oop @@ -574,6 +585,69 @@ #endif }; +//------------------------------TypeVect--------------------------------------- +// Class of Vector Types +class TypeVect : public Type { + const Type* _elem; // Vector's element type + const uint _length; // Elements in vector (power of 2) + +protected: + TypeVect(TYPES t, const Type* elem, uint length) : Type(t), + _elem(elem), _length(length) {} + +public: + const Type* element_type() const { return _elem; } + BasicType element_basic_type() const { return _elem->array_element_basic_type(); } + uint length() const { return _length; } + uint length_in_bytes() const { + return _length * type2aelembytes(element_basic_type()); + } + + virtual bool eq(const Type *t) const; + virtual int hash() const; // Type specific hashing + virtual bool singleton(void) const; // TRUE if type is a singleton + virtual bool empty(void) const; // TRUE if type is vacuous + + static const TypeVect *make(const BasicType elem_bt, uint length) { + // Use bottom primitive type. + return make(get_const_basic_type(elem_bt), length); + } + // Used directly by Replicate nodes to construct singleton vector. + static const TypeVect *make(const Type* elem, uint length); + + virtual const Type *xmeet( const Type *t) const; + virtual const Type *xdual() const; // Compute dual right now. + + static const TypeVect *VECTS; + static const TypeVect *VECTD; + static const TypeVect *VECTX; + static const TypeVect *VECTY; + +#ifndef PRODUCT + virtual void dump2(Dict &d, uint, outputStream *st) const; // Specialized per-Type dumping +#endif +}; + +class TypeVectS : public TypeVect { + friend class TypeVect; + TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} +}; + +class TypeVectD : public TypeVect { + friend class TypeVect; + TypeVectD(const Type* elem, uint length) : TypeVect(VectorD, elem, length) {} +}; + +class TypeVectX : public TypeVect { + friend class TypeVect; + TypeVectX(const Type* elem, uint length) : TypeVect(VectorX, elem, length) {} +}; + +class TypeVectY : public TypeVect { + friend class TypeVect; + TypeVectY(const Type* elem, uint length) : TypeVect(VectorY, elem, length) {} +}; + //------------------------------TypePtr---------------------------------------- // Class of machine Pointer Types: raw data, instances or arrays. // If the _base enum is AnyPtr, then this refers to all of the above. @@ -1113,6 +1187,15 @@ return (TypeAry*)this; } +inline const TypeVect *Type::is_vect() const { + assert( _base >= VectorS && _base <= VectorY, "Not a Vector" ); + return (TypeVect*)this; +} + +inline const TypeVect *Type::isa_vect() const { + return (_base >= VectorS && _base <= VectorY) ? (TypeVect*)this : NULL; +} + inline const TypePtr *Type::is_ptr() const { // AnyPtr is the first Ptr and KlassPtr the last, with no non-ptrs between. assert(_base >= AnyPtr && _base <= KlassPtr, "Not a pointer"); --- old/src/share/vm/opto/vectornode.cpp Sat Jun 2 20:04:23 2012 +++ new/src/share/vm/opto/vectornode.cpp Sat Jun 2 20:04:22 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,141 +28,10 @@ //------------------------------VectorNode-------------------------------------- -// Return vector type for an element type and vector length. -const Type* VectorNode::vect_type(BasicType elt_bt, uint len) { - assert(len <= VectorNode::max_vlen(elt_bt), "len in range"); - switch(elt_bt) { - case T_BOOLEAN: - case T_BYTE: - switch(len) { - case 2: return TypeInt::CHAR; - case 4: return TypeInt::INT; - case 8: return TypeLong::LONG; - } - break; - case T_CHAR: - case T_SHORT: - switch(len) { - case 2: return TypeInt::INT; - case 4: return TypeLong::LONG; - } - break; - case T_INT: - switch(len) { - case 2: return TypeLong::LONG; - } - break; - case T_LONG: - break; - case T_FLOAT: - switch(len) { - case 2: return Type::DOUBLE; - } - break; - case T_DOUBLE: - break; - } - ShouldNotReachHere(); - return NULL; -} - -// Scalar promotion -VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) { - BasicType bt = opd_t->array_element_basic_type(); - assert(vlen <= VectorNode::max_vlen(bt), "vlen in range"); - switch (bt) { - case T_BOOLEAN: - case T_BYTE: - if (vlen == 16) return new (C, 2) Replicate16BNode(s); - if (vlen == 8) return new (C, 2) Replicate8BNode(s); - if (vlen == 4) return new (C, 2) Replicate4BNode(s); - break; - case T_CHAR: - if (vlen == 8) return new (C, 2) Replicate8CNode(s); - if (vlen == 4) return new (C, 2) Replicate4CNode(s); - if (vlen == 2) return new (C, 2) Replicate2CNode(s); - break; - case T_SHORT: - if (vlen == 8) return new (C, 2) Replicate8SNode(s); - if (vlen == 4) return new (C, 2) Replicate4SNode(s); - if (vlen == 2) return new (C, 2) Replicate2SNode(s); - break; - case T_INT: - if (vlen == 4) return new (C, 2) Replicate4INode(s); - if (vlen == 2) return new (C, 2) Replicate2INode(s); - break; - case T_LONG: - if (vlen == 2) return new (C, 2) Replicate2LNode(s); - break; - case T_FLOAT: - if (vlen == 4) return new (C, 2) Replicate4FNode(s); - if (vlen == 2) return new (C, 2) Replicate2FNode(s); - break; - case T_DOUBLE: - if (vlen == 2) return new (C, 2) Replicate2DNode(s); - break; - } - ShouldNotReachHere(); - return NULL; -} - -// Return initial Pack node. Additional operands added with add_opd() calls. -PackNode* PackNode::make(Compile* C, Node* s, const Type* opd_t) { - BasicType bt = opd_t->array_element_basic_type(); - switch (bt) { - case T_BOOLEAN: - case T_BYTE: - return new (C, 2) PackBNode(s); - case T_CHAR: - return new (C, 2) PackCNode(s); - case T_SHORT: - return new (C, 2) PackSNode(s); - case T_INT: - return new (C, 2) PackINode(s); - case T_LONG: - return new (C, 2) PackLNode(s); - case T_FLOAT: - return new (C, 2) PackFNode(s); - case T_DOUBLE: - return new (C, 2) PackDNode(s); - } - ShouldNotReachHere(); - return NULL; -} - -// Create a binary tree form for Packs. [lo, hi) (half-open) range -Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) { - int ct = hi - lo; - assert(is_power_of_2(ct), "power of 2"); - int mid = lo + ct/2; - Node* n1 = ct == 2 ? in(lo) : binaryTreePack(C, lo, mid); - Node* n2 = ct == 2 ? in(lo+1) : binaryTreePack(C, mid, hi ); - int rslt_bsize = ct * type2aelembytes(elt_basic_type()); - if (bottom_type()->is_floatingpoint()) { - switch (rslt_bsize) { - case 8: return new (C, 3) PackFNode(n1, n2); - case 16: return new (C, 3) PackDNode(n1, n2); - } - } else { - assert(bottom_type()->isa_int() || bottom_type()->isa_long(), "int or long"); - switch (rslt_bsize) { - case 2: return new (C, 3) Pack2x1BNode(n1, n2); - case 4: return new (C, 3) Pack2x2BNode(n1, n2); - case 8: return new (C, 3) PackINode(n1, n2); - case 16: return new (C, 3) PackLNode(n1, n2); - } - } - ShouldNotReachHere(); - return NULL; -} - // Return the vector operator for the specified scalar operation -// and vector length. One use is to check if the code generator +// and vector length. Also used to check if the code generator // supports the vector operation. -int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) { - BasicType bt = opd_t->array_element_basic_type(); - if (!(is_power_of_2(vlen) && vlen <= max_vlen(bt))) - return 0; // unimplemented +int VectorNode::opcode(int sopc, uint vlen, BasicType bt) { switch (sopc) { case Op_AddI: switch (bt) { @@ -221,13 +90,13 @@ case T_INT: return Op_LShiftVI; } ShouldNotReachHere(); - case Op_URShiftI: + case Op_RShiftI: switch (bt) { case T_BOOLEAN: - case T_BYTE: return Op_URShiftVB; - case T_CHAR: return Op_URShiftVC; - case T_SHORT: return Op_URShiftVS; - case T_INT: return Op_URShiftVI; + case T_BYTE: return Op_RShiftVB; + case T_CHAR: return Op_RShiftVC; + case T_SHORT: return Op_RShiftVS; + case T_INT: return Op_RShiftVI; } ShouldNotReachHere(); case Op_AndI: @@ -247,7 +116,7 @@ case Op_LoadL: case Op_LoadF: case Op_LoadD: - return VectorLoadNode::opcode(sopc, vlen); + return Op_LoadVector; case Op_StoreB: case Op_StoreC: @@ -255,208 +124,172 @@ case Op_StoreL: case Op_StoreF: case Op_StoreD: - return VectorStoreNode::opcode(sopc, vlen); + return Op_StoreVector; } return 0; // Unimplemented } -// Helper for above. -int VectorLoadNode::opcode(int sopc, uint vlen) { - switch (sopc) { - case Op_LoadB: - switch (vlen) { - case 2: return 0; // Unimplemented - case 4: return Op_Load4B; - case 8: return Op_Load8B; - case 16: return Op_Load16B; - } - break; - case Op_LoadUS: - switch (vlen) { - case 2: return Op_Load2C; - case 4: return Op_Load4C; - case 8: return Op_Load8C; - } - break; - case Op_LoadS: - switch (vlen) { - case 2: return Op_Load2S; - case 4: return Op_Load4S; - case 8: return Op_Load8S; - } - break; - case Op_LoadI: - switch (vlen) { - case 2: return Op_Load2I; - case 4: return Op_Load4I; - } - break; - case Op_LoadL: - if (vlen == 2) return Op_Load2L; - break; - case Op_LoadF: - switch (vlen) { - case 2: return Op_Load2F; - case 4: return Op_Load4F; - } - break; - case Op_LoadD: - if (vlen == 2) return Op_Load2D; - break; +bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { + if (is_java_primitive(bt) && + (vlen > 1) && is_power_of_2(vlen) && + Matcher::vector_size_supported(bt, vlen)) { + int vopc = VectorNode::opcode(opc, vlen, bt); + return vopc > 0 && Matcher::has_match_rule(vopc); } - return 0; // Unimplemented + return false; } -// Helper for above -int VectorStoreNode::opcode(int sopc, uint vlen) { - switch (sopc) { - case Op_StoreB: - switch (vlen) { - case 2: return 0; // Unimplemented - case 4: return Op_Store4B; - case 8: return Op_Store8B; - case 16: return Op_Store16B; - } - break; - case Op_StoreC: - switch (vlen) { - case 2: return Op_Store2C; - case 4: return Op_Store4C; - case 8: return Op_Store8C; - } - break; - case Op_StoreI: - switch (vlen) { - case 2: return Op_Store2I; - case 4: return Op_Store4I; - } - break; - case Op_StoreL: - if (vlen == 2) return Op_Store2L; - break; - case Op_StoreF: - switch (vlen) { - case 2: return Op_Store2F; - case 4: return Op_Store4F; - } - break; - case Op_StoreD: - if (vlen == 2) return Op_Store2D; - break; - } - return 0; // Unimplemented -} - // Return the vector version of a scalar operation node. -VectorNode* VectorNode::make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* opd_t) { - int vopc = opcode(sopc, vlen, opd_t); +VectorNode* VectorNode::make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt) { + const TypeVect* vt = TypeVect::make(bt, vlen); + int vopc = VectorNode::opcode(opc, vlen, bt); switch (vopc) { - case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vlen); - case Op_AddVC: return new (C, 3) AddVCNode(n1, n2, vlen); - case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vlen); - case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vlen); - case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vlen); - case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vlen); - case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vlen); + case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vt); + case Op_AddVC: return new (C, 3) AddVCNode(n1, n2, vt); + case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vt); + case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vt); + case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vt); + case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vt); + case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vt); - case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vlen); - case Op_SubVC: return new (C, 3) SubVCNode(n1, n2, vlen); - case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vlen); - case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vlen); - case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vlen); - case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vlen); - case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vlen); + case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vt); + case Op_SubVC: return new (C, 3) SubVCNode(n1, n2, vt); + case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vt); + case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vt); + case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vt); + case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vt); + case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vt); - case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vlen); - case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vlen); + case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vt); + case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vt); - case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vlen); - case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vlen); + case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vt); + case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vt); - case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vlen); - case Op_LShiftVC: return new (C, 3) LShiftVCNode(n1, n2, vlen); - case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vlen); - case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vlen); + case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vt); + case Op_LShiftVC: return new (C, 3) LShiftVCNode(n1, n2, vt); + case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vt); + case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vt); - case Op_URShiftVB: return new (C, 3) URShiftVBNode(n1, n2, vlen); - case Op_URShiftVC: return new (C, 3) URShiftVCNode(n1, n2, vlen); - case Op_URShiftVS: return new (C, 3) URShiftVSNode(n1, n2, vlen); - case Op_URShiftVI: return new (C, 3) URShiftVINode(n1, n2, vlen); + case Op_RShiftVB: return new (C, 3) RShiftVBNode(n1, n2, vt); + case Op_RShiftVC: return new (C, 3) RShiftVCNode(n1, n2, vt); + case Op_RShiftVS: return new (C, 3) RShiftVSNode(n1, n2, vt); + case Op_RShiftVI: return new (C, 3) RShiftVINode(n1, n2, vt); - case Op_AndV: return new (C, 3) AndVNode(n1, n2, vlen, opd_t->array_element_basic_type()); - case Op_OrV: return new (C, 3) OrVNode (n1, n2, vlen, opd_t->array_element_basic_type()); - case Op_XorV: return new (C, 3) XorVNode(n1, n2, vlen, opd_t->array_element_basic_type()); + case Op_AndV: return new (C, 3) AndVNode(n1, n2, vt); + case Op_OrV: return new (C, 3) OrVNode (n1, n2, vt); + case Op_XorV: return new (C, 3) XorVNode(n1, n2, vt); } ShouldNotReachHere(); return NULL; + } -// Return the vector version of a scalar load node. -VectorLoadNode* VectorLoadNode::make(Compile* C, int opc, Node* ctl, Node* mem, - Node* adr, const TypePtr* atyp, uint vlen) { - int vopc = opcode(opc, vlen); +// Scalar promotion +VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) { + BasicType bt = opd_t->array_element_basic_type(); + const TypeVect* vt = opd_t->singleton() ? TypeVect::make(opd_t, vlen) + : TypeVect::make(bt, vlen); + switch (bt) { + case T_BOOLEAN: + case T_BYTE: + return new (C, 2) ReplicateBNode(s, vt); + case T_CHAR: + return new (C, 2) ReplicateCNode(s, vt); + case T_SHORT: + return new (C, 2) ReplicateSNode(s, vt); + case T_INT: + return new (C, 2) ReplicateINode(s, vt); + case T_LONG: + return new (C, 2) ReplicateLNode(s, vt); + case T_FLOAT: + return new (C, 2) ReplicateFNode(s, vt); + case T_DOUBLE: + return new (C, 2) ReplicateDNode(s, vt); + } + ShouldNotReachHere(); + return NULL; +} - switch(vopc) { - case Op_Load16B: return new (C, 3) Load16BNode(ctl, mem, adr, atyp); - case Op_Load8B: return new (C, 3) Load8BNode(ctl, mem, adr, atyp); - case Op_Load4B: return new (C, 3) Load4BNode(ctl, mem, adr, atyp); +// Return initial Pack node. Additional operands added with add_opd() calls. +PackNode* PackNode::make(Compile* C, Node* s, uint vlen, BasicType bt) { + const TypeVect* vt = TypeVect::make(bt, vlen); + switch (bt) { + case T_BOOLEAN: + case T_BYTE: + return new (C, vlen+1) PackBNode(s, vt); + case T_CHAR: + return new (C, vlen+1) PackCNode(s, vt); + case T_SHORT: + return new (C, vlen+1) PackSNode(s, vt); + case T_INT: + return new (C, vlen+1) PackINode(s, vt); + case T_LONG: + return new (C, vlen+1) PackLNode(s, vt); + case T_FLOAT: + return new (C, vlen+1) PackFNode(s, vt); + case T_DOUBLE: + return new (C, vlen+1) PackDNode(s, vt); + } + ShouldNotReachHere(); + return NULL; +} - case Op_Load8C: return new (C, 3) Load8CNode(ctl, mem, adr, atyp); - case Op_Load4C: return new (C, 3) Load4CNode(ctl, mem, adr, atyp); - case Op_Load2C: return new (C, 3) Load2CNode(ctl, mem, adr, atyp); +// Create a binary tree form for Packs. [lo, hi) (half-open) range +Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) { + int ct = hi - lo; + assert(is_power_of_2(ct), "power of 2"); + if (ct == 2) { + PackNode* pk = PackNode::make(C, in(lo), 2, vect_type()->element_basic_type()); + pk->add_opd(1, in(lo+1)); + return pk; - case Op_Load8S: return new (C, 3) Load8SNode(ctl, mem, adr, atyp); - case Op_Load4S: return new (C, 3) Load4SNode(ctl, mem, adr, atyp); - case Op_Load2S: return new (C, 3) Load2SNode(ctl, mem, adr, atyp); + } else { + int mid = lo + ct/2; + Node* n1 = binaryTreePack(C, lo, mid); + Node* n2 = binaryTreePack(C, mid, hi ); - case Op_Load4I: return new (C, 3) Load4INode(ctl, mem, adr, atyp); - case Op_Load2I: return new (C, 3) Load2INode(ctl, mem, adr, atyp); - - case Op_Load2L: return new (C, 3) Load2LNode(ctl, mem, adr, atyp); - - case Op_Load4F: return new (C, 3) Load4FNode(ctl, mem, adr, atyp); - case Op_Load2F: return new (C, 3) Load2FNode(ctl, mem, adr, atyp); - - case Op_Load2D: return new (C, 3) Load2DNode(ctl, mem, adr, atyp); + BasicType bt = vect_type()->element_basic_type(); + switch (bt) { + case T_BOOLEAN: + case T_BYTE: + return new (C, 3) PackSNode(n1, n2, TypeVect::make(T_SHORT, 2)); + case T_CHAR: + case T_SHORT: + return new (C, 3) PackINode(n1, n2, TypeVect::make(T_INT, 2)); + case T_INT: + return new (C, 3) PackLNode(n1, n2, TypeVect::make(T_LONG, 2)); + case T_LONG: + return new (C, 3) Pack2LNode(n1, n2, TypeVect::make(T_LONG, 2)); + case T_FLOAT: + return new (C, 3) PackDNode(n1, n2, TypeVect::make(T_DOUBLE, 2)); + case T_DOUBLE: + return new (C, 3) Pack2DNode(n1, n2, TypeVect::make(T_DOUBLE, 2)); + } + ShouldNotReachHere(); } - ShouldNotReachHere(); return NULL; } +// Return the vector version of a scalar load node. +LoadVectorNode* LoadVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem, + Node* adr, const TypePtr* atyp, uint vlen, BasicType bt) { + const TypeVect* vt = TypeVect::make(bt, vlen); + return new (C, 3) LoadVectorNode(ctl, mem, adr, atyp, vt); + return NULL; +} + // Return the vector version of a scalar store node. -VectorStoreNode* VectorStoreNode::make(Compile* C, int opc, Node* ctl, Node* mem, +StoreVectorNode* StoreVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem, Node* adr, const TypePtr* atyp, Node* val, uint vlen) { - int vopc = opcode(opc, vlen); - - switch(vopc) { - case Op_Store16B: return new (C, 4) Store16BNode(ctl, mem, adr, atyp, val); - case Op_Store8B: return new (C, 4) Store8BNode(ctl, mem, adr, atyp, val); - case Op_Store4B: return new (C, 4) Store4BNode(ctl, mem, adr, atyp, val); - - case Op_Store8C: return new (C, 4) Store8CNode(ctl, mem, adr, atyp, val); - case Op_Store4C: return new (C, 4) Store4CNode(ctl, mem, adr, atyp, val); - case Op_Store2C: return new (C, 4) Store2CNode(ctl, mem, adr, atyp, val); - - case Op_Store4I: return new (C, 4) Store4INode(ctl, mem, adr, atyp, val); - case Op_Store2I: return new (C, 4) Store2INode(ctl, mem, adr, atyp, val); - - case Op_Store2L: return new (C, 4) Store2LNode(ctl, mem, adr, atyp, val); - - case Op_Store4F: return new (C, 4) Store4FNode(ctl, mem, adr, atyp, val); - case Op_Store2F: return new (C, 4) Store2FNode(ctl, mem, adr, atyp, val); - - case Op_Store2D: return new (C, 4) Store2DNode(ctl, mem, adr, atyp, val); - } - ShouldNotReachHere(); - return NULL; + return new (C, 4) StoreVectorNode(ctl, mem, adr, atyp, val); } // Extract a scalar element of vector. -Node* ExtractNode::make(Compile* C, Node* v, uint position, const Type* opd_t) { - BasicType bt = opd_t->array_element_basic_type(); - assert(position < VectorNode::max_vlen(bt), "pos in range"); +Node* ExtractNode::make(Compile* C, Node* v, uint position, BasicType bt) { + assert((int)position < Matcher::max_vector_size(bt), "pos in range"); ConINode* pos = ConINode::make(C, (int)position); switch (bt) { case T_BOOLEAN: @@ -478,3 +311,4 @@ ShouldNotReachHere(); return NULL; } + --- old/src/share/vm/opto/vectornode.hpp Sat Jun 2 20:04:23 2012 +++ new/src/share/vm/opto/vectornode.hpp Sat Jun 2 20:04:23 2012 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,49 +31,33 @@ //------------------------------VectorNode-------------------------------------- // Vector Operation -class VectorNode : public Node { - virtual uint size_of() const { return sizeof(*this); } - protected: - uint _length; // vector length - virtual BasicType elt_basic_type() const = 0; // Vector element basic type - - static const Type* vect_type(BasicType elt_bt, uint len); - static const Type* vect_type(const Type* elt_type, uint len) { - return vect_type(elt_type->array_element_basic_type(), len); - } - +class VectorNode : public TypeNode { public: - friend class VectorLoadNode; // For vect_type - friend class VectorStoreNode; // ditto. - VectorNode(Node* n1, uint vlen) : Node(NULL, n1), _length(vlen) { + VectorNode(Node* n1, const TypeVect* vt) : TypeNode(vt, 2) { init_class_id(Class_Vector); + init_req(1, n1); } - VectorNode(Node* n1, Node* n2, uint vlen) : Node(NULL, n1, n2), _length(vlen) { + VectorNode(Node* n1, Node* n2, const TypeVect* vt) : TypeNode(vt, 3) { init_class_id(Class_Vector); + init_req(1, n1); + init_req(2, n2); } - virtual int Opcode() const; - uint length() const { return _length; } // Vector length + const TypeVect* vect_type() const { return type()->is_vect(); } + uint length() const { return vect_type()->length(); } // Vector length - static uint max_vlen(BasicType bt) { // max vector length - return (uint)(Matcher::vector_width_in_bytes() / type2aelembytes(bt)); - } + virtual int Opcode() const; - // Element and vector type - const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); } - const Type* vect_type() const { return vect_type(elt_basic_type(), length()); } + virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(vect_type()->length_in_bytes()); } - virtual const Type *bottom_type() const { return vect_type(); } - virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); } - - // Vector opcode from scalar opcode - static int opcode(int sopc, uint vlen, const Type* opd_t); - static VectorNode* scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t); - static VectorNode* make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* elt_t); + static VectorNode* make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt); + static int opcode(int opc, uint vlen, BasicType bt); + static bool implemented(int opc, uint vlen, BasicType bt); + }; //===========================Vector=ALU=Operations==================================== @@ -81,10 +65,8 @@ //------------------------------AddVBNode--------------------------------------- // Vector add byte class AddVBNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } public: - AddVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + AddVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -91,10 +73,8 @@ //------------------------------AddVCNode--------------------------------------- // Vector add char class AddVCNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } public: - AddVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + AddVCNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -101,10 +81,8 @@ //------------------------------AddVSNode--------------------------------------- // Vector add short class AddVSNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } public: - AddVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + AddVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -111,10 +89,8 @@ //------------------------------AddVINode--------------------------------------- // Vector add int class AddVINode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } public: - AddVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + AddVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -121,10 +97,8 @@ //------------------------------AddVLNode--------------------------------------- // Vector add long class AddVLNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_LONG; } public: - AddVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + AddVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -131,10 +105,8 @@ //------------------------------AddVFNode--------------------------------------- // Vector add float class AddVFNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } public: - AddVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + AddVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -141,10 +113,8 @@ //------------------------------AddVDNode--------------------------------------- // Vector add double class AddVDNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_DOUBLE; } public: - AddVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + AddVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -151,10 +121,8 @@ //------------------------------SubVBNode--------------------------------------- // Vector subtract byte class SubVBNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } public: - SubVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + SubVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -161,10 +129,8 @@ //------------------------------SubVCNode--------------------------------------- // Vector subtract char class SubVCNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } public: - SubVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + SubVCNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -171,10 +137,8 @@ //------------------------------SubVSNode--------------------------------------- // Vector subtract short class SubVSNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } public: - SubVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + SubVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -181,10 +145,8 @@ //------------------------------SubVINode--------------------------------------- // Vector subtract int class SubVINode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } public: - SubVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + SubVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -191,10 +153,8 @@ //------------------------------SubVLNode--------------------------------------- // Vector subtract long class SubVLNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_LONG; } public: - SubVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + SubVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -201,10 +161,8 @@ //------------------------------SubVFNode--------------------------------------- // Vector subtract float class SubVFNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } public: - SubVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + SubVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -211,10 +169,8 @@ //------------------------------SubVDNode--------------------------------------- // Vector subtract double class SubVDNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_DOUBLE; } public: - SubVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + SubVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -221,10 +177,8 @@ //------------------------------MulVFNode--------------------------------------- // Vector multiply float class MulVFNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } public: - MulVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + MulVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -231,10 +185,8 @@ //------------------------------MulVDNode--------------------------------------- // Vector multiply double class MulVDNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_DOUBLE; } public: - MulVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + MulVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -241,10 +193,8 @@ //------------------------------DivVFNode--------------------------------------- // Vector divide float class DivVFNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } public: - DivVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + DivVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -251,10 +201,8 @@ //------------------------------DivVDNode--------------------------------------- // Vector Divide double class DivVDNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_DOUBLE; } public: - DivVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + DivVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -261,10 +209,8 @@ //------------------------------LShiftVBNode--------------------------------------- // Vector lshift byte class LShiftVBNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } public: - LShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + LShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -271,10 +217,8 @@ //------------------------------LShiftVCNode--------------------------------------- // Vector lshift chars class LShiftVCNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } public: - LShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + LShiftVCNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -281,10 +225,8 @@ //------------------------------LShiftVSNode--------------------------------------- // Vector lshift shorts class LShiftVSNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } public: - LShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + LShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -291,50 +233,40 @@ //------------------------------LShiftVINode--------------------------------------- // Vector lshift ints class LShiftVINode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } public: - LShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + LShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; //------------------------------URShiftVBNode--------------------------------------- // Vector urshift bytes -class URShiftVBNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } +class RShiftVBNode : public VectorNode { public: - URShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + RShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; //------------------------------URShiftVCNode--------------------------------------- // Vector urshift char -class URShiftVCNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } +class RShiftVCNode : public VectorNode { public: - URShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + RShiftVCNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; //------------------------------URShiftVSNode--------------------------------------- // Vector urshift shorts -class URShiftVSNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } +class RShiftVSNode : public VectorNode { public: - URShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + RShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; //------------------------------URShiftVINode--------------------------------------- // Vector urshift ints -class URShiftVINode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } +class RShiftVINode : public VectorNode { public: - URShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {} + RShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -341,11 +273,8 @@ //------------------------------AndVNode--------------------------------------- // Vector and class AndVNode : public VectorNode { - protected: - BasicType _bt; - virtual BasicType elt_basic_type() const { return _bt; } public: - AndVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {} + AndVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -352,11 +281,8 @@ //------------------------------OrVNode--------------------------------------- // Vector or class OrVNode : public VectorNode { - protected: - BasicType _bt; - virtual BasicType elt_basic_type() const { return _bt; } public: - OrVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {} + OrVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; @@ -363,607 +289,145 @@ //------------------------------XorVNode--------------------------------------- // Vector xor class XorVNode : public VectorNode { - protected: - BasicType _bt; - virtual BasicType elt_basic_type() const { return _bt; } public: - XorVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {} + XorVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; -//================================= M E M O R Y ================================== +//================================= M E M O R Y =============================== - -//------------------------------VectorLoadNode-------------------------------------- -// Vector Load from memory -class VectorLoadNode : public LoadNode { - virtual uint size_of() const { return sizeof(*this); } - - protected: - virtual BasicType elt_basic_type() const = 0; // Vector element basic type - // For use in constructor - static const Type* vect_type(const Type* elt_type, uint len) { - return VectorNode::vect_type(elt_type, len); - } - +//------------------------------LoadVectorNode--------------------------------- +// Load Vector from memory +class LoadVectorNode : public LoadNode { public: - VectorLoadNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *rt) - : LoadNode(c,mem,adr,at,rt) { - init_class_id(Class_VectorLoad); + LoadVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt) + : LoadNode(c, mem, adr, at, vt) { + init_class_id(Class_LoadVector); } - virtual int Opcode() const; - virtual uint length() const = 0; // Vector length + const TypeVect* vect_type() const { return type()->is_vect(); } + uint length() const { return vect_type()->length(); } // Vector length - // Element and vector type - const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); } - const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); } + virtual int Opcode() const; - virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); } + virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(memory_size()); } virtual BasicType memory_type() const { return T_VOID; } - virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); } + virtual int memory_size() const { return vect_type()->length_in_bytes(); } - // Vector opcode from scalar opcode - static int opcode(int sopc, uint vlen); + virtual int store_Opcode() const { return Op_StoreVector; } - static VectorLoadNode* make(Compile* C, int opc, Node* ctl, Node* mem, - Node* adr, const TypePtr* atyp, uint vlen); + static LoadVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem, + Node* adr, const TypePtr* atyp, uint vlen, BasicType bt); }; -//------------------------------Load16BNode-------------------------------------- -// Vector load of 16 bytes (8bits signed) from memory -class Load16BNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } +//------------------------------StoreVectorNode-------------------------------- +// Store Vector to memory +class StoreVectorNode : public StoreNode { public: - Load16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,16)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store16B; } - virtual uint length() const { return 16; } -}; + StoreVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) + : StoreNode(c, mem, adr, at, val) { + assert(val->is_Vector() || val->is_LoadVector(), "sanity"); + init_class_id(Class_StoreVector); + } -//------------------------------Load8BNode-------------------------------------- -// Vector load of 8 bytes (8bits signed) from memory -class Load8BNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } - public: - Load8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store8B; } - virtual uint length() const { return 8; } -}; + const TypeVect* vect_type() const { return in(MemNode::ValueIn)->bottom_type()->is_vect(); } + uint length() const { return vect_type()->length(); } // Vector length -//------------------------------Load4BNode-------------------------------------- -// Vector load of 4 bytes (8bits signed) from memory -class Load4BNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } - public: - Load4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {} virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store4B; } - virtual uint length() const { return 4; } -}; -//------------------------------Load8CNode-------------------------------------- -// Vector load of 8 chars (16bits unsigned) from memory -class Load8CNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } - public: - Load8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store8C; } - virtual uint length() const { return 8; } -}; - -//------------------------------Load4CNode-------------------------------------- -// Vector load of 4 chars (16bits unsigned) from memory -class Load4CNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } - public: - Load4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store4C; } - virtual uint length() const { return 4; } -}; - -//------------------------------Load2CNode-------------------------------------- -// Vector load of 2 chars (16bits unsigned) from memory -class Load2CNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } - public: - Load2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store2C; } - virtual uint length() const { return 2; } -}; - -//------------------------------Load8SNode-------------------------------------- -// Vector load of 8 shorts (16bits signed) from memory -class Load8SNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } - public: - Load8SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store8C; } - virtual uint length() const { return 8; } -}; - -//------------------------------Load4SNode-------------------------------------- -// Vector load of 4 shorts (16bits signed) from memory -class Load4SNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } - public: - Load4SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store4C; } - virtual uint length() const { return 4; } -}; - -//------------------------------Load2SNode-------------------------------------- -// Vector load of 2 shorts (16bits signed) from memory -class Load2SNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } - public: - Load2SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store2C; } - virtual uint length() const { return 2; } -}; - -//------------------------------Load4INode-------------------------------------- -// Vector load of 4 integers (32bits signed) from memory -class Load4INode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } - public: - Load4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store4I; } - virtual uint length() const { return 4; } -}; - -//------------------------------Load2INode-------------------------------------- -// Vector load of 2 integers (32bits signed) from memory -class Load2INode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } - public: - Load2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT) - : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store2I; } - virtual uint length() const { return 2; } -}; - -//------------------------------Load2LNode-------------------------------------- -// Vector load of 2 longs (64bits signed) from memory -class Load2LNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_LONG; } - public: - Load2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeLong *tl = TypeLong::LONG) - : VectorLoadNode(c,mem,adr,at,vect_type(tl,2)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store2L; } - virtual uint length() const { return 2; } -}; - -//------------------------------Load4FNode-------------------------------------- -// Vector load of 4 floats (32bits) from memory -class Load4FNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } - public: - Load4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT) - : VectorLoadNode(c,mem,adr,at,vect_type(t,4)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store4F; } - virtual uint length() const { return 4; } -}; - -//------------------------------Load2FNode-------------------------------------- -// Vector load of 2 floats (32bits) from memory -class Load2FNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } - public: - Load2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT) - : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store2F; } - virtual uint length() const { return 2; } -}; - -//------------------------------Load2DNode-------------------------------------- -// Vector load of 2 doubles (64bits) from memory -class Load2DNode : public VectorLoadNode { - protected: - virtual BasicType elt_basic_type() const { return T_DOUBLE; } - public: - Load2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::DOUBLE) - : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {} - virtual int Opcode() const; - virtual int store_Opcode() const { return Op_Store2D; } - virtual uint length() const { return 2; } -}; - - -//------------------------------VectorStoreNode-------------------------------------- -// Vector Store to memory -class VectorStoreNode : public StoreNode { - virtual uint size_of() const { return sizeof(*this); } - - protected: - virtual BasicType elt_basic_type() const = 0; // Vector element basic type - - public: - VectorStoreNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : StoreNode(c,mem,adr,at,val) { - init_class_id(Class_VectorStore); - } - virtual int Opcode() const; - - virtual uint length() const = 0; // Vector length - - // Element and vector type - const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); } - const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); } - - virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); } + virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(memory_size()); } virtual BasicType memory_type() const { return T_VOID; } - virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); } + virtual int memory_size() const { return vect_type()->length_in_bytes(); } - // Vector opcode from scalar opcode - static int opcode(int sopc, uint vlen); - - static VectorStoreNode* make(Compile* C, int opc, Node* ctl, Node* mem, + static StoreVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem, Node* adr, const TypePtr* atyp, Node* val, uint vlen); }; -//------------------------------Store16BNode-------------------------------------- -// Vector store of 16 bytes (8bits signed) to memory -class Store16BNode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } - public: - Store16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} - virtual int Opcode() const; - virtual uint length() const { return 16; } -}; -//------------------------------Store8BNode-------------------------------------- -// Vector store of 8 bytes (8bits signed) to memory -class Store8BNode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } - public: - Store8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} - virtual int Opcode() const; - virtual uint length() const { return 8; } -}; +//=========================Promote_Scalar_to_Vector============================ -//------------------------------Store4BNode-------------------------------------- -// Vector store of 4 bytes (8bits signed) to memory -class Store4BNode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } +//------------------------------ReplicateBNode--------------------------------- +// Replicate byte scalar to be vector +class ReplicateBNode : public VectorNode { public: - Store4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} + ReplicateBNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {} virtual int Opcode() const; - virtual uint length() const { return 4; } }; -//------------------------------Store8CNode-------------------------------------- -// Vector store of 8 chars (16bits signed/unsigned) to memory -class Store8CNode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } +//------------------------------ReplicateCNode--------------------------------- +// Replicate char scalar to be vector +class ReplicateCNode : public VectorNode { public: - Store8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} + ReplicateCNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {} virtual int Opcode() const; - virtual uint length() const { return 8; } }; -//------------------------------Store4CNode-------------------------------------- -// Vector store of 4 chars (16bits signed/unsigned) to memory -class Store4CNode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } +//------------------------------ReplicateSNode--------------------------------- +// Replicate short scalar to be vector +class ReplicateSNode : public VectorNode { public: - Store4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} + ReplicateSNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {} virtual int Opcode() const; - virtual uint length() const { return 4; } }; -//------------------------------Store2CNode-------------------------------------- -// Vector store of 2 chars (16bits signed/unsigned) to memory -class Store2CNode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } +//------------------------------ReplicateINode--------------------------------- +// Replicate int scalar to be vector +class ReplicateINode : public VectorNode { public: - Store2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} + ReplicateINode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {} virtual int Opcode() const; - virtual uint length() const { return 2; } }; -//------------------------------Store4INode-------------------------------------- -// Vector store of 4 integers (32bits signed) to memory -class Store4INode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } +//------------------------------ReplicateLNode--------------------------------- +// Replicate long scalar to be vector +class ReplicateLNode : public VectorNode { public: - Store4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} + ReplicateLNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {} virtual int Opcode() const; - virtual uint length() const { return 4; } }; -//------------------------------Store2INode-------------------------------------- -// Vector store of 2 integers (32bits signed) to memory -class Store2INode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } +//------------------------------ReplicateFNode--------------------------------- +// Replicate float scalar to be vector +class ReplicateFNode : public VectorNode { public: - Store2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} + ReplicateFNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {} virtual int Opcode() const; - virtual uint length() const { return 2; } }; -//------------------------------Store2LNode-------------------------------------- -// Vector store of 2 longs (64bits signed) to memory -class Store2LNode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_LONG; } +//------------------------------ReplicateDNode--------------------------------- +// Replicate double scalar to be vector +class ReplicateDNode : public VectorNode { public: - Store2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} + ReplicateDNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {} virtual int Opcode() const; - virtual uint length() const { return 2; } }; -//------------------------------Store4FNode-------------------------------------- -// Vector store of 4 floats (32bits) to memory -class Store4FNode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } - public: - Store4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} - virtual int Opcode() const; - virtual uint length() const { return 4; } -}; +//========================Pack_Scalars_into_a_Vector=========================== -//------------------------------Store2FNode-------------------------------------- -// Vector store of 2 floats (32bits) to memory -class Store2FNode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } - public: - Store2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} - virtual int Opcode() const; - virtual uint length() const { return 2; } -}; - -//------------------------------Store2DNode-------------------------------------- -// Vector store of 2 doubles (64bits) to memory -class Store2DNode : public VectorStoreNode { - protected: - virtual BasicType elt_basic_type() const { return T_DOUBLE; } - public: - Store2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val) - : VectorStoreNode(c,mem,adr,at,val) {} - virtual int Opcode() const; - virtual uint length() const { return 2; } -}; - -//=========================Promote_Scalar_to_Vector==================================== - -//------------------------------Replicate16BNode--------------------------------------- -// Replicate byte scalar to be vector of 16 bytes -class Replicate16BNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } - public: - Replicate16BNode(Node* in1) : VectorNode(in1, 16) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate8BNode--------------------------------------- -// Replicate byte scalar to be vector of 8 bytes -class Replicate8BNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } - public: - Replicate8BNode(Node* in1) : VectorNode(in1, 8) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate4BNode--------------------------------------- -// Replicate byte scalar to be vector of 4 bytes -class Replicate4BNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } - public: - Replicate4BNode(Node* in1) : VectorNode(in1, 4) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate8CNode--------------------------------------- -// Replicate char scalar to be vector of 8 chars -class Replicate8CNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } - public: - Replicate8CNode(Node* in1) : VectorNode(in1, 8) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate4CNode--------------------------------------- -// Replicate char scalar to be vector of 4 chars -class Replicate4CNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } - public: - Replicate4CNode(Node* in1) : VectorNode(in1, 4) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate2CNode--------------------------------------- -// Replicate char scalar to be vector of 2 chars -class Replicate2CNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } - public: - Replicate2CNode(Node* in1) : VectorNode(in1, 2) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate8SNode--------------------------------------- -// Replicate short scalar to be vector of 8 shorts -class Replicate8SNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } - public: - Replicate8SNode(Node* in1) : VectorNode(in1, 8) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate4SNode--------------------------------------- -// Replicate short scalar to be vector of 4 shorts -class Replicate4SNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } - public: - Replicate4SNode(Node* in1) : VectorNode(in1, 4) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate2SNode--------------------------------------- -// Replicate short scalar to be vector of 2 shorts -class Replicate2SNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } - public: - Replicate2SNode(Node* in1) : VectorNode(in1, 2) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate4INode--------------------------------------- -// Replicate int scalar to be vector of 4 ints -class Replicate4INode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } - public: - Replicate4INode(Node* in1) : VectorNode(in1, 4) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate2INode--------------------------------------- -// Replicate int scalar to be vector of 2 ints -class Replicate2INode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } - public: - Replicate2INode(Node* in1) : VectorNode(in1, 2) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate2LNode--------------------------------------- -// Replicate long scalar to be vector of 2 longs -class Replicate2LNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_LONG; } - public: - Replicate2LNode(Node* in1) : VectorNode(in1, 2) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate4FNode--------------------------------------- -// Replicate float scalar to be vector of 4 floats -class Replicate4FNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } - public: - Replicate4FNode(Node* in1) : VectorNode(in1, 4) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate2FNode--------------------------------------- -// Replicate float scalar to be vector of 2 floats -class Replicate2FNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } - public: - Replicate2FNode(Node* in1) : VectorNode(in1, 2) {} - virtual int Opcode() const; -}; - -//------------------------------Replicate2DNode--------------------------------------- -// Replicate double scalar to be vector of 2 doubles -class Replicate2DNode : public VectorNode { - protected: - virtual BasicType elt_basic_type() const { return T_DOUBLE; } - public: - Replicate2DNode(Node* in1) : VectorNode(in1, 2) {} - virtual int Opcode() const; -}; - -//========================Pack_Scalars_into_a_Vector============================== - //------------------------------PackNode--------------------------------------- // Pack parent class (not for code generation). class PackNode : public VectorNode { public: - PackNode(Node* in1) : VectorNode(in1, 1) {} - PackNode(Node* in1, Node* n2) : VectorNode(in1, n2, 2) {} + PackNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {} + PackNode(Node* in1, Node* n2, const TypeVect* vt) : VectorNode(in1, n2, vt) {} virtual int Opcode() const; - void add_opd(Node* n) { - add_req(n); - _length++; - assert(_length == req() - 1, "vector length matches edge count"); + void add_opd(uint i, Node* n) { + init_req(i+1, n); } // Create a binary tree form for Packs. [lo, hi) (half-open) range Node* binaryTreePack(Compile* C, int lo, int hi); - static PackNode* make(Compile* C, Node* s, const Type* elt_t); + static PackNode* make(Compile* C, Node* s, uint vlen, BasicType bt); }; //------------------------------PackBNode--------------------------------------- // Pack byte scalars into vector class PackBNode : public PackNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } public: - PackBNode(Node* in1) : PackNode(in1) {} + PackBNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {} virtual int Opcode() const; }; @@ -970,10 +434,8 @@ //------------------------------PackCNode--------------------------------------- // Pack char scalars into vector class PackCNode : public PackNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } public: - PackCNode(Node* in1) : PackNode(in1) {} + PackCNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {} virtual int Opcode() const; }; @@ -980,10 +442,9 @@ //------------------------------PackSNode--------------------------------------- // Pack short scalars into a vector class PackSNode : public PackNode { - protected: - virtual BasicType elt_basic_type() const { return T_SHORT; } public: - PackSNode(Node* in1) : PackNode(in1) {} + PackSNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {} + PackSNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {} virtual int Opcode() const; }; @@ -990,11 +451,9 @@ //------------------------------PackINode--------------------------------------- // Pack integer scalars into a vector class PackINode : public PackNode { - protected: - virtual BasicType elt_basic_type() const { return T_INT; } public: - PackINode(Node* in1) : PackNode(in1) {} - PackINode(Node* in1, Node* in2) : PackNode(in1, in2) {} + PackINode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {} + PackINode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {} virtual int Opcode() const; }; @@ -1001,22 +460,26 @@ //------------------------------PackLNode--------------------------------------- // Pack long scalars into a vector class PackLNode : public PackNode { - protected: - virtual BasicType elt_basic_type() const { return T_LONG; } public: - PackLNode(Node* in1) : PackNode(in1) {} - PackLNode(Node* in1, Node* in2) : PackNode(in1, in2) {} + PackLNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {} + PackLNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {} virtual int Opcode() const; }; +//------------------------------Pack2LNode-------------------------------------- +// Pack 2 long scalars into a vector +class Pack2LNode : public PackNode { + public: + Pack2LNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {} + virtual int Opcode() const; +}; + //------------------------------PackFNode--------------------------------------- // Pack float scalars into vector class PackFNode : public PackNode { - protected: - virtual BasicType elt_basic_type() const { return T_FLOAT; } public: - PackFNode(Node* in1) : PackNode(in1) {} - PackFNode(Node* in1, Node* in2) : PackNode(in1, in2) {} + PackFNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {} + PackFNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {} virtual int Opcode() const; }; @@ -1023,39 +486,20 @@ //------------------------------PackDNode--------------------------------------- // Pack double scalars into a vector class PackDNode : public PackNode { - protected: - virtual BasicType elt_basic_type() const { return T_DOUBLE; } public: - PackDNode(Node* in1) : PackNode(in1) {} - PackDNode(Node* in1, Node* in2) : PackNode(in1, in2) {} + PackDNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {} + PackDNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {} virtual int Opcode() const; }; -// The Pack2xN nodes assist code generation. They are created from -// Pack4C, etc. nodes in final_graph_reshape in the form of a -// balanced, binary tree. - -//------------------------------Pack2x1BNode----------------------------------------- -// Pack 2 1-byte integers into vector of 2 bytes -class Pack2x1BNode : public PackNode { - protected: - virtual BasicType elt_basic_type() const { return T_BYTE; } +//------------------------------Pack2DNode-------------------------------------- +// Pack 2 double scalars into a vector +class Pack2DNode : public PackNode { public: - Pack2x1BNode(Node *in1, Node* in2) : PackNode(in1, in2) {} + Pack2DNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {} virtual int Opcode() const; - virtual uint ideal_reg() const { return Op_RegI; } }; -//------------------------------Pack2x2BNode--------------------------------------- -// Pack 2 2-byte integers into vector of 4 bytes -class Pack2x2BNode : public PackNode { - protected: - virtual BasicType elt_basic_type() const { return T_CHAR; } - public: - Pack2x2BNode(Node *in1, Node* in2) : PackNode(in1, in2) {} - virtual int Opcode() const; - virtual uint ideal_reg() const { return Op_RegI; } -}; //========================Extract_Scalar_from_Vector=============================== @@ -1069,7 +513,7 @@ virtual int Opcode() const; uint pos() const { return in(2)->get_int(); } - static Node* make(Compile* C, Node* v, uint position, const Type* opd_t); + static Node* make(Compile* C, Node* v, uint position, BasicType bt); }; //------------------------------ExtractBNode--------------------------------------- --- old/src/share/vm/runtime/vmStructs.cpp Sat Jun 2 20:04:24 2012 +++ new/src/share/vm/runtime/vmStructs.cpp Sat Jun 2 20:04:23 2012 @@ -1967,57 +1967,22 @@ declare_c2_type(LShiftVCNode, VectorNode) \ declare_c2_type(LShiftVSNode, VectorNode) \ declare_c2_type(LShiftVINode, VectorNode) \ - declare_c2_type(URShiftVBNode, VectorNode) \ - declare_c2_type(URShiftVCNode, VectorNode) \ - declare_c2_type(URShiftVSNode, VectorNode) \ - declare_c2_type(URShiftVINode, VectorNode) \ + declare_c2_type(RShiftVBNode, VectorNode) \ + declare_c2_type(RShiftVCNode, VectorNode) \ + declare_c2_type(RShiftVSNode, VectorNode) \ + declare_c2_type(RShiftVINode, VectorNode) \ declare_c2_type(AndVNode, VectorNode) \ declare_c2_type(OrVNode, VectorNode) \ declare_c2_type(XorVNode, VectorNode) \ - declare_c2_type(VectorLoadNode, LoadNode) \ - declare_c2_type(Load16BNode, VectorLoadNode) \ - declare_c2_type(Load8BNode, VectorLoadNode) \ - declare_c2_type(Load4BNode, VectorLoadNode) \ - declare_c2_type(Load8CNode, VectorLoadNode) \ - declare_c2_type(Load4CNode, VectorLoadNode) \ - declare_c2_type(Load2CNode, VectorLoadNode) \ - declare_c2_type(Load8SNode, VectorLoadNode) \ - declare_c2_type(Load4SNode, VectorLoadNode) \ - declare_c2_type(Load2SNode, VectorLoadNode) \ - declare_c2_type(Load4INode, VectorLoadNode) \ - declare_c2_type(Load2INode, VectorLoadNode) \ - declare_c2_type(Load2LNode, VectorLoadNode) \ - declare_c2_type(Load4FNode, VectorLoadNode) \ - declare_c2_type(Load2FNode, VectorLoadNode) \ - declare_c2_type(Load2DNode, VectorLoadNode) \ - declare_c2_type(VectorStoreNode, StoreNode) \ - declare_c2_type(Store16BNode, VectorStoreNode) \ - declare_c2_type(Store8BNode, VectorStoreNode) \ - declare_c2_type(Store4BNode, VectorStoreNode) \ - declare_c2_type(Store8CNode, VectorStoreNode) \ - declare_c2_type(Store4CNode, VectorStoreNode) \ - declare_c2_type(Store2CNode, VectorStoreNode) \ - declare_c2_type(Store4INode, VectorStoreNode) \ - declare_c2_type(Store2INode, VectorStoreNode) \ - declare_c2_type(Store2LNode, VectorStoreNode) \ - declare_c2_type(Store4FNode, VectorStoreNode) \ - declare_c2_type(Store2FNode, VectorStoreNode) \ - declare_c2_type(Store2DNode, VectorStoreNode) \ - declare_c2_type(Replicate16BNode, VectorNode) \ - declare_c2_type(Replicate8BNode, VectorNode) \ - declare_c2_type(Replicate4BNode, VectorNode) \ - declare_c2_type(Replicate8CNode, VectorNode) \ - declare_c2_type(Replicate4CNode, VectorNode) \ - declare_c2_type(Replicate2CNode, VectorNode) \ - declare_c2_type(Replicate8SNode, VectorNode) \ - declare_c2_type(Replicate4SNode, VectorNode) \ - declare_c2_type(Replicate2SNode, VectorNode) \ - declare_c2_type(Replicate4INode, VectorNode) \ - declare_c2_type(Replicate2INode, VectorNode) \ - declare_c2_type(Replicate2LNode, VectorNode) \ - declare_c2_type(Replicate4FNode, VectorNode) \ - declare_c2_type(Replicate2FNode, VectorNode) \ - declare_c2_type(Replicate2DNode, VectorNode) \ + declare_c2_type(LoadVectorNode, LoadNode) \ + declare_c2_type(StoreVectorNode, StoreNode) \ + declare_c2_type(ReplicateBNode, VectorNode) \ + declare_c2_type(ReplicateCNode, VectorNode) \ + declare_c2_type(ReplicateSNode, VectorNode) \ + declare_c2_type(ReplicateINode, VectorNode) \ + declare_c2_type(ReplicateLNode, VectorNode) \ + declare_c2_type(ReplicateFNode, VectorNode) \ + declare_c2_type(ReplicateDNode, VectorNode) \ declare_c2_type(PackNode, VectorNode) \ declare_c2_type(PackBNode, PackNode) \ declare_c2_type(PackCNode, PackNode) \ @@ -2026,8 +1991,8 @@ declare_c2_type(PackLNode, PackNode) \ declare_c2_type(PackFNode, PackNode) \ declare_c2_type(PackDNode, PackNode) \ - declare_c2_type(Pack2x1BNode, PackNode) \ - declare_c2_type(Pack2x2BNode, PackNode) \ + declare_c2_type(Pack2LNode, PackNode) \ + declare_c2_type(Pack2DNode, PackNode) \ declare_c2_type(ExtractNode, Node) \ declare_c2_type(ExtractBNode, ExtractNode) \ declare_c2_type(ExtractCNode, ExtractNode) \ --- /dev/null Sat Jun 2 20:04:24 2012 +++ new/test/compiler/7119644/TestByteDoubleVect.java Sat Jun 2 20:04:24 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteDoubleVect + */ + +public class TestByteDoubleVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Byte + Double vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + byte[] a1 = new byte[ARRLEN]; + byte[] a2 = new byte[ARRLEN]; + double[] b1 = new double[ARRLEN]; + double[] b2 = new double[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103.; + } + } + static void test_vi_neg(byte[] a, double[] b, byte c, double d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(byte[] a, byte[] b, double[] c, double[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(byte[] a, double[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103.; + } + } + static void test_vi_oppos(byte[] a, double[] b, byte c, double d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(byte[] a, byte[] b, double[] c, double[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(byte[] a, double[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103.; + } + } + static void test_vi_aln(byte[] a, double[] b, byte c, double d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(byte[] a, byte[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(byte[] a, byte[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(byte[] a, double[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103.; + } + } + static void test_vi_unaln(byte[] a, double[] b, byte c, double d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(byte[] a, byte[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(byte[] a, byte[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, byte elem, byte val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, double elem, double val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:25 2012 +++ new/test/compiler/7119644/TestByteFloatVect.java Sat Jun 2 20:04:25 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteFloatVect + */ + +public class TestByteFloatVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Byte + Float vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + byte[] a1 = new byte[ARRLEN]; + byte[] a2 = new byte[ARRLEN]; + float[] b1 = new float[ARRLEN]; + float[] b2 = new float[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103.f; + } + } + static void test_vi_neg(byte[] a, float[] b, byte c, float d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(byte[] a, byte[] b, float[] c, float[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(byte[] a, float[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103.f; + } + } + static void test_vi_oppos(byte[] a, float[] b, byte c, float d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(byte[] a, byte[] b, float[] c, float[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(byte[] a, float[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103.f; + } + } + static void test_vi_aln(byte[] a, float[] b, byte c, float d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(byte[] a, byte[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(byte[] a, byte[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(byte[] a, float[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103.f; + } + } + static void test_vi_unaln(byte[] a, float[] b, byte c, float d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(byte[] a, byte[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(byte[] a, byte[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, byte elem, byte val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, float elem, float val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:25 2012 +++ new/test/compiler/7119644/TestByteIntVect.java Sat Jun 2 20:04:25 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteIntVect + */ + +public class TestByteIntVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Byte + Integer vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + byte[] a1 = new byte[ARRLEN]; + byte[] a2 = new byte[ARRLEN]; + int[] b1 = new int[ARRLEN]; + int[] b2 = new int[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103; + } + } + static void test_vi_neg(byte[] a, int[] b, byte c, int d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(byte[] a, byte[] b, int[] c, int[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(byte[] a, int[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103; + } + } + static void test_vi_oppos(byte[] a, int[] b, byte c, int d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(byte[] a, byte[] b, int[] c, int[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(byte[] a, int[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_aln(byte[] a, int[] b, byte c, int d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(byte[] a, byte[] b, int[] c, int[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(byte[] a, byte[] b, int[] c, int[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(byte[] a, int[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_unaln(byte[] a, int[] b, byte c, int d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(byte[] a, byte[] b, int[] c, int[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(byte[] a, byte[] b, int[] c, int[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, byte elem, byte val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:25 2012 +++ new/test/compiler/7119644/TestByteLongVect.java Sat Jun 2 20:04:25 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteLongVect + */ + +public class TestByteLongVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Byte + Long vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + byte[] a1 = new byte[ARRLEN]; + byte[] a2 = new byte[ARRLEN]; + long[] b1 = new long[ARRLEN]; + long[] b2 = new long[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103; + } + } + static void test_vi_neg(byte[] a, long[] b, byte c, long d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(byte[] a, byte[] b, long[] c, long[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(byte[] a, long[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103; + } + } + static void test_vi_oppos(byte[] a, long[] b, byte c, long d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(byte[] a, byte[] b, long[] c, long[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(byte[] a, long[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_aln(byte[] a, long[] b, byte c, long d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(byte[] a, byte[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(byte[] a, byte[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(byte[] a, long[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_unaln(byte[] a, long[] b, byte c, long d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(byte[] a, byte[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(byte[] a, byte[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, byte elem, byte val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:26 2012 +++ new/test/compiler/7119644/TestByteShortVect.java Sat Jun 2 20:04:26 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteShortVect + */ + +public class TestByteShortVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Byte + Short vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + byte[] a1 = new byte[ARRLEN]; + byte[] a2 = new byte[ARRLEN]; + short[] b1 = new short[ARRLEN]; + short[] b2 = new short[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103; + } + } + static void test_vi_neg(byte[] a, short[] b, byte c, short d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(byte[] a, byte[] b, short[] c, short[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(byte[] a, short[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103; + } + } + static void test_vi_oppos(byte[] a, short[] b, byte c, short d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(byte[] a, byte[] b, short[] c, short[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(byte[] a, short[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_aln(byte[] a, short[] b, byte c, short d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(byte[] a, byte[] b, short[] c, short[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(byte[] a, byte[] b, short[] c, short[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(byte[] a, short[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_unaln(byte[] a, short[] b, byte c, short d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(byte[] a, byte[] b, short[] c, short[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(byte[] a, byte[] b, short[] c, short[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, byte elem, byte val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, short elem, short val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:26 2012 +++ new/test/compiler/7119644/TestByteVect.java Sat Jun 2 20:04:26 2012 @@ -0,0 +1,953 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteVect + */ + +public class TestByteVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Byte vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + byte[] a1 = new byte[ARRLEN]; + byte[] a2 = new byte[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + } + } + static void test_vi_neg(byte[] a, byte b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b; + } + } + static void test_cp_neg(byte[] a, byte[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + } + } + static void test_2ci_neg(byte[] a, byte[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = -123; + b[i] = -103; + } + } + static void test_2vi_neg(byte[] a, byte[] b, byte c, byte d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_ci_oppos(byte[] a) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + } + } + static void test_vi_oppos(byte[] a, byte b) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[limit-i] = b; + } + } + static void test_cp_oppos(byte[] a, byte[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + } + } + static void test_2ci_oppos(byte[] a, byte[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103; + } + } + static void test_2vi_oppos(byte[] a, byte[] b, byte c, byte d) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_ci_off(byte[] a) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123; + } + } + static void test_vi_off(byte[] a, byte b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b; + } + } + static void test_cp_off(byte[] a, byte[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b[i+OFFSET]; + } + } + static void test_2ci_off(byte[] a, byte[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123; + b[i+OFFSET] = -103; + } + } + static void test_2vi_off(byte[] a, byte[] b, byte c, byte d) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = c; + b[i+OFFSET] = d; + } + } + static void test_ci_inv(byte[] a, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123; + } + } + static void test_vi_inv(byte[] a, byte b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b; + } + } + static void test_cp_inv(byte[] a, byte[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b[i+k]; + } + } + static void test_2ci_inv(byte[] a, byte[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123; + b[i+k] = -103; + } + } + static void test_2vi_inv(byte[] a, byte[] b, byte c, byte d, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = c; + b[i+k] = d; + } + } + static void test_ci_scl(byte[] a) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123; + } + } + static void test_vi_scl(byte[] a, byte b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b; + } + } + static void test_cp_scl(byte[] a, byte[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b[i*SCALE]; + } + } + static void test_2ci_scl(byte[] a, byte[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123; + b[i*SCALE] = -103; + } + } + static void test_2vi_scl(byte[] a, byte[] b, byte c, byte d) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = c; + b[i*SCALE] = d; + } + } + static void test_cp_alndst(byte[] a, byte[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + } + } + static void test_cp_alnsrc(byte[] a, byte[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + } + } + static void test_2ci_aln(byte[] a, byte[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_2vi_aln(byte[] a, byte[] b, byte c, byte d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_unalndst(byte[] a, byte[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + } + } + static void test_cp_unalnsrc(byte[] a, byte[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + } + } + static void test_2ci_unaln(byte[] a, byte[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_2vi_unaln(byte[] a, byte[] b, byte c, byte d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + + static int verify(String text, int i, byte elem, byte val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:27 2012 +++ new/test/compiler/7119644/TestDoubleVect.java Sat Jun 2 20:04:27 2012 @@ -0,0 +1,953 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestDoubleVect + */ + +public class TestDoubleVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Double vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + double[] a1 = new double[ARRLEN]; + double[] a2 = new double[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123.; + } + } + static void test_vi_neg(double[] a, double b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b; + } + } + static void test_cp_neg(double[] a, double[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + } + } + static void test_2ci_neg(double[] a, double[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = -123.; + b[i] = -103.; + } + } + static void test_2vi_neg(double[] a, double[] b, double c, double d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_ci_oppos(double[] a) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123.; + } + } + static void test_vi_oppos(double[] a, double b) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[limit-i] = b; + } + } + static void test_cp_oppos(double[] a, double[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + } + } + static void test_2ci_oppos(double[] a, double[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123.; + b[i] = -103.; + } + } + static void test_2vi_oppos(double[] a, double[] b, double c, double d) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_ci_off(double[] a) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123.; + } + } + static void test_vi_off(double[] a, double b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b; + } + } + static void test_cp_off(double[] a, double[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b[i+OFFSET]; + } + } + static void test_2ci_off(double[] a, double[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123.; + b[i+OFFSET] = -103.; + } + } + static void test_2vi_off(double[] a, double[] b, double c, double d) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = c; + b[i+OFFSET] = d; + } + } + static void test_ci_inv(double[] a, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123.; + } + } + static void test_vi_inv(double[] a, double b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b; + } + } + static void test_cp_inv(double[] a, double[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b[i+k]; + } + } + static void test_2ci_inv(double[] a, double[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123.; + b[i+k] = -103.; + } + } + static void test_2vi_inv(double[] a, double[] b, double c, double d, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = c; + b[i+k] = d; + } + } + static void test_ci_scl(double[] a) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123.; + } + } + static void test_vi_scl(double[] a, double b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b; + } + } + static void test_cp_scl(double[] a, double[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b[i*SCALE]; + } + } + static void test_2ci_scl(double[] a, double[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123.; + b[i*SCALE] = -103.; + } + } + static void test_2vi_scl(double[] a, double[] b, double c, double d) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = c; + b[i*SCALE] = d; + } + } + static void test_cp_alndst(double[] a, double[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + } + } + static void test_cp_alnsrc(double[] a, double[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + } + } + static void test_2ci_aln(double[] a, double[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123.; + b[i] = -103.; + } + } + static void test_2vi_aln(double[] a, double[] b, double c, double d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_unalndst(double[] a, double[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + } + } + static void test_cp_unalnsrc(double[] a, double[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + } + } + static void test_2ci_unaln(double[] a, double[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123.; + b[i] = -103.; + } + } + static void test_2vi_unaln(double[] a, double[] b, double c, double d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + + static int verify(String text, int i, double elem, double val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:27 2012 +++ new/test/compiler/7119644/TestFloatDoubleVect.java Sat Jun 2 20:04:27 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestFloatDoubleVect + */ + +public class TestFloatDoubleVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Float + Double vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + float[] a1 = new float[ARRLEN]; + float[] a2 = new float[ARRLEN]; + double[] b1 = new double[ARRLEN]; + double[] b2 = new double[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123.f; + b[i] = -103.; + } + } + static void test_vi_neg(float[] a, double[] b, float c, double d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(float[] a, float[] b, double[] c, double[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(float[] a, double[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123.f; + b[i] = -103.; + } + } + static void test_vi_oppos(float[] a, double[] b, float c, double d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(float[] a, float[] b, double[] c, double[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(float[] a, double[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123.f; + b[i] = -103.; + } + } + static void test_vi_aln(float[] a, double[] b, float c, double d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(float[] a, float[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(float[] a, float[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(float[] a, double[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123.f; + b[i] = -103.; + } + } + static void test_vi_unaln(float[] a, double[] b, float c, double d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(float[] a, float[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(float[] a, float[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, float elem, float val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, double elem, double val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:28 2012 +++ new/test/compiler/7119644/TestFloatVect.java Sat Jun 2 20:04:27 2012 @@ -0,0 +1,953 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestFloatVect + */ + +public class TestFloatVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Float vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + float[] a1 = new float[ARRLEN]; + float[] a2 = new float[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123.f; + } + } + static void test_vi_neg(float[] a, float b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b; + } + } + static void test_cp_neg(float[] a, float[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + } + } + static void test_2ci_neg(float[] a, float[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = -123.f; + b[i] = -103.f; + } + } + static void test_2vi_neg(float[] a, float[] b, float c, float d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_ci_oppos(float[] a) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123.f; + } + } + static void test_vi_oppos(float[] a, float b) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[limit-i] = b; + } + } + static void test_cp_oppos(float[] a, float[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + } + } + static void test_2ci_oppos(float[] a, float[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123.f; + b[i] = -103.f; + } + } + static void test_2vi_oppos(float[] a, float[] b, float c, float d) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_ci_off(float[] a) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123.f; + } + } + static void test_vi_off(float[] a, float b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b; + } + } + static void test_cp_off(float[] a, float[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b[i+OFFSET]; + } + } + static void test_2ci_off(float[] a, float[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123.f; + b[i+OFFSET] = -103.f; + } + } + static void test_2vi_off(float[] a, float[] b, float c, float d) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = c; + b[i+OFFSET] = d; + } + } + static void test_ci_inv(float[] a, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123.f; + } + } + static void test_vi_inv(float[] a, float b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b; + } + } + static void test_cp_inv(float[] a, float[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b[i+k]; + } + } + static void test_2ci_inv(float[] a, float[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123.f; + b[i+k] = -103.f; + } + } + static void test_2vi_inv(float[] a, float[] b, float c, float d, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = c; + b[i+k] = d; + } + } + static void test_ci_scl(float[] a) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123.f; + } + } + static void test_vi_scl(float[] a, float b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b; + } + } + static void test_cp_scl(float[] a, float[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b[i*SCALE]; + } + } + static void test_2ci_scl(float[] a, float[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123.f; + b[i*SCALE] = -103.f; + } + } + static void test_2vi_scl(float[] a, float[] b, float c, float d) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = c; + b[i*SCALE] = d; + } + } + static void test_cp_alndst(float[] a, float[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + } + } + static void test_cp_alnsrc(float[] a, float[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + } + } + static void test_2ci_aln(float[] a, float[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123.f; + b[i] = -103.f; + } + } + static void test_2vi_aln(float[] a, float[] b, float c, float d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_unalndst(float[] a, float[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + } + } + static void test_cp_unalnsrc(float[] a, float[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + } + } + static void test_2ci_unaln(float[] a, float[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123.f; + b[i] = -103.f; + } + } + static void test_2vi_unaln(float[] a, float[] b, float c, float d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + + static int verify(String text, int i, float elem, float val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:28 2012 +++ new/test/compiler/7119644/TestIntDoubleVect.java Sat Jun 2 20:04:28 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntDoubleVect + */ + +public class TestIntDoubleVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Integer + Double vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + int[] a1 = new int[ARRLEN]; + int[] a2 = new int[ARRLEN]; + double[] b1 = new double[ARRLEN]; + double[] b2 = new double[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103.; + } + } + static void test_vi_neg(int[] a, double[] b, int c, double d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(int[] a, int[] b, double[] c, double[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(int[] a, double[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103.; + } + } + static void test_vi_oppos(int[] a, double[] b, int c, double d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(int[] a, int[] b, double[] c, double[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(int[] a, double[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103.; + } + } + static void test_vi_aln(int[] a, double[] b, int c, double d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(int[] a, int[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(int[] a, int[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(int[] a, double[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103.; + } + } + static void test_vi_unaln(int[] a, double[] b, int c, double d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(int[] a, int[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(int[] a, int[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, double elem, double val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:28 2012 +++ new/test/compiler/7119644/TestIntFloatVect.java Sat Jun 2 20:04:28 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntFloatVect + */ + +public class TestIntFloatVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Integer + Float vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + int[] a1 = new int[ARRLEN]; + int[] a2 = new int[ARRLEN]; + float[] b1 = new float[ARRLEN]; + float[] b2 = new float[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103.f; + } + } + static void test_vi_neg(int[] a, float[] b, int c, float d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(int[] a, int[] b, float[] c, float[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(int[] a, float[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103.f; + } + } + static void test_vi_oppos(int[] a, float[] b, int c, float d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(int[] a, int[] b, float[] c, float[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(int[] a, float[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103.f; + } + } + static void test_vi_aln(int[] a, float[] b, int c, float d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(int[] a, int[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(int[] a, int[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(int[] a, float[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103.f; + } + } + static void test_vi_unaln(int[] a, float[] b, int c, float d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(int[] a, int[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(int[] a, int[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, float elem, float val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:29 2012 +++ new/test/compiler/7119644/TestIntLongVect.java Sat Jun 2 20:04:29 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntLongVect + */ + +public class TestIntLongVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Integer + Long vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + int[] a1 = new int[ARRLEN]; + int[] a2 = new int[ARRLEN]; + long[] b1 = new long[ARRLEN]; + long[] b2 = new long[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103; + } + } + static void test_vi_neg(int[] a, long[] b, int c, long d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(int[] a, int[] b, long[] c, long[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(int[] a, long[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103; + } + } + static void test_vi_oppos(int[] a, long[] b, int c, long d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(int[] a, int[] b, long[] c, long[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(int[] a, long[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_aln(int[] a, long[] b, int c, long d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(int[] a, int[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(int[] a, int[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(int[] a, long[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_unaln(int[] a, long[] b, int c, long d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(int[] a, int[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(int[] a, int[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:29 2012 +++ new/test/compiler/7119644/TestIntVect.java Sat Jun 2 20:04:29 2012 @@ -0,0 +1,953 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntVect + */ + +public class TestIntVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Integer vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + int[] a1 = new int[ARRLEN]; + int[] a2 = new int[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + } + } + static void test_vi_neg(int[] a, int b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b; + } + } + static void test_cp_neg(int[] a, int[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + } + } + static void test_2ci_neg(int[] a, int[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = -123; + b[i] = -103; + } + } + static void test_2vi_neg(int[] a, int[] b, int c, int d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_ci_oppos(int[] a) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + } + } + static void test_vi_oppos(int[] a, int b) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[limit-i] = b; + } + } + static void test_cp_oppos(int[] a, int[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + } + } + static void test_2ci_oppos(int[] a, int[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103; + } + } + static void test_2vi_oppos(int[] a, int[] b, int c, int d) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_ci_off(int[] a) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123; + } + } + static void test_vi_off(int[] a, int b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b; + } + } + static void test_cp_off(int[] a, int[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b[i+OFFSET]; + } + } + static void test_2ci_off(int[] a, int[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123; + b[i+OFFSET] = -103; + } + } + static void test_2vi_off(int[] a, int[] b, int c, int d) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = c; + b[i+OFFSET] = d; + } + } + static void test_ci_inv(int[] a, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123; + } + } + static void test_vi_inv(int[] a, int b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b; + } + } + static void test_cp_inv(int[] a, int[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b[i+k]; + } + } + static void test_2ci_inv(int[] a, int[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123; + b[i+k] = -103; + } + } + static void test_2vi_inv(int[] a, int[] b, int c, int d, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = c; + b[i+k] = d; + } + } + static void test_ci_scl(int[] a) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123; + } + } + static void test_vi_scl(int[] a, int b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b; + } + } + static void test_cp_scl(int[] a, int[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b[i*SCALE]; + } + } + static void test_2ci_scl(int[] a, int[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123; + b[i*SCALE] = -103; + } + } + static void test_2vi_scl(int[] a, int[] b, int c, int d) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = c; + b[i*SCALE] = d; + } + } + static void test_cp_alndst(int[] a, int[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + } + } + static void test_cp_alnsrc(int[] a, int[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + } + } + static void test_2ci_aln(int[] a, int[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_2vi_aln(int[] a, int[] b, int c, int d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_unalndst(int[] a, int[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + } + } + static void test_cp_unalnsrc(int[] a, int[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + } + } + static void test_2ci_unaln(int[] a, int[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_2vi_unaln(int[] a, int[] b, int c, int d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:30 2012 +++ new/test/compiler/7119644/TestLongDoubleVect.java Sat Jun 2 20:04:29 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestLongDoubleVect + */ + +public class TestLongDoubleVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Long + Double vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + long[] a1 = new long[ARRLEN]; + long[] a2 = new long[ARRLEN]; + double[] b1 = new double[ARRLEN]; + double[] b2 = new double[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103.; + } + } + static void test_vi_neg(long[] a, double[] b, long c, double d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(long[] a, long[] b, double[] c, double[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(long[] a, double[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103.; + } + } + static void test_vi_oppos(long[] a, double[] b, long c, double d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(long[] a, long[] b, double[] c, double[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(long[] a, double[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103.; + } + } + static void test_vi_aln(long[] a, double[] b, long c, double d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(long[] a, long[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(long[] a, long[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(long[] a, double[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103.; + } + } + static void test_vi_unaln(long[] a, double[] b, long c, double d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(long[] a, long[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(long[] a, long[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, double elem, double val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:30 2012 +++ new/test/compiler/7119644/TestLongFloatVect.java Sat Jun 2 20:04:30 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestLongFloatVect + */ + +public class TestLongFloatVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Long + Float vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + long[] a1 = new long[ARRLEN]; + long[] a2 = new long[ARRLEN]; + float[] b1 = new float[ARRLEN]; + float[] b2 = new float[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103.f; + } + } + static void test_vi_neg(long[] a, float[] b, long c, float d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(long[] a, long[] b, float[] c, float[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(long[] a, float[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103.f; + } + } + static void test_vi_oppos(long[] a, float[] b, long c, float d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(long[] a, long[] b, float[] c, float[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(long[] a, float[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103.f; + } + } + static void test_vi_aln(long[] a, float[] b, long c, float d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(long[] a, long[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(long[] a, long[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(long[] a, float[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103.f; + } + } + static void test_vi_unaln(long[] a, float[] b, long c, float d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(long[] a, long[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(long[] a, long[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, float elem, float val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:30 2012 +++ new/test/compiler/7119644/TestLongVect.java Sat Jun 2 20:04:30 2012 @@ -0,0 +1,953 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestLongVect + */ + +public class TestLongVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Long vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + long[] a1 = new long[ARRLEN]; + long[] a2 = new long[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + } + } + static void test_vi_neg(long[] a, long b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b; + } + } + static void test_cp_neg(long[] a, long[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + } + } + static void test_2ci_neg(long[] a, long[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = -123; + b[i] = -103; + } + } + static void test_2vi_neg(long[] a, long[] b, long c, long d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_ci_oppos(long[] a) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + } + } + static void test_vi_oppos(long[] a, long b) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[limit-i] = b; + } + } + static void test_cp_oppos(long[] a, long[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + } + } + static void test_2ci_oppos(long[] a, long[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103; + } + } + static void test_2vi_oppos(long[] a, long[] b, long c, long d) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_ci_off(long[] a) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123; + } + } + static void test_vi_off(long[] a, long b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b; + } + } + static void test_cp_off(long[] a, long[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b[i+OFFSET]; + } + } + static void test_2ci_off(long[] a, long[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123; + b[i+OFFSET] = -103; + } + } + static void test_2vi_off(long[] a, long[] b, long c, long d) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = c; + b[i+OFFSET] = d; + } + } + static void test_ci_inv(long[] a, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123; + } + } + static void test_vi_inv(long[] a, long b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b; + } + } + static void test_cp_inv(long[] a, long[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b[i+k]; + } + } + static void test_2ci_inv(long[] a, long[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123; + b[i+k] = -103; + } + } + static void test_2vi_inv(long[] a, long[] b, long c, long d, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = c; + b[i+k] = d; + } + } + static void test_ci_scl(long[] a) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123; + } + } + static void test_vi_scl(long[] a, long b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b; + } + } + static void test_cp_scl(long[] a, long[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b[i*SCALE]; + } + } + static void test_2ci_scl(long[] a, long[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123; + b[i*SCALE] = -103; + } + } + static void test_2vi_scl(long[] a, long[] b, long c, long d) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = c; + b[i*SCALE] = d; + } + } + static void test_cp_alndst(long[] a, long[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + } + } + static void test_cp_alnsrc(long[] a, long[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + } + } + static void test_2ci_aln(long[] a, long[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_2vi_aln(long[] a, long[] b, long c, long d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_unalndst(long[] a, long[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + } + } + static void test_cp_unalnsrc(long[] a, long[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + } + } + static void test_2ci_unaln(long[] a, long[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_2vi_unaln(long[] a, long[] b, long c, long d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:31 2012 +++ new/test/compiler/7119644/TestShortDoubleVect.java Sat Jun 2 20:04:31 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortDoubleVect + */ + +public class TestShortDoubleVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Short + Double vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + short[] a1 = new short[ARRLEN]; + short[] a2 = new short[ARRLEN]; + double[] b1 = new double[ARRLEN]; + double[] b2 = new double[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103.; + } + } + static void test_vi_neg(short[] a, double[] b, short c, double d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(short[] a, short[] b, double[] c, double[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(short[] a, double[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103.; + } + } + static void test_vi_oppos(short[] a, double[] b, short c, double d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(short[] a, short[] b, double[] c, double[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(short[] a, double[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103.; + } + } + static void test_vi_aln(short[] a, double[] b, short c, double d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(short[] a, short[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(short[] a, short[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(short[] a, double[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103.; + } + } + static void test_vi_unaln(short[] a, double[] b, short c, double d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(short[] a, short[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(short[] a, short[] b, double[] c, double[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, short elem, short val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, double elem, double val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:31 2012 +++ new/test/compiler/7119644/TestShortFloatVect.java Sat Jun 2 20:04:31 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortFloatVect + */ + +public class TestShortFloatVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Short + Float vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + short[] a1 = new short[ARRLEN]; + short[] a2 = new short[ARRLEN]; + float[] b1 = new float[ARRLEN]; + float[] b2 = new float[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103.f; + } + } + static void test_vi_neg(short[] a, float[] b, short c, float d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(short[] a, short[] b, float[] c, float[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(short[] a, float[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103.f; + } + } + static void test_vi_oppos(short[] a, float[] b, short c, float d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(short[] a, short[] b, float[] c, float[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(short[] a, float[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103.f; + } + } + static void test_vi_aln(short[] a, float[] b, short c, float d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(short[] a, short[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(short[] a, short[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(short[] a, float[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103.f; + } + } + static void test_vi_unaln(short[] a, float[] b, short c, float d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(short[] a, short[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(short[] a, short[] b, float[] c, float[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, short elem, short val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, float elem, float val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:32 2012 +++ new/test/compiler/7119644/TestShortIntVect.java Sat Jun 2 20:04:31 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortIntVect + */ + +public class TestShortIntVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Short + Integer vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + short[] a1 = new short[ARRLEN]; + short[] a2 = new short[ARRLEN]; + int[] b1 = new int[ARRLEN]; + int[] b2 = new int[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103; + } + } + static void test_vi_neg(short[] a, int[] b, short c, int d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(short[] a, short[] b, int[] c, int[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(short[] a, int[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103; + } + } + static void test_vi_oppos(short[] a, int[] b, short c, int d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(short[] a, short[] b, int[] c, int[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(short[] a, int[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_aln(short[] a, int[] b, short c, int d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(short[] a, short[] b, int[] c, int[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(short[] a, short[] b, int[] c, int[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(short[] a, int[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_unaln(short[] a, int[] b, short c, int d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(short[] a, short[] b, int[] c, int[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(short[] a, short[] b, int[] c, int[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, short elem, short val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:32 2012 +++ new/test/compiler/7119644/TestShortLongVect.java Sat Jun 2 20:04:32 2012 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortLongVect + */ + +public class TestShortLongVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Short + Long vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + short[] a1 = new short[ARRLEN]; + short[] a2 = new short[ARRLEN]; + long[] b1 = new long[ARRLEN]; + long[] b2 = new long[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + b[i] = -103; + } + } + static void test_vi_neg(short[] a, long[] b, short c, long d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_cp_neg(short[] a, short[] b, long[] c, long[] d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + c[i] = d[i]; + } + } + static void test_ci_oppos(short[] a, long[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103; + } + } + static void test_vi_oppos(short[] a, long[] b, short c, long d) { + int limit = a.length-1; + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_cp_oppos(short[] a, short[] b, long[] c, long[] d) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + c[limit-i] = d[i]; + } + } + static void test_ci_aln(short[] a, long[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_aln(short[] a, long[] b, short c, long d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_alndst(short[] a, short[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + c[i+ALIGN_OFF] = d[i]; + } + } + static void test_cp_alnsrc(short[] a, short[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + c[i] = d[i+ALIGN_OFF]; + } + } + static void test_ci_unaln(short[] a, long[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_vi_unaln(short[] a, long[] b, short c, long d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + static void test_cp_unalndst(short[] a, short[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + c[i+UNALIGN_OFF] = d[i]; + } + } + static void test_cp_unalnsrc(short[] a, short[] b, long[] c, long[] d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + c[i] = d[i+UNALIGN_OFF]; + } + } + + static int verify(String text, int i, short elem, short val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null Sat Jun 2 20:04:32 2012 +++ new/test/compiler/7119644/TestShortVect.java Sat Jun 2 20:04:32 2012 @@ -0,0 +1,953 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7119644 + * @summary Increase superword's vector size up to 256 bits + * + * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortVect + */ + +public class TestShortVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int OFFSET = 3; + private static final int SCALE = 2; + private static final int ALIGN_OFF = 8; + private static final int UNALIGN_OFF = 5; + + public static void main(String args[]) { + System.out.println("Testing Short vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + short[] a1 = new short[ARRLEN]; + short[] a2 = new short[ARRLEN]; + System.out.println("Warmup"); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + start = System.currentTimeMillis(); + for (int i=0; i= 0; i-=1) { + a[i] = -123; + } + } + static void test_vi_neg(short[] a, short b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b; + } + } + static void test_cp_neg(short[] a, short[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = b[i]; + } + } + static void test_2ci_neg(short[] a, short[] b) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = -123; + b[i] = -103; + } + } + static void test_2vi_neg(short[] a, short[] b, short c, short d) { + for (int i = a.length-1; i >= 0; i-=1) { + a[i] = c; + b[i] = d; + } + } + static void test_ci_oppos(short[] a) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + } + } + static void test_vi_oppos(short[] a, short b) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[limit-i] = b; + } + } + static void test_cp_oppos(short[] a, short[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[i] = b[limit-i]; + } + } + static void test_2ci_oppos(short[] a, short[] b) { + int limit = a.length-1; + for (int i = 0; i < a.length; i+=1) { + a[limit-i] = -123; + b[i] = -103; + } + } + static void test_2vi_oppos(short[] a, short[] b, short c, short d) { + int limit = a.length-1; + for (int i = limit; i >= 0; i-=1) { + a[i] = c; + b[limit-i] = d; + } + } + static void test_ci_off(short[] a) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123; + } + } + static void test_vi_off(short[] a, short b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b; + } + } + static void test_cp_off(short[] a, short[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = b[i+OFFSET]; + } + } + static void test_2ci_off(short[] a, short[] b) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = -123; + b[i+OFFSET] = -103; + } + } + static void test_2vi_off(short[] a, short[] b, short c, short d) { + for (int i = 0; i < a.length-OFFSET; i+=1) { + a[i+OFFSET] = c; + b[i+OFFSET] = d; + } + } + static void test_ci_inv(short[] a, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123; + } + } + static void test_vi_inv(short[] a, short b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b; + } + } + static void test_cp_inv(short[] a, short[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = b[i+k]; + } + } + static void test_2ci_inv(short[] a, short[] b, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = -123; + b[i+k] = -103; + } + } + static void test_2vi_inv(short[] a, short[] b, short c, short d, int k) { + for (int i = 0; i < a.length-k; i+=1) { + a[i+k] = c; + b[i+k] = d; + } + } + static void test_ci_scl(short[] a) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123; + } + } + static void test_vi_scl(short[] a, short b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b; + } + } + static void test_cp_scl(short[] a, short[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = b[i*SCALE]; + } + } + static void test_2ci_scl(short[] a, short[] b) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = -123; + b[i*SCALE] = -103; + } + } + static void test_2vi_scl(short[] a, short[] b, short c, short d) { + for (int i = 0; i*SCALE < a.length; i+=1) { + a[i*SCALE] = c; + b[i*SCALE] = d; + } + } + static void test_cp_alndst(short[] a, short[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = b[i]; + } + } + static void test_cp_alnsrc(short[] a, short[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = b[i+ALIGN_OFF]; + } + } + static void test_2ci_aln(short[] a, short[] b) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i+ALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_2vi_aln(short[] a, short[] b, short c, short d) { + for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { + a[i] = c; + b[i+ALIGN_OFF] = d; + } + } + static void test_cp_unalndst(short[] a, short[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = b[i]; + } + } + static void test_cp_unalnsrc(short[] a, short[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = b[i+UNALIGN_OFF]; + } + } + static void test_2ci_unaln(short[] a, short[] b) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i+UNALIGN_OFF] = -123; + b[i] = -103; + } + } + static void test_2vi_unaln(short[] a, short[] b, short c, short d) { + for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { + a[i] = c; + b[i+UNALIGN_OFF] = d; + } + } + + static int verify(String text, int i, short elem, short val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +}