src/cpu/x86/vm/x86_32.ad
*** old/src/cpu/x86/vm/x86_32.ad	Fri May  8 11:59:31 2015
--- new/src/cpu/x86/vm/x86_32.ad	Fri May  8 11:59:30 2015

*** 99,108 ****
--- 99,119 ----
  reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
+ //
+ // Empty fill registers, which are never used, but supply alignment to xmm regs
+ //
+ reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
+ reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
+ reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
+ reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
+ reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
+ reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
+ reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
+ reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
  
  // Specify priority of register selection within phases of register
  // allocation.  Highest priority is first.  A useful heuristic is to
  // give registers a low priority when they are required by machine
  // instructions, like EAX and EDX.  Registers which are used as
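Note: the eight FILL registers extend the x87 block of the VMReg numbering past the sixteen FPR0L..FPR7H slots. Per the hunk's own comment they are pure padding, defined save-on-call and never allocated; they exist only so that the XMM register definitions that follow begin at the VMReg offset the wider AVX-512 per-register footprint expects.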
*** 110,120 ****
--- 121,132 ----
  // For the Intel integer registers, the equivalent Long pairs are
  // EDX:EAX, EBX:ECX, and EDI:EBP.
  alloc_class chunk0( ECX,   EBX,   EBP,   EDI,
                      EAX,   EDX,   ESI,   ESP,
                      FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                      FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
!                     FPR6L, FPR6H, FPR7L, FPR7H );
!                     FPR6L, FPR6H, FPR7L, FPR7H,
+                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  
  //----------Architecture Description Register Classes--------------------------
  // Several register classes are automatically defined based upon information in
  // this architecture description.
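The FILL registers are appended to chunk0 even though nothing will ever allocate them: ADLC expects every reg_def to be placed in some alloc_class, so the padding slots still have to be listed here.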
*** 277,288 ****
--- 289,302 ----
    Compile* C = Compile::current();
    if (C->in_24_bit_fp_mode()) {
      size += 6; // fldcw
    }
    if (C->max_vector_size() > 16) {
+     if(UseAVX <= 2) {
        size += 3; // vzeroupper
+     }
    }
    return size;
  }
  
  // !!!!! Special hack to get all type of calls to specify the byte offset
  //       from the start of the call to the point where the return address
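vzeroupper is a fixed three-byte instruction (VEX-encoded C5 F8 77), which is where the size += 3 comes from. Guarding it with UseAVX <= 2 presumably matches the emission side of this change, which stops issuing vzeroupper around calls on EVEX-capable (UseAVX > 2) targets; the size accounting here has to agree byte-for-byte with what is actually emitted.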
*** 765,774 ****
--- 779,794 ----
  }
  
  // Helper for XMM registers.  Extra opcode bits, limited syntax.
  static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                            int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
+   int in_size_in_bits = Assembler::EVEX_32bit;
+   int evex_encoding = 0;
+   if (reg_lo+1 == reg_hi) {
+     in_size_in_bits = Assembler::EVEX_64bit;
+     evex_encoding = Assembler::VEX_W;
+   }
    if (cbuf) {
      MacroAssembler _masm(cbuf);
      if (reg_lo+1 == reg_hi) { // double move?
        if (is_load) {
          __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
*** 797,807 ****
--- 817,837 ----
        else             st->print("MOVSS  [ESP + #%d],%s",
                                   offset, Matcher::regName[reg_lo]);
  #endif
      }
    }
!   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
!   bool is_single_byte = false;
+   if ((UseAVX > 2) && (offset != 0)) {
+     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
+   }
+   int offset_size = 0;
+   if (UseAVX > 2 ) {
+     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
+   } else {
+     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
+   }
+   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    return size+5+offset_size;
  }
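The new offset_size logic reflects EVEX's compressed displacement (disp8*N): under AVX-512 a one-byte displacement is implicitly scaled by the operand's tuple size N, so a stack offset encodes in a single byte only when it is a multiple of N and the scaled value fits in a signed byte. That is what Assembler::query_compressed_disp_byte decides, with in_size_in_bits and evex_encoding (set up at the top of impl_x_helper) distinguishing 32-bit from 64-bit scalar accesses. A minimal standalone sketch of the rule, not HotSpot's actual implementation:

    #include <cstdio>

    // Hedged sketch of the EVEX disp8*N rule. N is the compression factor
    // (for a T1S tuple it is the scalar operand size in bytes: 4 for a
    // 32-bit load/store, 8 for a 64-bit one). This mirrors the intent of
    // Assembler::query_compressed_disp_byte, not its exact code.
    static bool fits_in_disp8_times_n(int offset, int n) {
      if (offset % n != 0) return false;        // must be a multiple of N
      int scaled = offset / n;
      return scaled >= -128 && scaled <= 127;   // scaled value must fit in a signed byte
    }

    int main() {
      printf("%d\n", fits_in_disp8_times_n(512, 8));  // 1: 512 = 64 * 8, one disp byte under EVEX
      printf("%d\n", fits_in_disp8_times_n(516, 8));  // 0: not a multiple of 8
      printf("%d\n", fits_in_disp8_times_n(2048, 8)); // 0: 2048 / 8 = 256 overflows a signed byte
      return 0;
    }

So with AVX-512 an offset such as 512 costs only one displacement byte where the legacy test (offset <= 127) would charge four; the flip side is the unconditional size += 2 for the larger EVEX prefix.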
*** 833,844 ****
--- 863,874 ----
      }
    }
  #endif
    }
    // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
!   // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
!   int sz = (UseAVX > 2) ? 6 : 4;
    if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
        UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
    return size + sz;
  }
*** 852,862 ****
--- 882,892 ----
  #ifndef PRODUCT
    } else if (!do_size) {
      st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  #endif
    }
!   return (UseAVX> 2) ? 6 : 4;
  }
  
  static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                   int src_hi, int dst_hi, int size, outputStream* st ) {
*** 868,878 ****
--- 898,908 ----
  #ifndef PRODUCT
    } else if (!do_size) {
      st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  #endif
    }
!   return (UseAVX> 2) ? 6 : 4;
  }
  
  static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
    if( cbuf ) {
      emit_opcode(*cbuf, 0x8B );
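Both movdl spill helpers grow from 4 to 6 bytes when UseAVX > 2 because of the four-byte EVEX prefix. As an illustration, movd eax, xmm0 (the x2gpr direction) encodes as 66 0F 7E C0 in legacy SSE and as C5 F9 7E C0 with the two-byte VEX prefix, 4 bytes either way, but as 62 F1 7D 08 7E C0 under EVEX.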
*** 939,951 ****
--- 969,980 ----
      src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
      dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
      calc_size += 3+src_offset_size + 3+dst_offset_size;
      break;
    case Op_VecX:
      calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
      break;
    case Op_VecY:
+   case Op_VecZ:
      calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
      break;
    default:
      ShouldNotReachHere();
    }
*** 972,981 ****
--- 1001,1015 ----
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
+   case Op_VecZ:
+     __ evmovdqu(Address(rsp, -64), xmm0, 2);
+     __ evmovdqu(xmm0, Address(rsp, src_offset), 2);
+     __ evmovdqu(Address(rsp, dst_offset), xmm0, 2);
+     __ evmovdqu(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
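The 512-bit arm bounces the mem-mem spill through xmm0 using a 64-byte scratch area just below the stack pointer, with evmovdqu's final argument (2, the 512-bit vector-length selector) picking the ZMM form. Note that as diffed there is no break; after the last vmovdqu, so Op_VecY falls through into the new Op_VecZ code: the hunk counts (10 old lines, 15 new, exactly the five added lines) confirm no break was added. The same fall-through appears in the format block below; this looks like a bug to flag in review.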
*** 1007,1016 ****
--- 1041,1056 ----
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
+   case Op_VecZ:
+     st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
+               "vmovdqu xmm0, [rsp + #%d]\n\t"
+               "vmovdqu [rsp + #%d], xmm0\n\t"
+               "vmovdqu xmm0, [rsp - #64]",
+               src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
  #endif
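Minor: the new format string still prints vmovdqu for the 512-bit spill even though the encoder emits evmovdqu; cosmetic, but worth aligning with the emitted instruction.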
*** 1040,1050 ****
--- 1080,1090 ----
    if (bottom_type()->isa_vect() != NULL) {
      uint ireg = ideal_reg();
      assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
      assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
!     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
      if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
        // mem -> mem
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
*** 3996,4020 ****
--- 4036,4093 ----
  %}
  
  // XMM Float register operands
  operand regF() %{
    predicate( UseSSE>=1 );
!   constraint(ALLOC_IN_RC(float_reg_legacy));
    match(RegF);
    format %{ %}
    interface(REG_INTER);
  %}
  
  // XMM Double register operands
  operand regD() %{
    predicate( UseSSE>=2 );
!   constraint(ALLOC_IN_RC(double_reg_legacy));
    match(RegD);
    format %{ %}
    interface(REG_INTER);
  %}
  
+ // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
+ // runtime code generation via reg_class_dynamic.
+ operand vecS() %{
+   constraint(ALLOC_IN_RC(vectors_reg_legacy));
+   match(VecS);
+ 
+   format %{ %}
+   interface(REG_INTER);
+ %}
+ 
+ operand vecD() %{
+   constraint(ALLOC_IN_RC(vectord_reg_legacy));
+   match(VecD);
+ 
+   format %{ %}
+   interface(REG_INTER);
+ %}
+ 
+ operand vecX() %{
+   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
+   match(VecX);
+ 
+   format %{ %}
+   interface(REG_INTER);
+ %}
+ 
+ operand vecY() %{
+   constraint(ALLOC_IN_RC(vectory_reg_legacy));
+   match(VecY);
+ 
+   format %{ %}
+   interface(REG_INTER);
+ %}
  
  //----------Memory Operands----------------------------------------------------
  // Direct Memory Operand
  operand direct(immP addr) %{
    match(addr);
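The vector operands deliberately bind to the *_reg_legacy classes: a 32-bit VM only ever sees xmm0-xmm7, which every encoding form can reach, so the reg_class_dynamic machinery used elsewhere to switch between legacy and extended register sets at runtime would only add code-generation overhead here, exactly as the hunk's comment says.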
*** 11166,11176 ****
--- 11239,11248 ----
    format %{ "MOV    $dst,$src.lo" %}
    ins_encode(enc_CopyL_Lo(dst,src));
    ins_pipe( ialu_reg_reg );
  %}
  
  instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
    match(Set dst (MoveF2I src));
    effect( DEF dst, USE src );
    ins_cost(100);
    format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
