src/hotspot/cpu/x86/macroAssembler_x86.cpp

@@ -45,10 +45,11 @@
 #include "runtime/safepointMechanism.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/thread.hpp"
 #include "utilities/macros.hpp"
+#include "vmreg_x86.inline.hpp"
 #include "crc32c.h"
 #ifdef COMPILER2
 #include "opto/intrinsicnode.hpp"
 #endif
 

@@ -2423,10 +2424,14 @@
   pass_arg1(this, arg_1);
   pass_arg0(this, arg_0);
   call_VM_leaf(entry_point, 3);
 }
 
+void MacroAssembler::super_call_VM_leaf(address entry_point) {
+  MacroAssembler::call_VM_leaf_base(entry_point, 0);
+}
+
 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
   pass_arg0(this, arg_0);
   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 }
 

@@ -3417,10 +3422,55 @@
     // nothing to do, (later) access of M[reg + offset]
     // will provoke OS NULL exception if reg = NULL
   }
 }
 
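+// Jumps to is_value if klass is a value type klass (JVM_ACC_VALUE is set in its access flags).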
+void MacroAssembler::test_klass_is_value(Register klass, Register temp_reg, Label& is_value) {
+  movl(temp_reg, Address(klass, Klass::access_flags_offset()));
+  testl(temp_reg, JVM_ACC_VALUE);
+  jcc(Assembler::notZero, is_value);
+}
+
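+// Extracts the is_flattenable bit from the field flags and jumps to is_flattenable if it is set.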
+void MacroAssembler::test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable) {
+  movl(temp_reg, flags);
+  shrl(temp_reg, ConstantPoolCacheEntry::is_flattenable_field_shift);
+  andl(temp_reg, 0x1);
+  testl(temp_reg, temp_reg);
+  jcc(Assembler::notZero, is_flattenable);
+}
+
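+// Inverse of test_field_is_flattenable: jumps to not_flattenable when the bit is clear.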
+void MacroAssembler::test_field_is_not_flattenable(Register flags, Register temp_reg, Label& not_flattenable) {
+  movl(temp_reg, flags);
+  shrl(temp_reg, ConstantPoolCacheEntry::is_flattenable_field_shift);
+  andl(temp_reg, 0x1);
+  testl(temp_reg, temp_reg);
+  jcc(Assembler::zero, not_flattenable);
+}
+
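+// Extracts the is_flattened bit from the field flags and jumps to is_flattened if it is set.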
+void MacroAssembler::test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened) {
+  movl(temp_reg, flags);
+  shrl(temp_reg, ConstantPoolCacheEntry::is_flattened_field_shift);
+  andl(temp_reg, 0x1);
+  testl(temp_reg, temp_reg);
+  jcc(Assembler::notZero, is_flattened);
+}
+
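+// Flat value type arrays are tagged via the array tag in the klass' layout helper.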
+void MacroAssembler::test_flat_array_klass(Register klass, Register temp_reg,
+                                           Label& is_flat_array) {
+  movl(temp_reg, Address(klass, Klass::layout_helper_offset()));
+  sarl(temp_reg, Klass::_lh_array_tag_shift);
+  cmpl(temp_reg, Klass::_lh_array_tag_vt_value);
+  jcc(Assembler::equal, is_flat_array);
+}
+
+
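+// Loads the klass of oop and jumps to is_flat_array if it is a flat value type array.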
+void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg,
+                                         Label& is_flat_array) {
+  load_klass(temp_reg, oop);
+  test_flat_array_klass(temp_reg, temp_reg, is_flat_array);
+}
+
 void MacroAssembler::os_breakpoint() {
   // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
   // (e.g., MSVC can't call ps() otherwise)
   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 }

@@ -4469,11 +4519,15 @@
     bind(L);
   }
 }
 
 void MacroAssembler::verify_oop(Register reg, const char* s) {
-  if (!VerifyOops) return;
+  if (!VerifyOops || VerifyAdapterSharing) {
+    // The address of the code string below confuses VerifyAdapterSharing
+    // because it may differ between otherwise equivalent adapters.
+    return;
+  }
 
   // Pass register number to verify_oop_subroutine
   const char* b = NULL;
   {
     ResourceMark rm;

@@ -4559,11 +4613,15 @@
   return Address(rsp, scale_reg, scale_factor, offset);
 }
 
 
 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
-  if (!VerifyOops) return;
+  if (!VerifyOops || VerifyAdapterSharing) {
+    // The address of the code string below confuses VerifyAdapterSharing
+    // because it may differ between otherwise equivalent adapters.
+    return;
+  }
 
   // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
   // Pass register number to verify_oop_subroutine
   const char* b = NULL;
   {

@@ -5435,11 +5493,16 @@
 }
 
 #endif // _LP64
 
 // C2 compiled method's prolog code.
-void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
+void MacroAssembler::verified_entry(Compile* C, int sp_inc) {
+  int framesize = C->frame_size_in_bytes();
+  int bangsize = C->bang_size_in_bytes();
+  bool fp_mode_24b = C->in_24_bit_fp_mode();
+  int stack_bang_size = C->need_stack_bang(bangsize) ? bangsize : 0;
+  bool is_stub = C->stub_function() != NULL;
 
   // WARNING: Initial instruction MUST be 5 bytes or longer so that
   // NativeJump::patch_verified_entry will be able to patch out the entry
   // code safely. The push to verify stack depth is ok at 5 bytes,
   // the frame allocation can be either 3 or 6 bytes. So if we don't do

@@ -5488,10 +5551,16 @@
         addptr(rbp, framesize);
       }
     }
   }
 
+  if (C->needs_stack_repair()) {
+    // Save stack increment (also account for fixed framesize and rbp)
+    assert((sp_inc & (StackAlignmentInBytes-1)) == 0, "stack increment not aligned");
+    movptr(Address(rsp, C->sp_inc_offset()), sp_inc + framesize + wordSize);
+  }
+
   if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
     framesize -= wordSize;
     movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
   }
 

@@ -5524,18 +5593,20 @@
     bs->nmethod_entry_barrier(this);
   }
 }
 
 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
-void MacroAssembler::xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp) {
+void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp) {
   // cnt - number of qwords (8-byte words).
   // base - start address, qword aligned.
   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
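+  // Broadcast the 64-bit fill value to all lanes of xtmp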
+  movdq(xtmp, val);
   if (UseAVX >= 2) {
-    vpxor(xtmp, xtmp, xtmp, AVX_256bit);
+    punpcklqdq(xtmp, xtmp);
+    vinserti128_high(xtmp, xtmp);
   } else {
-    pxor(xtmp, xtmp);
+    punpcklqdq(xtmp, xtmp);
   }
   jmp(L_zero_64_bytes);
 
   BIND(L_loop);
   if (UseAVX >= 2) {

@@ -5575,26 +5646,307 @@
   decrement(cnt);
   jccb(Assembler::greaterEqual, L_sloop);
   BIND(L_end);
 }
 
-void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp, bool is_large) {
+// Move a value between registers/stack slots and update the reg_state
+bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[], int ret_off) {
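+  // reg_state: reg_readonly = unconsumed source, reg_writable = free to use, reg_written = destination already filled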
+  if (reg_state[to->value()] == reg_written) {
+    return true; // Already written
+  }
+  if (from != to && bt != T_VOID) {
+    if (reg_state[to->value()] == reg_readonly) {
+      return false; // Not yet writable
+    }
+    if (from->is_reg()) {
+      if (to->is_reg()) {
+        if (from->is_XMMRegister()) {
+          if (bt == T_DOUBLE) {
+            movdbl(to->as_XMMRegister(), from->as_XMMRegister());
+          } else {
+            assert(bt == T_FLOAT, "must be float");
+            movflt(to->as_XMMRegister(), from->as_XMMRegister());
+          }
+        } else {
+          movq(to->as_Register(), from->as_Register());
+        }
+      } else {
+        int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+        assert(st_off != ret_off, "overwriting return address at %d", st_off);
+        Address to_addr = Address(rsp, st_off);
+        if (from->is_XMMRegister()) {
+          if (bt == T_DOUBLE) {
+            movdbl(to_addr, from->as_XMMRegister());
+          } else {
+            assert(bt == T_FLOAT, "must be float");
+            movflt(to_addr, from->as_XMMRegister());
+          }
+        } else {
+          movq(to_addr, from->as_Register());
+        }
+      }
+    } else {
+      Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize);
+      if (to->is_reg()) {
+        if (to->is_XMMRegister()) {
+          if (bt == T_DOUBLE) {
+            movdbl(to->as_XMMRegister(), from_addr);
+          } else {
+            assert(bt == T_FLOAT, "must be float");
+            movflt(to->as_XMMRegister(), from_addr);
+          }
+        } else {
+          movq(to->as_Register(), from_addr);
+        }
+      } else {
+        int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+        assert(st_off != ret_off, "overwriting return address at %d", st_off);
+        movq(r13, from_addr);
+        movq(Address(rsp, st_off), r13);
+      }
+    }
+  }
+  // Update register states
+  reg_state[from->value()] = reg_writable;
+  reg_state[to->value()] = reg_written;
+  return true;
+}
+
+// Read all fields from a value type oop and store the values in registers/stack slots
+bool MacroAssembler::unpack_value_helper(const GrowableArray<SigEntry>* sig, int& sig_index, VMReg from, VMRegPair* regs_to, int& to_index, RegState reg_state[], int ret_off) {
+  Register fromReg = from->is_reg() ? from->as_Register() : noreg;
+  assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
+
+  int vt = 1;
+  bool done = true;
+  bool mark_done = true;
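+  // Walk the signature backwards from the end delimiter; vt tracks the value type nesting depth.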
+  do {
+    sig_index--;
+    BasicType bt = sig->at(sig_index)._bt;
+    if (bt == T_VALUETYPE) {
+      vt--;
+    } else if (bt == T_VOID &&
+               sig->at(sig_index-1)._bt != T_LONG &&
+               sig->at(sig_index-1)._bt != T_DOUBLE) {
+      vt++;
+    } else if (SigEntry::is_reserved_entry(sig, sig_index)) {
+      to_index--; // Skip reserved entry
+    } else {
+      assert(to_index >= 0, "invalid to_index");
+      VMRegPair pair_to = regs_to[to_index--];
+      VMReg to = pair_to.first();
+
+      if (bt == T_VOID) continue;
+
+      int idx = (int)to->value();
+      if (reg_state[idx] == reg_readonly) {
+        if (idx != from->value()) {
+          mark_done = false;
+        }
+        done = false;
+        continue;
+      } else if (reg_state[idx] == reg_written) {
+        continue;
+      } else {
+        assert(reg_state[idx] == reg_writable, "must be writable");
+        reg_state[idx] = reg_written;
+      }
+
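+      // Load the value type oop from the stack into r10 if the source is not in a register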
+      if (fromReg == noreg) {
+        int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+        movq(r10, Address(rsp, st_off));
+        fromReg = r10;
+      }
+
+      int off = sig->at(sig_index)._offset;
+      assert(off > 0, "offset in object should be positive");
+      bool is_oop = (bt == T_OBJECT || bt == T_ARRAY);
+
+      Address fromAddr = Address(fromReg, off);
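+      // Only T_CHAR and T_BOOLEAN are zero-extended; all other integral types are sign-extended.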
+      bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
+      if (!to->is_XMMRegister()) {
+        Register dst = to->is_stack() ? r13 : to->as_Register();
+        if (is_oop) {
+          load_heap_oop(dst, fromAddr);
+        } else {
+          load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
+        }
+        if (to->is_stack()) {
+          int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+          assert(st_off != ret_off, "overwriting return address at %d", st_off);
+          movq(Address(rsp, st_off), dst);
+        }
+      } else {
+        if (bt == T_DOUBLE) {
+          movdbl(to->as_XMMRegister(), fromAddr);
+        } else {
+          assert(bt == T_FLOAT, "must be float");
+          movflt(to->as_XMMRegister(), fromAddr);
+        }
+      }
+    }
+  } while (vt != 0);
+  if (mark_done && reg_state[from->value()] != reg_written) {
+    // This is okay because no one else will write to that slot
+    reg_state[from->value()] = reg_writable;
+  }
+  return done;
+}
+
+// Unpack all value type arguments passed as oops
+void MacroAssembler::unpack_value_args(Compile* C, bool receiver_only) {
+  assert(C->has_scalarized_args(), "value type argument scalarization is disabled");
+  Method* method = C->method()->get_Method();
+  const GrowableArray<SigEntry>* sig_cc = method->adapter()->get_sig_cc();
+  assert(sig_cc != NULL, "must have scalarized signature");
+
+  // Get unscalarized calling convention
+  BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, sig_cc->length());
+  int args_passed = 0;
+  if (!method->is_static()) {
+    sig_bt[args_passed++] = T_OBJECT;
+  }
+  if (!receiver_only) {
+    for (SignatureStream ss(method->signature()); !ss.at_return_type(); ss.next()) {
+      BasicType bt = ss.type();
+      sig_bt[args_passed++] = bt;
+      if (type2size[bt] == 2) {
+        sig_bt[args_passed++] = T_VOID;
+      }
+    }
+  } else {
+    // Only unpack the receiver, all other arguments are already scalarized
+    InstanceKlass* holder = method->method_holder();
+    int rec_len = holder->is_value() ? ValueKlass::cast(holder)->extended_sig()->length() : 1;
+    // Copy scalarized signature but skip receiver, value type delimiters and reserved entries
+    for (int i = 0; i < sig_cc->length(); i++) {
+      if (!SigEntry::is_reserved_entry(sig_cc, i)) {
+        if (SigEntry::skip_value_delimiters(sig_cc, i) && rec_len <= 0) {
+          sig_bt[args_passed++] = sig_cc->at(i)._bt;
+        }
+        rec_len--;
+      }
+    }
+  }
+  VMRegPair* regs = NEW_RESOURCE_ARRAY(VMRegPair, args_passed);
+  int args_on_stack = SharedRuntime::java_calling_convention(sig_bt, regs, args_passed, false);
+
+  // Get scalarized calling convention
+  int args_passed_cc = SigEntry::fill_sig_bt(sig_cc, sig_bt);
+  VMRegPair* regs_cc = NEW_RESOURCE_ARRAY(VMRegPair, sig_cc->length());
+  int args_on_stack_cc = SharedRuntime::java_calling_convention(sig_bt, regs_cc, args_passed_cc, false);
+
+  // Check if we need to extend the stack for unpacking
+  int sp_inc = (args_on_stack_cc - args_on_stack) * VMRegImpl::stack_slot_size;
+  if (sp_inc > 0) {
+    // Save the return address, adjust the stack (make sure it is properly
+    // 16-byte aligned) and copy the return address to the new top of the stack.
+    pop(r13);
+    sp_inc = align_up(sp_inc, StackAlignmentInBytes);
+    subptr(rsp, sp_inc);
+    push(r13);
+  } else {
+    // The scalarized calling convention needs less stack space than the unscalarized one.
+    // No need to extend the stack, the caller will take care of these adjustments.
+    sp_inc = 0;
+  }
+
+  // Initialize register/stack slot states (make all writable)
+  int max_stack = MAX2(args_on_stack + sp_inc/VMRegImpl::stack_slot_size, args_on_stack_cc);
+  int max_reg = VMRegImpl::stack2reg(max_stack)->value();
+  RegState* reg_state = NEW_RESOURCE_ARRAY(RegState, max_reg);
+  for (int i = 0; i < max_reg; ++i) {
+    reg_state[i] = reg_writable;
+  }
+  // Set all source registers/stack slots to readonly to prevent accidental overwriting
+  for (int i = 0; i < args_passed; ++i) {
+    VMReg reg = regs[i].first();
+    if (!reg->is_valid()) continue;
+    if (reg->is_stack()) {
+      // Update source stack location by adding stack increment
+      reg = VMRegImpl::stack2reg(reg->reg2stack() + sp_inc/VMRegImpl::stack_slot_size);
+      regs[i] = reg;
+    }
+    assert(reg->value() >= 0 && reg->value() < max_reg, "reg value out of bounds");
+    reg_state[reg->value()] = reg_readonly;
+  }
+
+  // Emit code for unpacking value type arguments
+  // We try multiple times and eventually start spilling to resolve (circular) dependencies
+  bool done = false;
+  for (int i = 0; i < 2*args_passed_cc && !done; ++i) {
+    done = true;
+    bool spill = (i > args_passed_cc); // Start spilling in the second half of the rounds
+    // Iterate over all arguments (in reverse)
+    for (int from_index = args_passed-1, to_index = args_passed_cc-1, sig_index = sig_cc->length()-1; sig_index >= 0; sig_index--) {
+      if (SigEntry::is_reserved_entry(sig_cc, sig_index)) {
+        to_index--; // Skip reserved entry
+      } else {
+        assert(from_index >= 0, "index out of bounds");
+        VMReg reg = regs[from_index].first();
+        if (spill && reg->is_valid() && reg_state[reg->value()] == reg_readonly) {
+          // Spill argument to be able to write the source and resolve circular dependencies
+          VMReg spill_reg = reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
+          bool res = move_helper(reg, spill_reg, T_DOUBLE, reg_state, sp_inc);
+          assert(res, "Spilling should not fail");
+          // Set spill_reg as new source and update state
+          reg = spill_reg;
+          regs[from_index].set1(reg);
+          reg_state[reg->value()] = reg_readonly;
+          spill = false; // Do not spill again in this round
+        }
+        BasicType bt = sig_cc->at(sig_index)._bt;
+        if (SigEntry::skip_value_delimiters(sig_cc, sig_index)) {
+          assert(to_index >= 0, "index out of bounds");
+          done &= move_helper(reg, regs_cc[to_index].first(), bt, reg_state, sp_inc);
+          to_index--;
+        } else if (!receiver_only || (from_index == 0 && bt == T_VOID)) {
+          done &= unpack_value_helper(sig_cc, sig_index, reg, regs_cc, to_index, reg_state, sp_inc);
+        } else {
+          continue;
+        }
+        from_index--;
+      }
+    }
+  }
+  guarantee(done, "Could not resolve circular dependency when unpacking value type arguments");
+
+  // Emit code for verified entry and save increment for stack repair on return
+  verified_entry(C, sp_inc);
+}
+
+// Restores the stack on return
+void MacroAssembler::restore_stack(Compile* C) {
+  int framesize = C->frame_size_in_bytes();
+  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+  // Subtract the words for the return address (already pushed) and the saved RBP
+  framesize -= 2*wordSize;
+
+  if (C->needs_stack_repair()) {
+    // Restore rbp and repair rsp by adding the stack increment
+    movq(rbp, Address(rsp, framesize));
+    addq(rsp, Address(rsp, C->sp_inc_offset()));
+  } else {
+    if (framesize > 0) {
+      addq(rsp, framesize);
+    }
+    pop(rbp);
+  }
+}
+
+void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, bool is_large, bool word_copy_only) {
   // cnt - number of qwords (8-byte words).
   // base - start address, qword aligned.
   // is_large - if optimizers know cnt is larger than InitArrayShortSize
   assert(base==rdi, "base register must be edi for rep stos");
-  assert(tmp==rax,   "tmp register must be eax for rep stos");
+  assert(val==rax,   "val register must be eax for rep stos");
   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
   assert(InitArrayShortSize % BytesPerLong == 0,
     "InitArrayShortSize should be the multiple of BytesPerLong");
 
   Label DONE;
 
-  if (!is_large || !UseXMMForObjInit) {
-    xorptr(tmp, tmp);
-  }
-
   if (!is_large) {
     Label LOOP, LONG;
     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
     jccb(Assembler::greater, LONG);
 

@@ -5603,25 +5955,24 @@
     decrement(cnt);
     jccb(Assembler::negative, DONE); // Zero length
 
     // Use individual pointer-sized stores for small counts:
     BIND(LOOP);
-    movptr(Address(base, cnt, Address::times_ptr), tmp);
+    movptr(Address(base, cnt, Address::times_ptr), val);
     decrement(cnt);
     jccb(Assembler::greaterEqual, LOOP);
     jmpb(DONE);
 
     BIND(LONG);
   }
 
   // Use longer rep-prefixed ops for non-small counts:
-  if (UseFastStosb) {
+  if (UseFastStosb && !word_copy_only) {
     shlptr(cnt, 3); // convert to number of bytes
     rep_stosb();
   } else if (UseXMMForObjInit) {
-    movptr(tmp, base);
-    xmm_clear_mem(tmp, cnt, xtmp);
+    xmm_clear_mem(base, cnt, val, xtmp);
   } else {
     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
     rep_stos();
   }
 