# HG changeset patch
# User aph
# Date 1461345837 0
#      Fri Apr 22 17:23:57 2016 +0000
# Node ID 3225a1eb6ffa160913e97d09ccd811f2c8987234
# Parent  0f1865d9ecda5d66c3a1ca2010697f4a368cf4da
8154957: AArch64: Better byte behavior
Summary:  The fix for 8132051 is needed for AArch64.
Reviewed-by: roland

diff --git a/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp b/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
+++ b/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
@@ -331,7 +331,7 @@
     length.load_item();
 
   }
-  if (needs_store_check) {
+  if (needs_store_check || x->check_boolean()) {
     value.load_item();
   } else {
     value.load_for_store(x->elt_type());
@@ -380,7 +380,8 @@
     // Seems to be a precise
     post_barrier(LIR_OprFact::address(array_addr), value.result());
   } else {
-    __ move(value.result(), array_addr, null_check_info);
+    LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info);
+    __ move(result, array_addr, null_check_info);
   }
 }
 
diff --git a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp
@@ -40,7 +40,43 @@
 #include "runtime/thread.inline.hpp"
 
 
-// Implementation of InterpreterMacroAssembler
+void InterpreterMacroAssembler::narrow(Register result) {
+  // Emit code narrowing the int in 'result' to the method's result type; uses rscratch1 as a temporary.
+  // Get method->_constMethod->_result_type
+  ldr(rscratch1, Address(rfp, frame::interpreter_frame_method_offset * wordSize));
+  ldr(rscratch1, Address(rscratch1, Method::const_offset()));
+  ldrb(rscratch1, Address(rscratch1, ConstMethod::result_type_offset()));
+
+  Label done, notBool, notByte, notChar;
+
+  // Common case first: a T_INT result needs no narrowing, so skip all the checks below.
+  cmpw(rscratch1, T_INT);
+  br(Assembler::EQ, done);
+
+  // Otherwise mask or extend the integer result to the narrower return type.
+  cmpw(rscratch1, T_BOOLEAN);
+  br(Assembler::NE, notBool);
+  andw(result, result, 0x1);  // boolean: keep only the least significant bit
+  b(done);
+
+  bind(notBool);
+  cmpw(rscratch1, T_BYTE);
+  br(Assembler::NE, notByte);
+  sbfx(result, result, 0, 8);  // byte: sign-extend from the low 8 bits
+  b(done);
+
+  bind(notByte);
+  cmpw(rscratch1, T_CHAR);
+  br(Assembler::NE, notChar);
+  ubfx(result, result, 0, 16);  // char: zero-extend, keeping only the low 16 bits
+  b(done);
+
+  bind(notChar);
+  sbfx(result, result, 0, 16);     // every remaining type is handled as short: sign-extend the low 16 bits
+
+  // The T_INT case branches straight here with 'result' left untouched.
+  bind(done);
+}
 
 void InterpreterMacroAssembler::jump_to_entry(address entry) {
   assert(entry, "Entry must have been generated by now");
@@ -81,6 +117,7 @@
                verify_oop(r0, state);               break;
     case ltos: ldr(r0, val_addr);                   break;
     case btos:                                   // fall through
+    case ztos:                                   // fall through
     case ctos:                                   // fall through
     case stos:                                   // fall through
     case itos: ldrw(r0, val_addr);                  break;
@@ -314,6 +351,7 @@
   switch (state) {
   case atos: pop_ptr();                 break;
   case btos:
+  case ztos:
   case ctos:
   case stos:
   case itos: pop_i();                   break;
@@ -331,6 +369,7 @@
   switch (state) {
   case atos: push_ptr();                break;
   case btos:
+  case ztos:
   case ctos:
   case stos:
   case itos: push_i();                  break;
diff --git a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp
@@ -245,6 +245,9 @@
   void update_mdp_by_constant(Register mdp_in, int constant);
   void update_mdp_for_ret(Register return_bci);
 
+  // Narrow the int return value held in 'result' to the current method's result type.
+  void narrow(Register result);
+
   void profile_taken_branch(Register mdp, Register bumped_count);
   void profile_not_taken_branch(Register mdp);
   void profile_call(Register mdp);
diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
@@ -1184,6 +1184,10 @@
                      Register result, Register cnt1,
                      int elem_size, bool is_string);
 
+  void fill_words(Register base, Register cnt, Register value);
+  void zero_words(Register base, Register cnt);
+  void zero_words(Register base, u_int64_t cnt);
+
   void byte_array_inflate(Register src, Register dst, Register len,
                           FloatRegister vtmp1, FloatRegister vtmp2,
                           FloatRegister vtmp3, Register tmp4);
diff --git a/src/cpu/aarch64/vm/templateTable_aarch64.cpp b/src/cpu/aarch64/vm/templateTable_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp
@@ -229,6 +229,7 @@
   switch (bc) {
   case Bytecodes::_fast_aputfield:
   case Bytecodes::_fast_bputfield:
+  case Bytecodes::_fast_zputfield:
   case Bytecodes::_fast_cputfield:
   case Bytecodes::_fast_dputfield:
   case Bytecodes::_fast_fputfield:
@@ -1082,6 +1083,17 @@
   // r1: index
   // r3: array
   index_check(r3, r1); // prefer index in r1
+
+  // Need to check whether array is boolean or byte
+  // since both types share the bastore bytecode.
+  __ load_klass(r2, r3);
+  __ ldrw(r2, Address(r2, Klass::layout_helper_offset()));
+  int diffbit_index = exact_log2(Klass::layout_helper_boolean_diffbit());
+  Label L_skip;
+  __ tbz(r2, diffbit_index, L_skip);
+  __ andw(r0, r0, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
+  __ bind(L_skip);
+
   __ lea(rscratch1, Address(r3, r1, Address::uxtw(0)));
   __ strb(r0, Address(rscratch1,
                       arrayOopDesc::base_offset_in_bytes(T_BYTE)));
@@ -2193,6 +2205,13 @@
   if (_desc->bytecode() == Bytecodes::_return)
     __ membar(MacroAssembler::StoreStore);
 
+  // Narrow result if state is itos but result type is smaller.
+  // Need to narrow in the return bytecode rather than in generate_return_entry
+  // since compiled code callers expect the result to already be narrowed.
+  if (state == itos) {
+    __ narrow(r0);
+  }
+
   __ remove_activation(state);
   __ ret(lr);
 }
@@ -2386,7 +2405,7 @@
 
   const Address field(obj, off);
 
-  Label Done, notByte, notInt, notShort, notChar,
+  Label Done, notByte, notBool, notInt, notShort, notChar,
               notLong, notFloat, notObj, notDouble;
 
   // x86 uses a shift and mask or wings it with a shift plus assert
@@ -2409,6 +2428,20 @@
   __ b(Done);
 
   __ bind(notByte);
+  __ cmp(flags, ztos);
+  __ br(Assembler::NE, notBool);
+
+  // ztos (same code as btos)
+  __ ldrsb(r0, field);
+  __ push(ztos);
+  // Rewrite bytecode to be faster
+  if (!is_static) {
+    // use btos rewriting, no truncating to t/f bit is needed for getfield.
+    patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
+  }
+  __ b(Done);
+
+  __ bind(notBool);
   __ cmp(flags, atos);
   __ br(Assembler::NE, notObj);
   // atos
@@ -2604,7 +2637,7 @@
   // field address
   const Address field(obj, off);
 
-  Label notByte, notInt, notShort, notChar,
+  Label notByte, notBool, notInt, notShort, notChar,
         notLong, notFloat, notObj, notDouble;
 
   // x86 uses a shift and mask or wings it with a shift plus assert
@@ -2629,6 +2662,22 @@
   }
 
   __ bind(notByte);
+  __ cmp(flags, ztos);
+  __ br(Assembler::NE, notBool);
+
+  // ztos
+  {
+    __ pop(ztos);
+    if (!is_static) pop_and_check_object(obj);
+    __ andw(r0, r0, 0x1);
+    __ strb(r0, field);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_zputfield, bc, r1, true, byte_no);
+    }
+    __ b(Done);
+  }
+
+  __ bind(notBool);
   __ cmp(flags, atos);
   __ br(Assembler::NE, notObj);
 
@@ -2783,6 +2832,7 @@
     switch (bytecode()) {          // load values into the jvalue object
     case Bytecodes::_fast_aputfield: __ push_ptr(r0); break;
     case Bytecodes::_fast_bputfield: // fall through
+    case Bytecodes::_fast_zputfield: // fall through
     case Bytecodes::_fast_sputfield: // fall through
     case Bytecodes::_fast_cputfield: // fall through
     case Bytecodes::_fast_iputfield: __ push_i(r0); break;
@@ -2808,6 +2858,7 @@
     switch (bytecode()) {             // restore tos values
     case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break;
     case Bytecodes::_fast_bputfield: // fall through
+    case Bytecodes::_fast_zputfield: // fall through
     case Bytecodes::_fast_sputfield: // fall through
     case Bytecodes::_fast_cputfield: // fall through
     case Bytecodes::_fast_iputfield: __ pop_i(r0); break;
@@ -2863,6 +2914,9 @@
   case Bytecodes::_fast_iputfield:
     __ strw(r0, field);
     break;
+  case Bytecodes::_fast_zputfield:
+    __ andw(r0, r0, 0x1);  // boolean is true if LSB is 1
+    // fall through to bputfield
   case Bytecodes::_fast_bputfield:
     __ strb(r0, field);
     break;