/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "runtime/thread.hpp"

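// HotSpot assembler convention: the `__` shorthand below expands to `masm->`.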
#define __ masm->

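// Default (barrier-free) implementation of load_at: loads a value of the given BasicType
// from `src` into `dst` (or onto the interpreter tos registers for long/float/double on
// 32-bit ARM). On AARCH64 with compressed oops, heap references are loaded narrow and decoded.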
void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
#ifdef AARCH64
      if (UseCompressedOops) {
        __ ldr_w(dst, src);
        __ decode_heap_oop(dst);
      } else
#endif // AARCH64
      {
        __ ldr(dst, src);
      }
    } else {
      assert(in_native, "why else?");
      __ ldr(dst, src);
    }
    break;
  }
  case T_BOOLEAN: __ ldrb      (dst, src); break;
  case T_BYTE:    __ ldrsb     (dst, src); break;
  case T_CHAR:    __ ldrh      (dst, src); break;
  case T_SHORT:   __ ldrsh     (dst, src); break;
  case T_INT:     __ ldr_s32   (dst, src); break;
  case T_ADDRESS: __ ldr       (dst, src); break;
  case T_LONG:
#ifdef AARCH64
    __ ldr                     (dst, src); break;
#else
    assert(dst == noreg, "only to ltos");
    __ add                     (src.index(), src.index(), src.base());
    __ ldmia                   (src.index(), RegisterSet(R0_tos_lo) | RegisterSet(R1_tos_hi));
#endif // AARCH64
    break;
#ifdef __SOFTFP__
  case T_FLOAT:
    assert(dst == noreg, "only to ftos");
    __ ldr                     (R0_tos, src);
    break;
  case T_DOUBLE:
    assert(dst == noreg, "only to dtos");
    __ add                     (src.index(), src.index(), src.base());
    __ ldmia                   (src.index(), RegisterSet(R0_tos_lo) | RegisterSet(R1_tos_hi));
    break;
#else
  case T_FLOAT:
    assert(dst == noreg, "only to ftos");
    __ add(src.index(), src.index(), src.base());
    __ ldr_float               (S0_tos, src.index());
    break;
  case T_DOUBLE:
    assert(dst == noreg, "only to dtos");
    __ add                     (src.index(), src.index(), src.base());
    __ ldr_double              (D0_tos, src.index());
    break;
#endif
  default: Unimplemented();
  }
}

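// Default (barrier-free) implementation of store_at: stores `val` (or the interpreter tos
// registers for long/float/double on 32-bit ARM) of the given BasicType to `obj`. On AARCH64
// with compressed oops, heap references are encoded before the narrow store, skipping the
// encode when the value is known to be null (is_null).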
void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address obj, Register val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
#ifdef AARCH64
      if (UseCompressedOops) {
        assert(!obj.uses(val), "not enough registers");
        if (!is_null) {
          __ encode_heap_oop(val);
        }
        __ str_w(val, obj);
      } else
#endif // AARCH64
      {
        __ str(val, obj);
      }
    } else {
      assert(in_native, "why else?");
      __ str(val, obj);
    }
    break;
  }
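  // Normalize the boolean to 0/1 before storing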
  case T_BOOLEAN:
    __ and_32(val, val, 1);
    __ strb(val, obj);
    break;
  case T_BYTE:    __ strb      (val, obj); break;
  case T_CHAR:    __ strh      (val, obj); break;
  case T_SHORT:   __ strh      (val, obj); break;
  case T_INT:     __ str       (val, obj); break;
  case T_ADDRESS: __ str       (val, obj); break;
  case T_LONG:
#ifdef AARCH64
    __ str                     (val, obj); break;
#else // AARCH64
    assert(val == noreg, "only tos");
    __ add                     (obj.index(), obj.index(), obj.base());
    __ stmia                   (obj.index(), RegisterSet(R0_tos_lo) | RegisterSet(R1_tos_hi));
#endif // AARCH64
    break;
#ifdef __SOFTFP__
  case T_FLOAT:
    assert(val == noreg, "only tos");
    __ str (R0_tos, obj);
    break;
  case T_DOUBLE:
    assert(val == noreg, "only tos");
    __ add                     (obj.index(), obj.index(), obj.base());
    __ stmia                   (obj.index(), RegisterSet(R0_tos_lo) | RegisterSet(R1_tos_hi));
    break;
#else
  case T_FLOAT:
    assert(val == noreg, "only tos");
    __ add                     (obj.index(), obj.index(), obj.base());
    __ str_float               (S0_tos, obj.index());
    break;
  case T_DOUBLE:
    assert(val == noreg, "only tos");
    __ add                     (obj.index(), obj.index(), obj.base());
    __ str_double              (D0_tos, obj.index());
    break;
#endif
  default: Unimplemented();
  }
}

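// Default oop equality check: a plain pointer comparison. Barrier sets that may need to
// resolve forwarded objects override this.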
void BarrierSetAssembler::obj_equals(MacroAssembler* masm,
                                     Register obj1, Register obj2) {
  __ cmp(obj1, obj2);
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1, Register tmp2,
                                 RegisterOrConstant size_expression, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    __ b(slow_case);
    return;
  }

  CollectedHeap* ch = Universe::heap();

  const Register top_addr = tmp1;
  const Register heap_end = tmp2;

  if (size_expression.is_register()) {
    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
  } else {
    assert_different_registers(obj, obj_end, top_addr, heap_end);
  }

  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
  if (load_const) {
    __ mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
  } else {
    __ ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
  }
  // Calculate new heap_top by adding the size of the object
  Label retry;
  __ bind(retry);

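  // Retry loop: read the current heap top, compute the new top, and try to publish it.
  // On AARCH64 this uses a load-exclusive/store-exclusive pair; on 32-bit ARM a plain load
  // followed by a compare-and-swap against the old top.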
#ifdef AARCH64
  __ ldxr(obj, top_addr);
#else
  __ ldr(obj, Address(top_addr));
#endif // AARCH64

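  // Load the heap end using the fixed offset between end_addr and top_addr, so a single
  // base register (top_addr) covers both locations.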
  __ ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
  __ add_rc(obj_end, obj, size_expression);
  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
  __ cmp(obj_end, obj);
  __ b(slow_case, lo);
  // Take the slow path if the new top would exceed the heap end; otherwise update heap_top below.
  __ cmp(obj_end, heap_end);
  __ b(slow_case, hi);

#ifdef AARCH64
  __ stxr(heap_end/*scratched*/, obj_end, top_addr);
  __ cbnz_w(heap_end, retry);
#else
  __ atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
  __ b(retry, ne);
#endif // AARCH64

  incr_allocated_bytes(masm, size_expression, tmp1);
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
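// Unlike eden_allocate, no atomic update is needed here: the TLAB is private to the current thread.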
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1,
                                 RegisterOrConstant size_expression, Label& slow_case) {
  const Register tlab_end = tmp1;
  assert_different_registers(obj, obj_end, tlab_end);

  __ ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
  __ ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
  __ add_rc(obj_end, obj, size_expression);
  __ cmp(obj_end, tlab_end);
  __ b(slow_case, hi);
  __ str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
}

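// Bumps the per-thread allocated-bytes counter (JavaThread::allocated_bytes) by size_in_bytes.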
void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, RegisterOrConstant size_in_bytes, Register tmp) {
#ifdef AARCH64
  __ ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  __ add_rc(tmp, tmp, size_in_bytes);
  __ str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
#else
  // Bump total bytes allocated by this thread
  Label done;

  // Borrow the Rthread for alloc counter
  Register Ralloc = Rthread;
  __ add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
  __ ldr(tmp, Address(Ralloc));
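  // Add the low word; if the addition does not carry ('cc' = carry clear), the 32-bit
  // counter did not overflow and a single conditional store is enough.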
  __ adds(tmp, tmp, size_in_bytes);
  __ str(tmp, Address(Ralloc), cc);
  __ b(done, cc);

  // Otherwise increment the high word and store the full 64-bit counter single-copy atomically
  // (an unlikely case on typical embedded systems, as it means more than 4GB has been allocated).
  // This uses ldrd/strd, which require an even-odd register pair; such a pair can be hard to
  // guarantee at a higher level, so the routine allocates one itself.
  Register low, high;
  // Select either R0/R1 or R2/R3
  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
    low = R2;
    high = R3;
  } else {
    low = R0;
    high = R1;
  }
  __ push(RegisterSet(low, high));

  __ ldrd(low, Address(Ralloc));
  __ adds(low, low, size_in_bytes);
  __ adc(high, high, 0);
  __ strd(low, Address(Ralloc));

  __ pop(RegisterSet(low, high));

  __ bind(done);

  // Unborrow the Rthread
  __ sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
#endif // AARCH64
}