1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "gc/g1/g1BarrierSet.hpp"
  28 #include "gc/g1/g1BarrierSetAssembler.hpp"
  29 #include "gc/g1/g1CardTable.hpp"
  30 #include "gc/g1/g1ThreadLocalData.hpp"
  31 #include "gc/g1/heapRegion.hpp"
  32 #include "gc/shared/collectedHeap.hpp"
  33 #include "interpreter/interp_masm.hpp"
  34 #include "runtime/sharedRuntime.hpp"
  35 #include "utilities/macros.hpp"
  36 
  37 #define __ masm->
  38 
// Generate the G1 SATB pre-barrier for a reference-array bulk store
// (arraycopy): log the oops about to be overwritten in the destination
// range so concurrent marking does not lose them.
//   addr  - register holding the start of the destination range
//   count - register holding the number of elements
// No barrier is emitted when the destination is known to be
// uninitialized (there are no previous values to record).
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count) {
  bool dest_uninitialized = (decorators & AS_DEST_NOT_INITIALIZED) != 0;

  if (!dest_uninitialized) {
    Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
#ifndef _LP64
    // On 32-bit there is no dedicated thread register: borrow rax
    // (saved/restored around the use) and load the thread from TLS.
    __ push(thread);
    __ get_thread(thread);
#endif

    Label filtered;
    Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
    // Is marking active? The active flag's width depends on the build;
    // compare with the matching operand size.
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ cmpl(in_progress, 0);
    } else {
      assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ cmpb(in_progress, 0);
    }

    // Restore rax before the (possibly taken) branch; the flags from the
    // compare above survive the pop.
    NOT_LP64(__ pop(thread);)

    // Marking not active -> nothing to log, skip the runtime call.
    __ jcc(Assembler::equal, filtered);

    __ pusha();                      // push registers
#ifdef _LP64
    // Move (addr, count) into the first two C argument registers,
    // taking care not to clobber one argument with the other.
    if (count == c_rarg0) {
      if (addr == c_rarg1) {
        // exactly backwards!!
        __ xchgptr(c_rarg1, c_rarg0);
      } else {
        __ movptr(c_rarg1, count);
        __ movptr(c_rarg0, addr);
      }
    } else {
      __ movptr(c_rarg0, addr);
      __ movptr(c_rarg1, count);
    }
    // The runtime entry differs by oop encoding (narrow vs. full-width).
    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_narrow_oop_entry), 2);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry), 2);
    }
#else
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry),
                    addr, count);
#endif
    __ popa();

    __ bind(filtered);
  }
}
  92 
// Generate the G1 post-barrier for a reference-array bulk store:
// call into the runtime to dirty the cards covering the destination
// range [addr, addr + count).
//   addr  - register holding the start of the destination range
//   count - register holding the number of elements
//   tmp   - unused on this platform (registers are saved wholesale)
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register tmp) {
  __ pusha();             // push registers (overkill)
#ifdef _LP64
  // Shuffle (addr, count) into the first two C argument registers
  // without clobbering either value.
  if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
    assert_different_registers(c_rarg1, addr);
    __ mov(c_rarg1, count);
    __ mov(c_rarg0, addr);
  } else {
    assert_different_registers(c_rarg0, count);
    __ mov(c_rarg0, addr);
    __ mov(c_rarg1, count);
  }
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry), 2);
#else
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry),
                  addr, count);
#endif
  __ popa();
}
 113 
 114 void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 115                                     Register dst, Address src, Register tmp1, Register tmp_thread) {
 116   bool on_oop = type == T_OBJECT || type == T_ARRAY;
 117   bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
 118   bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
 119   bool on_reference = on_weak || on_phantom;
 120   ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
 121   if (on_oop && on_reference) {
 122     const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
 123     NOT_LP64(__ get_thread(thread));
 124 
 125     // Generate the G1 pre-barrier code to log the value of
 126     // the referent field in an SATB buffer.
 127     g1_write_barrier_pre(masm /* masm */,
 128                          noreg /* obj */,
 129                          dst /* pre_val */,
 130                          thread /* thread */,
 131                          tmp1 /* tmp */,
 132                          true /* tosca_live */,
 133                          true /* expand_call */);
 134   }
 135 }
 136 
// Emit the G1 SATB pre-barrier: if concurrent marking is active, log the
// previous value of the field being written into the thread-local SATB
// buffer, calling the runtime when the buffer is full.
//   obj         - register holding the field address, or noreg if pre_val
//                 already contains the previous value
//   pre_val     - register that (will) hold the previous oop value
//   thread      - thread register (must be r15_thread on 64-bit)
//   tmp         - scratch register
//   tosca_live  - rax holds a live value and must be preserved
//   expand_call - emit the raw call sequence instead of call_VM_leaf
//                 (needed when there is no interpreter frame; see below)
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                                 Register obj,
                                                 Register pre_val,
                                                 Register thread,
                                                 Register tmp,
                                                 bool tosca_live,
                                                 bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  // Is marking active? (Compare with the flag's build-dependent width.)
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ cmpl(in_progress, 0);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ cmpb(in_progress, 0);
  }
  __ jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null? Null previous values need no logging.
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ movptr(tmp, index);                   // tmp := *index_adr
  __ cmpptr(tmp, 0);                       // tmp == 0?
  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime

  // Fast path: the buffer fills downward; decrement the index and
  // store pre_val at buffer + new index.
  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
  __ movptr(index, tmp);                   // *index_adr := tmp
  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  // Slow path: buffer is full, hand pre_val to the runtime.
  __ bind(runtime);
  // save the live input values
  if(tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we care generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    // Place (pre_val, thread) in the C argument registers, avoiding
    // redundant moves when a value is already in place.
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    if (c_rarg0 != pre_val) {
      __ mov(c_rarg0, pre_val);
    }
#else
    // 32-bit C calling convention: arguments go on the stack.
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values (in reverse push order)
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if(tosca_live) __ pop(rax);

  __ bind(done);
}
 255 
// Emit the G1 post-barrier: after an oop store, dirty the card covering
// the store address and enqueue it on the thread-local dirty card queue,
// calling the runtime when the queue is full. Stores within a single
// heap region, null stores, and already-dirty/young cards are filtered
// out without touching the queue.
//   store_addr - register holding the address stored into
//   new_val    - register holding the (uncompressed) stored oop
//   thread     - thread register (must be r15_thread on 64-bit)
//   tmp, tmp2  - scratch registers (both clobbered)
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                  Register store_addr,
                                                  Register new_val,
                                                  Register thread,
                                                  Register tmp,
                                                  Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  CardTableBarrierSet* ct =
    barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");

  Label done;
  Label runtime;

  // Does store cross heap regions?
  // (XOR of the two addresses shifted by the region size is zero iff
  // they lie in the same region; same-region stores need no barrier.)

  __ movptr(tmp, store_addr);
  __ xorptr(tmp, new_val);
  __ shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  __ jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  __ cmpptr(new_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  const Register card_addr = tmp;
  const Register cardtable = tmp2;

  // card_addr := byte_map_base + (store_addr >> card_shift)
  __ movptr(card_addr, store_addr);
  __ shrptr(card_addr, CardTable::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
  __ addptr(card_addr, cardtable);

  // Young-region cards never need refinement.
  __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val());
  __ jcc(Assembler::equal, done);

  // StoreLoad fence before re-reading the card, then skip if another
  // thread already dirtied it.
  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  __ cmpb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
  __ jcc(Assembler::equal, done);


  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  __ movb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());

  // Fast path: the queue fills downward; decrement the index and store
  // the card address at buffer + new index. Index 0 means full.
  __ cmpl(queue_index, 0);
  __ jcc(Assembler::equal, runtime);
  __ subl(queue_index, wordSize);
  __ movptr(tmp2, buffer);
#ifdef _LP64
  // queue_index is 32-bit here; sign-extend before the 64-bit add.
  __ movslq(rscratch1, queue_index);
  __ addq(tmp2, rscratch1);
  __ movq(Address(tmp2, 0), card_addr);
#else
  __ addl(tmp2, queue_index);
  __ movl(Address(tmp2, 0), card_addr);
#endif
  __ jmp(done);

  // Slow path: queue is full, hand the card to the runtime.
  __ bind(runtime);
  // save the live input values
  __ push(store_addr);
  __ push(new_val);
#ifdef _LP64
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  __ push(thread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  __ pop(thread);
#endif
  __ pop(new_val);
  __ pop(store_addr);

  __ bind(done);
}
 343 
// Emit an oop store with the full G1 write barrier:
// pre-barrier (log the old value for SATB), the store itself, and
// post-barrier (dirty the card for region-crossing stores).
//   dst  - destination field address
//   val  - register holding the new value, or noreg to store NULL
//   tmp1 - scratch; receives the flattened destination address
//   tmp2 - scratch; pre_val for the pre-barrier, and the uncompressed
//          copy of val for the post-barrier's region-cross check
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Address dst, Register val, Register tmp1, Register tmp2) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;

  // Pre-barrier: heap or concurrent-root stores. Post-barrier: only
  // real (non-NULL-register) stores into the heap.
  bool needs_pre_barrier = in_heap || in_concurrent_root;
  bool needs_post_barrier = val != noreg && in_heap;

  Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
  Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
  // flatten object address if needed
  // We do it regardless of precise because we need the registers
  if (dst.index() == noreg && dst.disp() == 0) {
    if (dst.base() != tmp1) {
      __ movptr(tmp1, dst.base());
    }
  } else {
    __ lea(tmp1, dst);
  }

#ifndef _LP64
  InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
#endif

  // On 32-bit: load the thread into rcx and save the bytecode pointer,
  // since the scratch registers used here overlap interpreter state.
  NOT_LP64(__ get_thread(rcx));
  NOT_LP64(imasm->save_bcp());

  if (needs_pre_barrier) {
    g1_write_barrier_pre(masm /*masm*/,
                         tmp1 /* obj */,
                         tmp2 /* pre_val */,
                         rthread /* thread */,
                         tmp3  /* tmp */,
                         val != noreg /* tosca_live */,
                         false /* expand_call */);
  }
  if (val == noreg) {
    // Storing NULL: no post-barrier needed.
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
  } else {
    Register new_val = val;
    if (needs_post_barrier) {
      // G1 barrier needs uncompressed oop for region cross check.
      if (UseCompressedOops) {
        new_val = tmp2;
        __ movptr(new_val, val);
      }
    }
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    if (needs_post_barrier) {
      g1_write_barrier_post(masm /*masm*/,
                            tmp1 /* store_adr */,
                            new_val /* new_val */,
                            rthread /* thread */,
                            tmp3 /* tmp */,
                            tmp2 /* tmp2 */);
    }
  }
  NOT_LP64(imasm->restore_bcp());
}