1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "gc/g1/g1BarrierSet.hpp"
  28 #include "gc/g1/g1CardTable.hpp"
  29 #include "gc/g1/g1BarrierSetAssembler.hpp"
  30 #include "gc/g1/g1ThreadLocalData.hpp"
  31 #include "gc/g1/heapRegion.hpp"
  32 #include "interpreter/interp_masm.hpp"
  33 #include "runtime/sharedRuntime.hpp"
  34 #include "utilities/macros.hpp"
  35 
  36 #define __ masm->
  37 
// Generate the G1 pre-barrier for a reference array copy: if SATB marking
// is active, call into the runtime to enqueue the previous values of the
// destination range starting at addr with count elements.
//
// addr  - register holding the destination start address
// count - register holding the number of elements
// Kills O5 (tmp); addr and count are preserved across the runtime call.
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count) {
  bool dest_uninitialized = (decorators & AS_DEST_NOT_INITIALIZED) != 0;
  // With G1, don't generate the call if we statically know that the target is uninitialized
  if (!dest_uninitialized) {
    Register tmp = O5;
    assert_different_registers(addr, count, tmp);
    Label filtered;
    // Load the SATB "marking active" flag from the thread register (G2),
    // using a load matching the flag's declared width.
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
    } else {
      guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
    }
    // Is marking active?  If not (flag == 0), the pre-barrier is a no-op.
    __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);

    __ save_frame(0);
    // Save the necessary global regs... will be used after.
    // (save_frame only shifts the register window; global registers are not
    // protected and must be saved into locals by hand.)
    if (addr->is_global()) {
      __ mov(addr, L0);
    }
    if (count->is_global()) {
      __ mov(count, L1);
    }
    __ mov(addr->after_save(), O0);
    // Get the count into O1 (done in the call's delay slot below).
    address slowpath = UseCompressedOops ? CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_narrow_oop_entry)
                                         : CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry);
    __ call(slowpath);
    __ delayed()->mov(count->after_save(), O1);
    if (addr->is_global()) {
      __ mov(L0, addr);
    }
    if (count->is_global()) {
      __ mov(L1, count);
    }
    __ restore();

    __ bind(filtered);
    DEBUG_ONLY(__ set(0xDEADC0DE, tmp);) // we have killed tmp
  }
}
  82 
// Generate the G1 post-barrier for a reference array copy: unconditionally
// call into the runtime to process the cards covering the written range.
// addr holds the start address, count the number of elements; tmp is
// unused here.  A new frame is pushed purely to obtain fresh O-registers
// for the outgoing arguments.
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register tmp) {
  // Get some new fresh output registers.
  __ save_frame(0);
  __ mov(addr->after_save(), O0);
  __ call(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry));
  __ delayed()->mov(count->after_save(), O1);  // second argument set in the delay slot
  __ restore();
}
  92 
  93 #undef __
  94 
// Entry points (and end markers) of the lazily-generated SATB enqueue
// stubs; filled in by generate_satb_log_enqueue() and consumed by
// g1_write_barrier_pre().
static address satb_log_enqueue_with_frame = NULL;
static u_char* satb_log_enqueue_with_frame_end = NULL;

static address satb_log_enqueue_frameless = NULL;
static u_char* satb_log_enqueue_frameless_end = NULL;

// Size budget (bytes) for each generated enqueue stub; debug builds emit
// extra verification code, hence the larger bound.
static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
 102 
// Generate the out-of-line stub that enqueues a previous value (passed in
// O0) into the current thread's SATB buffer, refilling the buffer via the
// runtime when it is full.  Two variants are generated:
//   with_frame == true  - the stub saves its own frame, so the argument
//                         arrives as I0 after the save; returns via ret.
//   with_frame == false - frameless leaf variant; returns via retl.
// The stub's start and end addresses are recorded in the corresponding
// file-static satb_log_enqueue_* variables.
static void generate_satb_log_enqueue(bool with_frame) {
  BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
  CodeBuffer buf(bb);
  MacroAssembler masm(&buf);

#define __ masm.

  address start = __ pc();
  Register pre_val;

  Label refill, restart;
  if (with_frame) {
    __ save_frame(0);
    pre_val = I0;  // Was O0 before the save.
  } else {
    pre_val = O0;
  }

  int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
  int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());

  // The pointer-width loads/stores below require index and buf to be
  // pointer-sized fields.
  assert(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t) &&
         in_bytes(SATBMarkQueue::byte_width_of_buf()) == sizeof(intptr_t),
         "check sizes in assembly below");

  __ bind(restart);

  // Load the index into the SATB buffer. SATBMarkQueue::_index is a size_t
  // so ld_ptr is appropriate.
  __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);

  // index == 0?  Buffer is full; branch out to refill it.
  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);

  __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
  __ sub(L0, oopSize, L0);  // the buffer is filled top-down, one oop slot at a time

  __ st_ptr(pre_val, L1, L0);  // [_buf + index] := pre_val
  if (!with_frame) {
    // Use return-from-leaf; store the updated index in the delay slot.
    __ retl();
    __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
  } else {
    // Not delayed.
    __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
  }
  if (with_frame) {
    __ ret();
    __ delayed()->restore();
  }
  __ bind(refill);

  address handle_zero =
    CAST_FROM_FN_PTR(address,
                     &SATBMarkQueueSet::handle_zero_index_for_thread);
  // This should be rare enough that we can afford to save all the
  // scratch registers that the calling context might be using.
  __ mov(G1_scratch, L0);
  __ mov(G3_scratch, L1);
  __ mov(G4, L2);
  // We need the value of O0 above (for the write into the buffer), so we
  // save and restore it.
  __ mov(O0, L3);
  // Since the call will overwrite O7, we save and restore that, as well.
  __ mov(O7, L4);
  __ call_VM_leaf(L5, handle_zero, G2_thread);
  __ mov(L0, G1_scratch);
  __ mov(L1, G3_scratch);
  __ mov(L2, G4);
  __ mov(L3, O0);
  // Buffer refilled: retry the enqueue; restore O7 in the delay slot.
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ delayed()->mov(L4, O7);

  if (with_frame) {
    satb_log_enqueue_with_frame = start;
    satb_log_enqueue_with_frame_end = __ pc();
  } else {
    satb_log_enqueue_frameless = start;
    satb_log_enqueue_frameless_end = __ pc();
  }

#undef __
}
 186 
 187 #define __ masm->
 188 
// Generate the G1 pre-barrier (SATB barrier) for a single oop store: when
// marking is active, the field's previous value is enqueued in the thread's
// SATB buffer via one of the lazily-generated enqueue stubs.
//
// obj/index/offset - the field address, either [obj + offset] (index ==
//                    noreg) or [obj + index].  If obj == noreg, no load is
//                    generated and the previous value must be supplied in
//                    pre_val.
// pre_val          - previous value register (only when obj == noreg);
//                    must be a G- or O-register.
// tmp              - scratch register; killed.
// preserve_o_regs  - if true, the enqueue call is wrapped in its own
//                    register window so the caller's O-registers survive.
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                                 Register obj,
                                                 Register index,
                                                 int offset,
                                                 Register pre_val,
                                                 Register tmp,
                                                 bool preserve_o_regs) {
  Label filtered;

  if (obj == noreg) {
    // We are not loading the previous value so make
    // sure that we don't trash the value in pre_val
    // with the code below.
    assert_different_registers(pre_val, tmp);
  } else {
    // We will be loading the previous value
    // in this code so...
    assert(offset == 0 || index == noreg, "choose one");
    assert(pre_val == noreg, "check this code");
  }

  // Load the SATB "marking active" flag from the thread register (G2),
  // using a load matching the flag's declared width.
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
  } else {
    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
  }

  // Is marking active?  If not, the pre-barrier is a no-op.
  __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);

  // Do we need to load the previous value?
  if (obj != noreg) {
    // Load the previous value...
    if (index == noreg) {
      if (Assembler::is_simm13(offset)) {
        __ load_heap_oop(obj, offset, tmp);
      } else {
        // Offset doesn't fit in a 13-bit immediate; materialize it first.
        __ set(offset, tmp);
        __ load_heap_oop(obj, tmp, tmp);
      }
    } else {
      __ load_heap_oop(obj, index, tmp);
    }
    // Previous value has been loaded into tmp
    pre_val = tmp;
  }

  assert(pre_val != noreg, "must have a real register");

  // Is the previous value null?  Null values need not be enqueued.
  __ cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);

  // OK, it's not filtered, so we'll need to call enqueue.  In the normal
  // case, pre_val will be a scratch G-reg, but there are some cases in
  // which it's an O-reg.  In the first case, do a normal call.  In the
  // latter, do a save here and call the frameless version.

  guarantee(pre_val->is_global() || pre_val->is_out(),
            "Or we need to think harder.");

  if (pre_val->is_global() && !preserve_o_regs) {
    __ call(satb_log_enqueue_with_frame);
    __ delayed()->mov(pre_val, O0);  // argument set up in the delay slot
  } else {
    // Push a window so the stub's use of O-registers can't clobber the
    // caller's; the frameless stub returns with retl.
    __ save_frame(0);
    __ call(satb_log_enqueue_frameless);
    __ delayed()->mov(pre_val->after_save(), O0);
    __ restore();
  }

  __ bind(filtered);
}
 263 
 264 #undef __
 265 
// Entry point (and end marker) of the lazily-generated dirty-card enqueue
// stub; filled in by generate_dirty_card_log_enqueue() and consumed by
// g1_write_barrier_post().
static address dirty_card_log_enqueue = 0;
static u_char* dirty_card_log_enqueue_end = 0;
 268 
// Generate the out-of-line leaf stub that dirties the card for a store
// address and records the card pointer in the current thread's dirty-card
// queue, refilling the queue via the runtime when it is full.
// This gets to assume that O0 contains the (store) address.
// byte_map_base is the card table's base, baked into the stub as an
// immediate.  Start/end addresses are recorded in the file-static
// dirty_card_log_enqueue* variables.
static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
  CodeBuffer buf(bb);
  MacroAssembler masm(&buf);
#define __ masm.
  address start = __ pc();

  Label not_already_dirty, restart, refill, young_card;

  __ srlx(O0, CardTable::card_shift, O0);  // O0 := card index of the store address
  AddressLiteral addrlit(byte_map_base);
  __ set(addrlit, O1); // O1 := <card table base>
  __ ldub(O0, O1, O2); // O2 := [O0 + O1]

  // Stores to young-gen cards never need refinement; return early.
  __ cmp_and_br_short(O2, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);

  // StoreLoad fence, then re-read the card: another thread may have
  // dirtied it between the first read and the fence.
  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  __ ldub(O0, O1, O2); // O2 := [O0 + O1]

  assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
  __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);

  __ bind(young_card);
  // We didn't take the branch, so we're already dirty: return.
  // Use return-from-leaf
  __ retl();
  __ delayed()->nop();

  // Not dirty.
  __ bind(not_already_dirty);

  // Get O0 + O1 into a reg by itself
  __ add(O0, O1, O3);

  // First, dirty it.
  __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).

  int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
  int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
  __ bind(restart);

  // Load the index into the update buffer. DirtyCardQueue::_index is
  // a size_t so ld_ptr is appropriate here.
  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);

  // index == 0?  Buffer is full; branch out to refill it.
  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);

  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
  __ sub(L0, oopSize, L0);  // the buffer is filled top-down, one slot at a time

  __ st_ptr(O3, L1, L0);  // [_buf + index] := O3 (the card pointer)
  // Use return-from-leaf
  __ retl();
  __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);

  __ bind(refill);
  address handle_zero =
    CAST_FROM_FN_PTR(address,
                     &DirtyCardQueueSet::handle_zero_index_for_thread);
  // This should be rare enough that we can afford to save all the
  // scratch registers that the calling context might be using.
  __ mov(G1_scratch, L3);
  __ mov(G3_scratch, L5);
  // We need the value of O3 above (for the write into the buffer), so we
  // save and restore it.
  __ mov(O3, L6);
  // Since the call will overwrite O7, we save and restore that, as well.
  __ mov(O7, L4);

  __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
  __ mov(L3, G1_scratch);
  __ mov(L5, G3_scratch);
  __ mov(L6, O3);
  // Buffer refilled: retry the enqueue; restore O7 in the delay slot.
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ delayed()->mov(L4, O7);

  dirty_card_log_enqueue = start;
  dirty_card_log_enqueue_end = __ pc();
  // XXX Should have a guarantee here about not going off the end!
  // Does it already do so?  Do an experiment...

#undef __

}
 355 
 356 #define __ masm->
 357 
 358 void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) {
 359   Label filtered;
 360   MacroAssembler* post_filter_masm = masm;
 361 
 362   if (new_val == G0) return;
 363 
 364   G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
 365 
 366   if (G1RSBarrierRegionFilter) {
 367     __ xor3(store_addr, new_val, tmp);
 368     __ srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
 369 
 370     __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
 371   }
 372 
 373   // If the "store_addr" register is an "in" or "local" register, move it to
 374   // a scratch reg so we can pass it as an argument.
 375   bool use_scr = !(store_addr->is_global() || store_addr->is_out());
 376   // Pick a scratch register different from "tmp".
 377   Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
 378   // Make sure we use up the delay slot!
 379   if (use_scr) {
 380     post_filter_masm->mov(store_addr, scr);
 381   } else {
 382     post_filter_masm->nop();
 383   }
 384   __ save_frame(0);
 385   __ call(dirty_card_log_enqueue);
 386   if (use_scr) {
 387     __ delayed()->mov(scr, O0);
 388   } else {
 389     __ delayed()->mov(store_addr->after_save(), O0);
 390   }
 391   __ restore();
 392 
 393   __ bind(filtered);
 394 }
 395 
// Store val into the heap/root location dst, wrapped in the G1 pre- and
// post-barriers as demanded by the decorators.
// tmp is a scratch register (killed); with compressed oops it also holds
// an uncompressed copy of val for the post-barrier's region check.
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Register val, Address dst, Register tmp) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;

  bool needs_pre_barrier = in_heap || in_concurrent_root;
  // No need for post barrier if storing NULL
  bool needs_post_barrier = val != G0 && in_heap;

  // "Precise" card marking dirties the card of the exact element/field
  // address (needed for arrays and unknown-strength reference stores);
  // otherwise the card of the object head suffices.
  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
  bool precise = on_array || on_anonymous;

  Register index = dst.has_index() ? dst.index() : noreg;
  int disp = dst.has_disp() ? dst.disp() : 0;

  if (needs_pre_barrier) {
    // Load and record the previous value.
    g1_write_barrier_pre(masm, dst.base(), index, disp,
                         noreg /* pre_val */,
                         tmp, true /*preserve_o_regs*/);
  }

  Register new_val = val;
  if (needs_post_barrier) {
    // G1 barrier needs uncompressed oop for region cross check.
    // Copy val aside before store_at possibly compresses it in place.
    if (UseCompressedOops && val != G0) {
      new_val = tmp;
      __ mov(val, new_val);
    }
  }

  BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp);

  if (needs_post_barrier) {
    Register base = dst.base();
    if (precise) {
      // Form the exact element/field address in base for the card mark.
      if (!dst.has_index()) {
        __ add(base, disp, base);
      } else {
        assert(!dst.has_disp(), "not supported yet");
        __ add(base, index, base);
      }
    }
    g1_write_barrier_post(masm, base, new_val, tmp);
  }
}
 443 
// Load an oop from src into dst; for weak/phantom Reference loads, also
// emit the G1 pre-barrier so the loaded referent is logged in an SATB
// buffer (keeping it alive for concurrent marking).  tmp is killed when
// the barrier is emitted.
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Address src, Register dst, Register tmp) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  // Load the value of the referent field.
  ModRefBarrierSetAssembler::load_at(masm, decorators, type, src, dst, tmp);
  if (on_oop && on_reference) {
    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer. Note with
    // these parameters the pre-barrier does not generate
    // the load of the previous value

    Register pre_val = dst;
    bool saved = false;
    if (pre_val->is_in()) {
      // The g1_write_barrier_pre method assumes that the pre_val
      // is not in an input register.  Push a window and move it to
      // O0 (the matching restore is emitted below).
      __ save_frame_and_mov(0, pre_val, O0);
      pre_val = O0;
      saved = true;
    }

    g1_write_barrier_pre(masm, noreg /* obj */, noreg /* index */, 0 /* offset */,
                         pre_val /* pre_val */,
                         tmp /* tmp */,
                         true /* preserve_o_regs */);

    if (saved) {
      __ restore();
    }
  }
}
 478 
 479 #undef __
 480 
 481 void G1BarrierSetAssembler::barrier_stubs_init() {
 482   if (dirty_card_log_enqueue == 0) {
 483     G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
 484     CardTable *ct = bs->card_table();
 485     generate_dirty_card_log_enqueue(ct->byte_map_base());
 486     assert(dirty_card_log_enqueue != 0, "postcondition.");
 487   }
 488   if (satb_log_enqueue_with_frame == 0) {
 489     generate_satb_log_enqueue(true);
 490     assert(satb_log_enqueue_with_frame != 0, "postcondition.");
 491   }
 492   if (satb_log_enqueue_frameless == 0) {
 493     generate_satb_log_enqueue(false);
 494     assert(satb_log_enqueue_frameless != 0, "postcondition.");
 495   }
 496 }