1 /*
   2  * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/codeBuffer.hpp"
  28 #include "asm/macroAssembler.inline.hpp"
  29 #include "compiler/disassembler.hpp"
  30 #include "gc/shared/cardTable.hpp"
  31 #include "gc/shared/collectedHeap.inline.hpp"
  32 #include "interpreter/interpreter.hpp"
  33 #include "gc/shared/cardTableBarrierSet.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "memory/universe.hpp"
  36 #include "oops/compressedOops.inline.hpp"
  37 #include "oops/klass.inline.hpp"
  38 #include "opto/compile.hpp"
  39 #include "opto/intrinsicnode.hpp"
  40 #include "opto/matcher.hpp"
  41 #include "prims/methodHandles.hpp"
  42 #include "registerSaver_s390.hpp"
  43 #include "runtime/biasedLocking.hpp"
  44 #include "runtime/icache.hpp"
  45 #include "runtime/interfaceSupport.inline.hpp"
  46 #include "runtime/objectMonitor.hpp"
  47 #include "runtime/os.hpp"
  48 #include "runtime/safepoint.hpp"
  49 #include "runtime/safepointMechanism.hpp"
  50 #include "runtime/sharedRuntime.hpp"
  51 #include "runtime/stubRoutines.hpp"
  52 #include "utilities/events.hpp"
  53 #include "utilities/macros.hpp"
  54 #if INCLUDE_ALL_GCS
  55 #include "gc/g1/g1BarrierSet.hpp"
  56 #include "gc/g1/g1CardTable.hpp"
  57 #include "gc/g1/g1CollectedHeap.inline.hpp"
  58 #include "gc/g1/heapRegion.hpp"
  59 #endif
  60 
  61 #include <ucontext.h>
  62 
  63 #define BLOCK_COMMENT(str) block_comment(str)
  64 #define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
  65 
  66 // Move 32-bit register if destination and source are different.
  67 void MacroAssembler::lr_if_needed(Register rd, Register rs) {
  68   if (rs != rd) { z_lr(rd, rs); }
  69 }
  70 
  71 // Move register if destination and source are different.
  72 void MacroAssembler::lgr_if_needed(Register rd, Register rs) {
  73   if (rs != rd) { z_lgr(rd, rs); }
  74 }
  75 
  76 // Zero-extend 32-bit register into 64-bit register if destination and source are different.
  77 void MacroAssembler::llgfr_if_needed(Register rd, Register rs) {
  78   if (rs != rd) { z_llgfr(rd, rs); }
  79 }
  80 
  81 // Move float register if destination and source are different.
  82 void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) {
  83   if (rs != rd) { z_ldr(rd, rs); }
  84 }
  85 
  86 // Move integer register if destination and source are different.
  87 // It is assumed that shorter-than-int types are already
  88 // appropriately sign-extended.
void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src,
                                        BasicType src_type) {
  assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types");
  assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types");

  if (dst_type == src_type) {
    lgr_if_needed(dst, src); // Just move all 64 bits.
    return;
  }

  // Types differ: narrow or widen as dictated by the (dst_type, src_type) pair.
  switch (dst_type) {
    // Do not support these types for now.
    //  case T_BOOLEAN:
    case T_BYTE:  // signed byte
      switch (src_type) {
        case T_INT:
          z_lgbr(dst, src); // Narrow to byte, sign-extended into all 64 bits.
          break;
        default:
          ShouldNotReachHere();
      }
      return;

    case T_CHAR:
    case T_SHORT:
      switch (src_type) {
        case T_INT:
          if (dst_type == T_CHAR) {
            z_llghr(dst, src); // char is unsigned: zero-extend halfword.
          } else {
            z_lghr(dst, src);  // short is signed: sign-extend halfword.
          }
          break;
        default:
          ShouldNotReachHere();
      }
      return;

    case T_INT:
      switch (src_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_CHAR:
        case T_SHORT:
        case T_INT:
        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          // A 32-bit copy suffices; shorter-than-int sources are assumed
          // to be properly extended already (see function contract above).
          lr_if_needed(dst, src);
          // llgfr_if_needed(dst, src);  // zero-extend (in case we need to find a bug).
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
    case T_LONG:
      switch (src_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_CHAR:
        case T_SHORT:
        case T_INT:
          z_lgfr(dst, src); // sign extension
          return;

        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lgr_if_needed(dst, src); // Already 64 bits wide: plain copy.
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
      return;
    case T_OBJECT:
    case T_ARRAY:
    case T_VOID:
    case T_ADDRESS:
      switch (src_type) {
        // These types don't make sense to be converted to pointers:
        //      case T_BOOLEAN:
        //      case T_BYTE:
        //      case T_CHAR:
        //      case T_SHORT:

        case T_INT:
          z_llgfr(dst, src); // zero extension
          return;

        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lgr_if_needed(dst, src); // Already 64 bits wide: plain copy.
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
      return;
    default:
      assert(false, "non-integer dst type");
      return;
  }
}
 203 
 204 // Move float register if destination and source are different.
 205 void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type,
 206                                          FloatRegister src, BasicType src_type) {
 207   assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types");
 208   assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types");
 209   if (dst_type == src_type) {
 210     ldr_if_needed(dst, src); // Just move all 64 bits.
 211   } else {
 212     switch (dst_type) {
 213       case T_FLOAT:
 214         assert(src_type == T_DOUBLE, "invalid float type combination");
 215         z_ledbr(dst, src);
 216         return;
 217       case T_DOUBLE:
 218         assert(src_type == T_FLOAT, "invalid float type combination");
 219         z_ldebr(dst, src);
 220         return;
 221       default:
 222         assert(false, "non-float dst type");
 223         return;
 224     }
 225   }
 226 }
 227 
 228 // Optimized emitter for reg to mem operations.
 229 // Uses modern instructions if running on modern hardware, classic instructions
 230 // otherwise. Prefers (usually shorter) classic instructions if applicable.
 231 // Data register (reg) cannot be used as work register.
 232 //
 233 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
 234 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::freg2mem_opt(FloatRegister reg,
                                  int64_t       disp,
                                  Register      index,
                                  Register      base,
                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
                                  Register      scratch) {
  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);   // disp fits the 12-bit unsigned (classic) form.
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);  // disp fits the 20-bit signed (long-displacement) form.
    } else {
      // disp exceeds any encodable displacement; it must be folded into a register.
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) {   // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);             // scratch := base + disp.
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp);  // Restore base.
            }
          }
        } else {   // scratch == Z_R0
          // Z_R0 cannot take part in address generation; temporarily adjust base instead.
          z_lgr(scratch, base);
          add2reg(base, disp);
          (this->*classic)(reg, 0, index, base);
          z_lgr(base, scratch);      // Restore base.
        }
      }
    }
  }
}
 272 
 273 void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) {
 274   if (is_double) {
 275     freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std));
 276   } else {
 277     freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste));
 278   }
 279 }
 280 
 281 // Optimized emitter for mem to reg operations.
 282 // Uses modern instructions if running on modern hardware, classic instructions
 283 // otherwise. Prefers (usually shorter) classic instructions if applicable.
 284 // data register (reg) cannot be used as work register.
 285 //
 286 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
 287 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::mem2freg_opt(FloatRegister reg,
                                  int64_t       disp,
                                  Register      index,
                                  Register      base,
                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
                                  Register      scratch) {
  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);   // disp fits the 12-bit unsigned (classic) form.
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);  // disp fits the 20-bit signed (long-displacement) form.
    } else {
      // disp exceeds any encodable displacement; it must be folded into a register.
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) {   // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);             // scratch := base + disp.
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp);  // Restore base.
            }
          }
        } else {   // scratch == Z_R0
          // Z_R0 cannot take part in address generation; temporarily adjust base instead.
          z_lgr(scratch, base);
          add2reg(base, disp);
          (this->*classic)(reg, 0, index, base);
          z_lgr(base, scratch);      // Restore base.
        }
      }
    }
  }
}
 325 
 326 void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) {
 327   if (is_double) {
 328     mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld));
 329   } else {
 330     mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le));
 331   }
 332 }
 333 
 334 // Optimized emitter for reg to mem operations.
 335 // Uses modern instructions if running on modern hardware, classic instructions
 336 // otherwise. Prefers (usually shorter) classic instructions if applicable.
 337 // Data register (reg) cannot be used as work register.
 338 //
 339 // Don't rely on register locking, instead pass a scratch register
 340 // (Z_R0 by default)
 341 // CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::reg2mem_opt(Register reg,
                                 int64_t  disp,
                                 Register index,
                                 Register base,
                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
                                 Register scratch) {
  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);   // disp fits the 12-bit unsigned (classic) form.
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);  // disp fits the 20-bit signed (long-displacement) form.
    } else {
      // disp exceeds any encodable displacement; it must be folded into a register.
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) {   // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);             // scratch := base + disp.
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp);  // Restore base.
            }
          }
        } else {   // scratch == Z_R0
          // Unlike the float variants, reg lives in the same register class as
          // scratch/base here, so aliasing must be ruled out before clobbering base.
          if ((scratch == reg) || (scratch == base) || (reg == base)) {
            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
          } else {
            z_lgr(scratch, base);
            add2reg(base, disp);
            (this->*classic)(reg, 0, index, base);
            z_lgr(base, scratch);    // Restore base.
          }
        }
      }
    }
  }
}
 383 
 384 int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) {
 385   int store_offset = offset();
 386   if (is_double) {
 387     reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg));
 388   } else {
 389     reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st));
 390   }
 391   return store_offset;
 392 }
 393 
 394 // Optimized emitter for mem to reg operations.
 395 // Uses modern instructions if running on modern hardware, classic instructions
 396 // otherwise. Prefers (usually shorter) classic instructions if applicable.
 397 // Data register (reg) will be used as work register where possible.
void MacroAssembler::mem2reg_opt(Register reg,
                                 int64_t  disp,
                                 Register index,
                                 Register base,
                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) {
  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);   // disp fits the 12-bit unsigned (classic) form.
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);  // disp fits the 20-bit signed (long-displacement) form.
    } else {
      // disp too large for any encoding. As this is a load, the destination reg
      // can be clobbered and serves as work register where possible.
      if ((reg == index) && (reg == base)) {
        z_sllg(reg, reg, 1);      // reg := reg*2, i.e. the index+base sum (both alias reg).
        add2reg(reg, disp);
        (this->*classic)(reg, 0, noreg, reg);
      } else if ((reg == index) && (reg != Z_R0)) {
        add2reg(reg, disp);       // Fold disp into the index operand.
        (this->*classic)(reg, 0, reg, base);
      } else if (reg == base) {
        add2reg(reg, disp);       // Fold disp into the base operand.
        (this->*classic)(reg, 0, index, reg);
      } else if (reg != Z_R0) {
        add2reg(reg, disp, base); // reg := base + disp, then use reg as base.
        (this->*classic)(reg, 0, index, reg);
      } else { // reg == Z_R0 && reg != base here
        // Z_R0 cannot take part in address generation; temporarily adjust base.
        add2reg(base, disp);
        (this->*classic)(reg, 0, index, base);
        add2reg(base, -disp);     // Restore base.
      }
    }
  }
}
 432 
 433 void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) {
 434   if (is_double) {
 435     z_lg(reg, a);
 436   } else {
 437     mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l));
 438   }
 439 }
 440 
 441 void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) {
 442   mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf));
 443 }
 444 
 445 void MacroAssembler::and_imm(Register r, long mask,
 446                              Register tmp /* = Z_R0 */,
 447                              bool wide    /* = false */) {
 448   assert(wide || Immediate::is_simm32(mask), "mask value too large");
 449 
 450   if (!wide) {
 451     z_nilf(r, mask);
 452     return;
 453   }
 454 
 455   assert(r != tmp, " need a different temporary register !");
 456   load_const_optimized(tmp, mask);
 457   z_ngr(r, tmp);
 458 }
 459 
 460 // Calculate the 1's complement.
 461 // Note: The condition code is neither preserved nor correctly set by this code!!!
 462 // Note: (wide == false) does not protect the high order half of the target register
 463 //       from alteration. It only serves as optimization hint for 32-bit results.
void MacroAssembler::not_(Register r1, Register r2, bool wide) {

  if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place.
    z_xilf(r1, -1);      // Invert low 32 bits.
    if (wide) {
      z_xihf(r1, -1);    // Invert high 32 bits as well.
    }
  } else { // Distinct src and dst registers.
    if (VM_Version::has_DistinctOpnds()) {
      load_const_optimized(r1, -1);
      z_xgrk(r1, r2, r1); // r1 := r2 XOR -1, one 64-bit three-operand op.
    } else {
      // Copy first, then invert in place.
      if (wide) {
        z_lgr(r1, r2);
        z_xilf(r1, -1);
        z_xihf(r1, -1);
      } else {
        z_lr(r1, r2);
        z_xilf(r1, -1);
      }
    }
  }
}
 487 
 488 unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) {
 489   assert(lBitPos >=  0,      "zero is  leftmost bit position");
 490   assert(rBitPos <= 63,      "63   is rightmost bit position");
 491   assert(lBitPos <= rBitPos, "inverted selection interval");
 492   return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1));
 493 }
 494 
 495 // Helper function for the "Rotate_then_<logicalOP>" emitters.
 496 // Rotate src, then mask register contents such that only bits in range survive.
 497 // For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range.
 498 // For oneBits == true,  all bits not in range are set to 1. Useful for preserving all bits outside range.
 499 // The caller must ensure that the selected range only contains bits with defined value.
void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
                                      int nRotate, bool src32bit, bool dst32bit, bool oneBits) {
  assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination");
  bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G).
  bool srl4rll = (nRotate <  0) && (-nRotate <= lBitPos);     // Substitute SRL(G) for RLL(G).
  //  Pre-determine which parts of dst will be zero after shift/rotate.
  //  Naming: h/l = high/low word, second letter = high/low halfword within it.
  bool llZero  =  sll4rll && (nRotate >= 16);                 // Shifting left >= 16 clears the low halfword.
  bool lhZero  = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
  bool lfZero  = llZero && lhZero;                            // Entire low word zero.
  bool hlZero  = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
  bool hhZero  =                                 (srl4rll && (nRotate <= -16));
  bool hfZero  = hlZero && hhZero;                            // Entire high word zero.

  // rotate then mask src operand.
  // if oneBits == true,  all bits outside selected range are 1s.
  // if oneBits == false, all bits outside selected range are 0s.
  if (src32bit) {   // There might be garbage in the upper 32 bits which will get masked away.
    if (dst32bit) {
      z_rll(dst, src, nRotate);   // Copy and rotate, upper half of reg remains undisturbed.
    } else {
      if      (sll4rll) { z_sllg(dst, src,  nRotate); }
      else if (srl4rll) { z_srlg(dst, src, -nRotate); }
      else              { z_rllg(dst, src,  nRotate); }
    }
  } else {
    if      (sll4rll) { z_sllg(dst, src,  nRotate); }
    else if (srl4rll) { z_srlg(dst, src, -nRotate); }
    else              { z_rllg(dst, src,  nRotate); }
  }

  unsigned long  range_mask    = create_mask(lBitPos, rBitPos);
  unsigned int   range_mask_h  = (unsigned int)(range_mask >> 32);
  unsigned int   range_mask_l  = (unsigned int)range_mask;
  unsigned short range_mask_hh = (unsigned short)(range_mask >> 48);
  unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
  unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
  unsigned short range_mask_ll = (unsigned short)range_mask;
  // Works for z9 and newer H/W.
  if (oneBits) {
    if ((~range_mask_l) != 0)                { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
    if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
  } else {
    // All bits outside range become 0s. ANDs already guaranteed zero by the
    // shift substitution above (lfZero/hfZero) are skipped.
    if (((~range_mask_l) != 0) &&              !lfZero) {
      z_nilf(dst, range_mask_l);
    }
    if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
      z_nihf(dst, range_mask_h);
    }
  }
}
 551 
 552 // Rotate src, then insert selected range from rotated src into dst.
 553 // Clear dst before, if requested.
 554 void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
 555                                         int nRotate, bool clear_dst) {
 556   // This version does not depend on src being zero-extended int2long.
 557   nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
 558   z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest.
 559 }
 560 
 561 // Rotate src, then and selected range from rotated src into dst.
 562 // Set condition code only if so requested. Otherwise it is unpredictable.
 563 // See performance note in macroAssembler_s390.hpp for important information.
 564 void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
 565                                      int nRotate, bool test_only) {
 566   guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
 567   // This version does not depend on src being zero-extended int2long.
 568   nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
 569   z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
 570 }
 571 
 572 // Rotate src, then or selected range from rotated src into dst.
 573 // Set condition code only if so requested. Otherwise it is unpredictable.
 574 // See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_or(Register dst, Register src,  int  lBitPos,  int  rBitPos,
                                    int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then or selected.
}
 582 
 583 // Rotate src, then xor selected range from rotated src into dst.
 584 // Set condition code only if so requested. Otherwise it is unpredictable.
 585 // See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_xor(Register dst, Register src,  int  lBitPos,  int  rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
}
 593 
 594 void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
 595   if (inc.is_register()) {
 596     z_agr(r1, inc.as_register());
 597   } else { // constant
 598     intptr_t imm = inc.as_constant();
 599     add2reg(r1, imm);
 600   }
 601 }
 602 // Helper function to multiply the 64bit contents of a register by a 16bit constant.
 603 // The optimization tries to avoid the mghi instruction, since it uses the FPU for
 604 // calculation and is thus rather slow.
 605 //
 606 // There is no handling for special cases, e.g. cval==0 or cval==1.
 607 //
 608 // Returns len of generated code block.
unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
  int block_start = offset();

  bool sign_flip = cval < 0;
  cval = sign_flip ? -cval : cval;  // Work on the magnitude; negate the result at the end.

  BLOCK_COMMENT("Reg64*Con16 {");

  int bit1 = cval & -cval;          // Isolate the lowest set bit of cval.
  if (bit1 == cval) {
    // cval is a power of two: a single shift replaces the multiply.
    z_sllg(rval, rval, exact_log2(bit1));
    if (sign_flip) { z_lcgr(rval, rval); }
  } else {
    int bit2 = (cval-bit1) & -(cval-bit1); // Isolate the second-lowest set bit.
    if ((bit1+bit2) == cval) {
      // Exactly two bits set: two shifts and an add still beat mghi.
      z_sllg(work, rval, exact_log2(bit1));
      z_sllg(rval, rval, exact_log2(bit2));
      z_agr(rval, work);
      if (sign_flip) { z_lcgr(rval, rval); }
    } else {
      // General case: use the multiply instruction, folding the sign back in.
      if (sign_flip) { z_mghi(rval, -cval); }
      else           { z_mghi(rval,  cval); }
    }
  }
  BLOCK_COMMENT("} Reg64*Con16");

  int block_end = offset();
  return block_end - block_start;   // Length of the emitted code block.
}
 638 
 639 // Generic operation r1 := r2 + imm.
 640 //
 641 // Should produce the best code for each supported CPU version.
 642 // r2 == noreg yields r1 := r1 + imm
 643 // imm == 0 emits either no instruction or r1 := r2 !
 644 // NOTES: 1) Don't use this function where fixed sized
 645 //           instruction sequences are required!!!
 646 //        2) Don't use this function if condition code
 647 //           setting is required!
 648 //        3) Despite being declared as int64_t, the parameter imm
 649 //           must be a simm_32 value (= signed 32-bit integer).
void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
  assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");

  if (r2 == noreg) { r2 = r1; } // noreg means: add to r1 itself.

  // Handle special case imm == 0.
  if (imm == 0) {
    lgr_if_needed(r1, r2);
    // Nothing else to do.
    return;
  }

  if (!PreferLAoverADD || (r2 == Z_R0)) {
    // ADD-style encodings. Note: Z_R0 cannot serve as a base for LA/LAY,
    // so this path is forced when r2 == Z_R0.
    bool distinctOpnds = VM_Version::has_DistinctOpnds();

    // Can we encode imm in 16 bits signed?
    if (Immediate::is_simm16(imm)) {
      if (r1 == r2) {
        z_aghi(r1, imm);       // In-place add.
        return;
      }
      if (distinctOpnds) {
        z_aghik(r1, r2, imm);  // Three-operand add, single instruction.
        return;
      }
      z_lgr(r1, r2);           // Copy, then add in place.
      z_aghi(r1, imm);
      return;
    }
  } else {
    // LA-style encodings (address generation, does not change the CC).
    // Can we encode imm in 12 bits unsigned?
    if (Displacement::is_shortDisp(imm)) {
      z_la(r1, imm, r2);
      return;
    }
    // Can we encode imm in 20 bits signed?
    if (Displacement::is_validDisp(imm)) {
      // Always use LAY instruction, so we don't need the tmp register.
      z_lay(r1, imm, r2);
      return;
    }

  }

  // Can handle it (all possible values) with long immediates.
  lgr_if_needed(r1, r2);
  z_agfi(r1, imm);
}
 698 
 699 // Generic operation r := b + x + d
 700 //
 701 // Addition of several operands with address generation semantics - sort of:
 702 //  - no restriction on the registers. Any register will do for any operand.
 703 //  - x == noreg: operand will be disregarded.
 704 //  - b == noreg: will use (contents of) result reg as operand (r := r + d).
 705 //  - x == Z_R0:  just disregard
 706 //  - b == Z_R0:  use as operand. This is not address generation semantics!!!
 707 //
 708 // The same restrictions as on add2reg() are valid!!!
void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) {
  assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong");

  if (x == noreg) { x = Z_R0; } // Z_R0 index contributes nothing (see contract above).
  if (b == noreg) { b = r; }    // noreg base means: use the result register itself.

  // Handle special case x == R0.
  if (x == Z_R0) {
    // Can simply add the immediate value to the base register.
    add2reg(r, d, b);
    return;
  }

  if (!PreferLAoverADD || (b == Z_R0)) {
    // ADD-style encodings. Forced when b == Z_R0, which LA cannot use as base.
    bool distinctOpnds = VM_Version::has_DistinctOpnds();
    // Handle special case d == 0.
    if (d == 0) {
      if (b == x)        { z_sllg(r, b, 1); return; } // b + x == 2*b: one shift.
      if (r == x)        { z_agr(r, b);     return; }
      if (r == b)        { z_agr(r, x);     return; }
      if (distinctOpnds) { z_agrk(r, x, b); return; }
      z_lgr(r, b);
      z_agr(r, x);
    } else {
      // r := b + x first (same alias analysis as above), then add d.
      if (x == b)             { z_sllg(r, x, 1); }
      else if (r == x)        { z_agr(r, b); }
      else if (r == b)        { z_agr(r, x); }
      else if (distinctOpnds) { z_agrk(r, x, b); }
      else {
        z_lgr(r, b);
        z_agr(r, x);
      }
      add2reg(r, d);
    }
  } else {
    // LA-style encodings fold base, index and displacement into one instruction.
    // Can we encode imm in 12 bits unsigned?
    if (Displacement::is_shortDisp(d)) {
      z_la(r, d, x, b);
      return;
    }
    // Can we encode imm in 20 bits signed?
    if (Displacement::is_validDisp(d)) {
      z_lay(r, d, x, b);
      return;
    }
    z_la(r, 0, x, b); // r := b + x, then add the large displacement separately.
    add2reg(r, d);
  }
}
 758 
 759 // Generic emitter (32bit) for direct memory increment.
 760 // For optimal code, do not specify Z_R0 as temp register.
 761 void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) {
 762   if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
 763     z_asi(a, imm);
 764   } else {
 765     z_lgf(tmp, a);
 766     add2reg(tmp, imm);
 767     z_st(tmp, a);
 768   }
 769 }
 770 
 771 void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) {
 772   if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
 773     z_agsi(a, imm);
 774   } else {
 775     z_lg(tmp, a);
 776     add2reg(tmp, imm);
 777     z_stg(tmp, a);
 778   }
 779 }
 780 
 781 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
 782   switch (size_in_bytes) {
 783     case  8: z_lg(dst, src); break;
 784     case  4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break;
 785     case  2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break;
 786     case  1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break;
 787     default: ShouldNotReachHere();
 788   }
 789 }
 790 
 791 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
 792   switch (size_in_bytes) {
 793     case  8: z_stg(src, dst); break;
 794     case  4: z_st(src, dst); break;
 795     case  2: z_sth(src, dst); break;
 796     case  1: z_stc(src, dst); break;
 797     default: ShouldNotReachHere();
 798   }
 799 }
 800 
// Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and
// a high-order summand in register tmp.
//
// return value: <  0: No split required, si20 actually has property uimm12.
//               >= 0: Split performed. Use return value as uimm12 displacement and
//                     tmp as index register.
int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) {
  assert(Immediate::is_simm20(si20_offset), "sanity");
  int lg_off = (int)si20_offset &  0x0fff; // Punch out low-order 12 bits, always positive.
  int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero.
  assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) ||
         !Displacement::is_shortDisp(si20_offset), "unexpected offset values");
  assert((lg_off+ll_off) == si20_offset, "offset splitup error");

  // With accumulate == true the high-order part is folded into tmp at the end,
  // so build it in Z_R0 to preserve tmp's contents until then.
  Register work = accumulate? Z_R0 : tmp;

  if (fixed_codelen) {          // Len of code = 10 = 4 + 6.
    z_lghi(work, ll_off>>12);   // Implicit sign extension.
    z_slag(work, work, 12);
  } else {                      // Len of code = 0..10.
    if (ll_off == 0) { return -1; } // No high-order part: caller can use si20_offset directly.
    // ll_off has 8 significant bits (at most) plus sign.
    if ((ll_off & 0x0000f000) == 0) {    // Non-zero bits only in upper halfbyte.
      z_llilh(work, ll_off >> 16);
      if (ll_off < 0) {                  // Sign-extension required.
        z_lgfr(work, work);
      }
    } else {
      if ((ll_off & 0x000f0000) == 0) {  // Non-zero bits only in lower halfbyte.
        z_llill(work, ll_off);
      } else {                           // Non-zero bits in both halfbytes.
        z_lghi(work, ll_off>>12);        // Implicit sign extension.
        z_slag(work, work, 12);
      }
    }
  }
  if (accumulate) { z_algr(tmp, work); } // len of code += 4
  return lg_off;
}
 840 
// Load a float from base register 'a' plus a displacement that may exceed
// the 20-bit signed range of LEY. 'tmp' may be clobbered in the large-offset case.
void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ley(t, si20, a);  // Displacement fits: single instruction.
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    // Split the offset: high-order part goes into 'work', low-order 12 bits are returned.
    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_le(t, si20, work);          // No split performed; si20 is a valid short disp.
    } else {
      if (accumulate) {
        z_le(t, disp12, work);      // High-order part already accumulated into work.
      } else {
        z_le(t, disp12, work, a);   // work serves as index register next to base a.
      }
    }
  }
}
 871 
// Load a double from base register 'a' plus a displacement that may exceed
// the 20-bit signed range of LDY. 'tmp' may be clobbered in the large-offset case.
void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ldy(t, si20, a);  // Displacement fits: single instruction.
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    // Split the offset: high-order part goes into 'work', low-order 12 bits are returned.
    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_ld(t, si20, work);          // No split performed; si20 is a valid short disp.
    } else {
      if (accumulate) {
        z_ld(t, disp12, work);      // High-order part already accumulated into work.
      } else {
        z_ld(t, disp12, work, a);   // work serves as index register next to base a.
      }
    }
  }
}
 902 
 903 // PCrelative TOC access.
 904 // Returns distance (in bytes) from current position to start of consts section.
 905 // Returns 0 (zero) if no consts section exists or if it has size zero.
 906 long MacroAssembler::toc_distance() {
 907   CodeSection* cs = code()->consts();
 908   return (long)((cs != NULL) ? cs->start()-pc() : 0);
 909 }
 910 
// Implementation on x86/sparc assumes that constant and instruction section are
// adjacent, but this doesn't hold. Two special situations may occur, that we must
// be able to handle:
//   1. const section may be located apart from the inst section.
//   2. const section may be empty
// In both cases, we use the const section's start address to compute the "TOC",
// this seems to occur only temporarily; in the final step we always seem to end up
// with the pc-relative variant.
//
// PC-relative offset could be +/-2**32 -> use long for disp
// Furthermore: makes no sense to have special code for
// adjacent const and inst sections.
void MacroAssembler::load_toc(Register Rtoc) {
  // Simply use distance from start of const section (should be patched in the end).
  long disp = toc_distance();

  // Record an internal-word relocation so the LARL target gets fixed up
  // when sections move.
  RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
  relocate(rspec);
  z_larl(Rtoc, RelAddr::pcrel_off32(disp));  // Offset is in halfwords.
}
 931 
 932 // PCrelative TOC access.
 933 // Load from anywhere pcrelative (with relocation of load instr)
 934 void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
 935   address          pc             = this->pc();
 936   ptrdiff_t        total_distance = dataLocation - pc;
 937   RelocationHolder rspec          = internal_word_Relocation::spec(dataLocation);
 938 
 939   assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
 940   assert(total_distance != 0, "sanity");
 941 
 942   // Some extra safety net.
 943   if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
 944     guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
 945   }
 946 
 947   (this)->relocate(rspec, relocInfo::pcrel_addr_format);
 948   z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
 949 }
 950 
 951 
 952 // PCrelative TOC access.
 953 // Load from anywhere pcrelative (with relocation of load instr)
 954 // loaded addr has to be relocated when added to constant pool.
 955 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) {
 956   address          pc             = this->pc();
 957   ptrdiff_t        total_distance = addrLocation - pc;
 958   RelocationHolder rspec          = internal_word_Relocation::spec(addrLocation);
 959 
 960   assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
 961 
 962   // Some extra safety net.
 963   if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
 964     guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
 965   }
 966 
 967   (this)->relocate(rspec, relocInfo::pcrel_addr_format);
 968   z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
 969 }
 970 
// Generic operation: load a value from memory and test.
// CondCode indicates the sign (<0, ==0, >0) of the loaded value.
void MacroAssembler::load_and_test_byte(Register dst, const Address &a) {
  z_lb(dst, a);     // Sign-extending byte load.
  z_ltr(dst, dst);  // Set condition code from the loaded value.
}
 977 
 978 void MacroAssembler::load_and_test_short(Register dst, const Address &a) {
 979   int64_t disp = a.disp20();
 980   if (Displacement::is_shortDisp(disp)) {
 981     z_lh(dst, a);
 982   } else if (Displacement::is_longDisp(disp)) {
 983     z_lhy(dst, a);
 984   } else {
 985     guarantee(false, "displacement out of range");
 986   }
 987   z_ltr(dst, dst);
 988 }
 989 
// Load an int from memory and set the condition code (single LT instruction).
void MacroAssembler::load_and_test_int(Register dst, const Address &a) {
  z_lt(dst, a);
}
 993 
// Load an int from memory, sign-extend to 64 bits, and set the condition code.
void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) {
  z_ltgf(dst, a);
}
 997 
// Load a long from memory and set the condition code (single LTG instruction).
void MacroAssembler::load_and_test_long(Register dst, const Address &a) {
  z_ltg(dst, a);
}
1001 
1002 // Test a bit in memory.
1003 void MacroAssembler::testbit(const Address &a, unsigned int bit) {
1004   assert(a.index() == noreg, "no index reg allowed in testbit");
1005   if (bit <= 7) {
1006     z_tm(a.disp() + 3, a.base(), 1 << bit);
1007   } else if (bit <= 15) {
1008     z_tm(a.disp() + 2, a.base(), 1 << (bit - 8));
1009   } else if (bit <= 23) {
1010     z_tm(a.disp() + 1, a.base(), 1 << (bit - 16));
1011   } else if (bit <= 31) {
1012     z_tm(a.disp() + 0, a.base(), 1 << (bit - 24));
1013   } else {
1014     ShouldNotReachHere();
1015   }
1016 }
1017 
1018 // Test a bit in a register. Result is reflected in CC.
1019 void MacroAssembler::testbit(Register r, unsigned int bitPos) {
1020   if (bitPos < 16) {
1021     z_tmll(r, 1U<<bitPos);
1022   } else if (bitPos < 32) {
1023     z_tmlh(r, 1U<<(bitPos-16));
1024   } else if (bitPos < 48) {
1025     z_tmhl(r, 1U<<(bitPos-32));
1026   } else if (bitPos < 64) {
1027     z_tmhh(r, 1U<<(bitPos-48));
1028   } else {
1029     ShouldNotReachHere();
1030   }
1031 }
1032 
// Emit a data prefetch hint (PFD code 1: prefetch for read access).
void MacroAssembler::prefetch_read(Address a) {
  z_pfd(1, a.disp20(), a.indexOrR0(), a.base());
}
// Emit a data prefetch hint (PFD code 2: prefetch for store/update access).
void MacroAssembler::prefetch_update(Address a) {
  z_pfd(2, a.disp20(), a.indexOrR0(), a.base());
}
1039 
1040 // Clear a register, i.e. load const zero into reg.
1041 // Return len (in bytes) of generated instruction(s).
1042 // whole_reg: Clear 64 bits if true, 32 bits otherwise.
1043 // set_cc:    Use instruction that sets the condition code, if true.
1044 int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
1045   unsigned int start_off = offset();
1046   if (whole_reg) {
1047     set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0);
1048   } else {  // Only 32bit register.
1049     set_cc ? z_xr(r, r) : z_lhi(r, 0);
1050   }
1051   return offset() - start_off;
1052 }
1053 
#ifdef ASSERT
// Debug-only helper: fill register r with 'pattern' replicated to 64 bits.
// pattern_len gives the width (1, 2, 4, or 8 bytes) of the significant part
// of 'pattern'. Returns the length (in bytes) of the emitted load sequence.
int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
  switch (pattern_len) {
    case 1:
      // Replicate the byte into a halfword, then fall through to widen further.
      pattern = (pattern & 0x000000ff)  | ((pattern & 0x000000ff)<<8);
    case 2:
      // Replicate the halfword into a word; fall through.
      pattern = (pattern & 0x0000ffff)  | ((pattern & 0x0000ffff)<<16);
    case 4:
      // Replicate the word into a doubleword; fall through.
      pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32);
    case 8:
      return load_const_optimized_rtn_len(r, pattern, true);
      break; // Unreachable (after return), kept for symmetry.
    default:
      guarantee(false, "preset_reg: bad len");
  }
  return 0;
}
#endif
1072 
1073 // addr: Address descriptor of memory to clear index register will not be used !
1074 // size: Number of bytes to clear.
1075 //    !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!!
1076 //    !!! Use store_const() instead                  !!!
1077 void MacroAssembler::clear_mem(const Address& addr, unsigned size) {
1078   guarantee(size <= 256, "MacroAssembler::clear_mem: size too large");
1079 
1080   if (size == 1) {
1081     z_mvi(addr, 0);
1082     return;
1083   }
1084 
1085   switch (size) {
1086     case 2: z_mvhhi(addr, 0);
1087       return;
1088     case 4: z_mvhi(addr, 0);
1089       return;
1090     case 8: z_mvghi(addr, 0);
1091       return;
1092     default: ; // Fallthru to xc.
1093   }
1094 
1095   z_xc(addr, size, addr);
1096 }
1097 
// Pad the instruction stream with NOPs until the code offset is a multiple of modulus.
void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) z_nop();
}
1101 
// Special version for non-relocateable code if required alignment
// is larger than CodeEntryAlignment.
// Aligns the absolute pc, not just the section offset as align() does.
void MacroAssembler::align_address(int modulus) {
  while ((uintptr_t)pc() % modulus != 0) z_nop();
}
1107 
1108 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
1109                                          Register temp_reg,
1110                                          int64_t extra_slot_offset) {
1111   // On Z, we can have index and disp in an Address. So don't call argument_offset,
1112   // which issues an unnecessary add instruction.
1113   int stackElementSize = Interpreter::stackElementSize;
1114   int64_t offset = extra_slot_offset * stackElementSize;
1115   const Register argbase = Z_esp;
1116   if (arg_slot.is_constant()) {
1117     offset += arg_slot.as_constant() * stackElementSize;
1118     return Address(argbase, offset);
1119   }
1120   // else
1121   assert(temp_reg != noreg, "must specify");
1122   assert(temp_reg != Z_ARG1, "base and index are conflicting");
1123   z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3
1124   return Address(argbase, temp_reg, offset);
1125 }
1126 
1127 
1128 //===================================================================
1129 //===   START   C O N S T A N T S   I N   C O D E   S T R E A M   ===
1130 //===================================================================
1131 //===            P A T CH A B L E   C O N S T A N T S             ===
1132 //===================================================================
1133 
1134 
1135 //---------------------------------------------------
1136 //  Load (patchable) constant into register
1137 //---------------------------------------------------
1138 
1139 
// Load absolute address (and try to optimize).
//   Note: This method is usable only for position-fixed code,
//         referring to a position-fixed target location.
//         If not so, relocations and patching must be used.
void MacroAssembler::load_absolute_address(Register d, address addr) {
  assert(addr != NULL, "should not happen");
  BLOCK_COMMENT("load_absolute_address:");
  // Product-mode fallback for addr == NULL (assert above fires in debug builds):
  // emit something of representative size so code-size calculations stay sane.
  if (addr == NULL) {
    z_larl(d, pc()); // Dummy emit for size calc.
    return;
  }

  // Within +/-4GB of pc: a single pc-relative LARL suffices.
  if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) {
    z_larl(d, addr);
    return;
  }

  // Out of relative range: materialize the 64-bit address as a constant.
  load_const_optimized(d, (long)addr);
}
1159 
// Load a 64bit constant.
// Patchable code sequence, but not atomically patchable.
// Make sure to keep code size constant -> no value-dependent optimizations.
// Do not kill condition code.
// Emits exactly IIHF (upper 32 bits) + IILF (lower 32 bits) = 12 bytes;
// patch_const()/get_const() depend on this exact layout.
void MacroAssembler::load_const(Register t, long x) {
  Assembler::z_iihf(t, (int)(x >> 32));
  Assembler::z_iilf(t, (int)(x & 0xffffffff));
}
1168 
// Load a 32bit constant into a 64bit register, sign-extend or zero-extend.
// Patchable code sequence, but not atomically patchable.
// Make sure to keep code size constant -> no value-dependent optimizations.
// Do not kill condition code.
// Emits exactly one 6-byte instruction; patch_load_const_32to64() depends on this.
void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) {
  if (sign_extend) { Assembler::z_lgfi(t, x); }
  else             { Assembler::z_llilf(t, x); }
}
1177 
// Load narrow oop constant, no decompression.
// The value is loaded zero-extended (narrow oops are unsigned 32-bit values).
void MacroAssembler::load_narrow_oop(Register t, narrowOop a) {
  assert(UseCompressedOops, "must be on to call this method");
  load_const_32to64(t, a, false /*sign_extend*/);
}
1183 
// Load narrow klass constant, compression required.
// Encodes k first, then loads the narrow value zero-extended.
void MacroAssembler::load_narrow_klass(Register t, Klass* k) {
  assert(UseCompressedClassPointers, "must be on to call this method");
  narrowKlass encoded_k = Klass::encode_klass(k);
  load_const_32to64(t, encoded_k, false /*sign_extend*/);
}
1190 
1191 //------------------------------------------------------
1192 //  Compare (patchable) constant with register.
1193 //------------------------------------------------------
1194 
// Compare narrow oop in reg with narrow oop constant, no decompression.
// Logical (unsigned) 32-bit compare; patchable via patch_compare_immediate_32().
void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) {
  assert(UseCompressedOops, "must be on to call this method");

  Assembler::z_clfi(oop1, oop2);
}
1201 
// Compare narrow klass in reg with narrow klass constant, no decompression.
// Logical (unsigned) 32-bit compare; patchable via patch_compare_immediate_32().
void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) {
  assert(UseCompressedClassPointers, "must be on to call this method");
  narrowKlass encoded_k = Klass::encode_klass(klass2);

  Assembler::z_clfi(klass1, encoded_k);
}
1209 
1210 //----------------------------------------------------------
1211 //  Check which kind of load_constant we have here.
1212 //----------------------------------------------------------
1213 
1214 // Detection of CPU version dependent load_const sequence.
1215 // The detection is valid only for code sequences generated by load_const,
1216 // not load_const_optimized.
1217 bool MacroAssembler::is_load_const(address a) {
1218   unsigned long inst1, inst2;
1219   unsigned int  len1,  len2;
1220 
1221   len1 = get_instruction(a, &inst1);
1222   len2 = get_instruction(a + len1, &inst2);
1223 
1224   return is_z_iihf(inst1) && is_z_iilf(inst2);
1225 }
1226 
1227 // Detection of CPU version dependent load_const_32to64 sequence.
1228 // Mostly used for narrow oops and narrow Klass pointers.
1229 // The detection is valid only for code sequences generated by load_const_32to64.
1230 bool MacroAssembler::is_load_const_32to64(address pos) {
1231   unsigned long inst1, inst2;
1232   unsigned int len1;
1233 
1234   len1 = get_instruction(pos, &inst1);
1235   return is_z_llilf(inst1);
1236 }
1237 
// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
// Matches a single CLFI instruction (RIL format).
bool MacroAssembler::is_compare_immediate32(address pos) {
  return is_equal(pos, CLFI_ZOPC, RIL_MASK);
}
1243 
1244 // Detection of compare_immediate_narrow sequence.
1245 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
1246 bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) {
1247   return is_compare_immediate32(pos);
1248   }
1249 
// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_klass.
// Thin alias of is_compare_immediate32 (same CLFI encoding).
bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) {
  return is_compare_immediate32(pos);
}
1255 
1256 //-----------------------------------
1257 //  patch the load_constant
1258 //-----------------------------------
1259 
// CPU-version dependend patching of load_const.
// Rewrites the two 32-bit immediates of the IIHF/IILF pair emitted by load_const:
// the high word at offset 0, the low word at offset 6 (each instruction is 6 bytes).
void MacroAssembler::patch_const(address a, long x) {
  assert(is_load_const(a), "not a load of a constant");
  set_imm32((address)a, (int) ((x >> 32) & 0xffffffff));
  set_imm32((address)(a + 6), (int)(x & 0xffffffff));
}
1266 
// Patching the value of CPU version dependent load_const_32to64 sequence.
// The passed ptr MUST be in compressed format!
// Returns the length (6 bytes) of the patched instruction.
int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) {
  assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)");

  set_imm32(pos, np);
  return 6;
}
1275 
// Patching the value of CPU version dependent compare_immediate_narrow sequence.
// The passed ptr MUST be in compressed format!
// Returns the length (6 bytes) of the patched instruction.
int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) {
  assert(is_compare_immediate32(pos), "not a compressed ptr compare");

  set_imm32(pos, np);
  return 6;
}
1284 
// Patching the immediate value of CPU version dependent load_narrow_oop sequence.
// The passed ptr must NOT be in compressed format!
// Compresses the oop here, then delegates to patch_load_const_32to64.
int MacroAssembler::patch_load_narrow_oop(address pos, oop o) {
  assert(UseCompressedOops, "Can only patch compressed oops");

  narrowOop no = CompressedOops::encode(o);
  return patch_load_const_32to64(pos, no);
}
1293 
// Patching the immediate value of CPU version dependent load_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
// Encodes the klass here, then delegates to patch_load_const_32to64.
int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) {
  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");

  narrowKlass nk = Klass::encode_klass(k);
  return patch_load_const_32to64(pos, nk);
}
1302 
// Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence.
// The passed ptr must NOT be in compressed format!
// Compresses the oop here, then delegates to patch_compare_immediate_32.
int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) {
  assert(UseCompressedOops, "Can only patch compressed oops");

  narrowOop no = CompressedOops::encode(o);
  return patch_compare_immediate_32(pos, no);
}
1311 
// Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
// Encodes the klass here, then delegates to patch_compare_immediate_32.
int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) {
  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");

  narrowKlass nk = Klass::encode_klass(k);
  return patch_compare_immediate_32(pos, nk);
}
1320 
1321 //------------------------------------------------------------------------
1322 //  Extract the constant from a load_constant instruction stream.
1323 //------------------------------------------------------------------------
1324 
1325 // Get constant from a load_const sequence.
1326 long MacroAssembler::get_const(address a) {
1327   assert(is_load_const(a), "not a load of a constant");
1328   unsigned long x;
1329   x =  (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32);
1330   x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff)));
1331   return (long) x;
1332 }
1333 
1334 //--------------------------------------
1335 //  Store a constant in memory.
1336 //--------------------------------------
1337 
// General emitter to move a constant to memory.
// The store is atomic.
//  o Address must be given in RS format (no index register)
//  o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported.
//  o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned.
//  o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned.
//  o Memory slot must be at least as wide as constant, will assert otherwise.
//  o Signed constants will sign-extend, unsigned constants will zero-extend to slot width.
// Returns the code offset of the emitted store instruction (for later patching),
// or -1 on the unreachable fall-through path.
int MacroAssembler::store_const(const Address &dest, long imm,
                                unsigned int lm, unsigned int lc,
                                Register scratch) {
  int64_t  disp = dest.disp();
  Register base = dest.base();
  assert(!dest.has_index(), "not supported");
  assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory   length not supported");
  assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported");
  assert(lm>=lc, "memory slot too small");
  assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range");
  assert(Displacement::is_validDisp(disp), "displacement out of range");

  bool is_shortDisp = Displacement::is_shortDisp(disp);
  int store_offset = -1;

  // For target len == 1 it's easy.
  if (lm == 1) {
    store_offset = offset();
    if (is_shortDisp) {
      z_mvi(disp, base, imm);   // 12-bit displacement variant.
      return store_offset;
    } else {
      z_mviy(disp, base, imm);  // 20-bit displacement variant.
      return store_offset;
    }
  }

  // All the "good stuff" takes an unsigned displacement.
  if (is_shortDisp) {
    // NOTE: Cannot use clear_mem for imm==0, because it is not atomic.

    store_offset = offset();
    switch (lm) {
      case 2:  // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening.
        z_mvhhi(disp, base, imm);
        return store_offset;
      case 4:
        // MVHI takes only a 16-bit signed immediate.
        if (Immediate::is_simm16(imm)) {
          z_mvhi(disp, base, imm);
          return store_offset;
        }
        break;
      case 8:
        // MVGHI takes only a 16-bit signed immediate.
        if (Immediate::is_simm16(imm)) {
          z_mvghi(disp, base, imm);
          return store_offset;
        }
        break;
      default:
        ShouldNotReachHere();
        break;
    }
  }

  //  Can't optimize, so load value and store it.
  guarantee(scratch != noreg, " need a scratch register here !");
  if (imm != 0) {
    load_const_optimized(scratch, imm);  // Preserves CC anyway.
  } else {
    // Leave CC alone!!
    (void) clear_reg(scratch, true, false); // Indicate unused result.
  }

  // Note: store_offset marks the store instruction itself, not the
  // scratch-register setup emitted above.
  store_offset = offset();
  if (is_shortDisp) {
    switch (lm) {
      case 2:
        z_sth(scratch, disp, Z_R0, base);
        return store_offset;
      case 4:
        z_st(scratch, disp, Z_R0, base);
        return store_offset;
      case 8:
        z_stg(scratch, disp, Z_R0, base);
        return store_offset;
      default:
        ShouldNotReachHere();
        break;
    }
  } else {
    switch (lm) {
      case 2:
        z_sthy(scratch, disp, Z_R0, base);
        return store_offset;
      case 4:
        z_sty(scratch, disp, Z_R0, base);
        return store_offset;
      case 8:
        z_stg(scratch, disp, Z_R0, base); // STG takes a 20-bit displacement already.
        return store_offset;
      default:
        ShouldNotReachHere();
        break;
    }
  }
  return -1; // should not reach here
}
1443 
1444 //===================================================================
1445 //===       N O T   P A T CH A B L E   C O N S T A N T S          ===
1446 //===================================================================
1447 
// Load constant x into register t with a fast instruction sequence
// depending on the bits in x. Preserves CC under all circumstances.
// If emit == false, nothing is emitted; only the length (in bytes) of the
// would-be sequence is returned. The returned length must therefore be
// value-dependent but emission-independent.
int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) {
  if (x == 0) {
    int len;
    if (emit) {
      len = clear_reg(t, true, false); // Whole reg, CC preserved.
    } else {
      len = 4;
    }
    return len;
  }

  if (Immediate::is_simm16(x)) {
    if (emit) { z_lghi(t, x); }
    return 4;
  }

  // 64 bit value: | part1 | part2 | part3 | part4 |
  // At least one part is not zero!
  int part1 = ((x >> 32) & 0xffff0000) >> 16;
  int part2 = (x >> 32) & 0x0000ffff;
  int part3 = (x & 0xffff0000) >> 16;
  int part4 = (x & 0x0000ffff);

  // Lower word only (unsigned).
  if ((part1 == 0) && (part2 == 0)) {
    if (part3 == 0) {
      if (emit) z_llill(t, part4);
      return 4;
    }
    if (part4 == 0) {
      if (emit) z_llilh(t, part3);
      return 4;
    }
    if (emit) z_llilf(t, (int)(x & 0xffffffff));
    return 6;
  }

  // Upper word only.
  if ((part3 == 0) && (part4 == 0)) {
    if (part1 == 0) {
      if (emit) z_llihl(t, part2);
      return 4;
    }
    if (part2 == 0) {
      if (emit) z_llihh(t, part1);
      return 4;
    }
    if (emit) z_llihf(t, (int)(x >> 32));
    return 6;
  }

  // Lower word only (signed): upper word is all ones and bit 31 is set,
  // so LGFI's implicit sign extension reproduces the full value.
  if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) {
    if (emit) z_lgfi(t, (int)(x & 0xffffffff));
    return 6;
  }

  // General case: set the upper word first, then insert the lower word
  // without disturbing the upper one.
  int len = 0;

  if ((part1 == 0) || (part2 == 0)) {
    if (part1 == 0) {
      if (emit) z_llihl(t, part2);
      len += 4;
    } else {
      if (emit) z_llihh(t, part1);
      len += 4;
    }
  } else {
    if (emit) z_llihf(t, (int)(x >> 32));
    len += 6;
  }

  if ((part3 == 0) || (part4 == 0)) {
    if (part3 == 0) {
      if (emit) z_iill(t, part4);
      len += 4;
    } else {
      if (emit) z_iilh(t, part3);
      len += 4;
    }
  } else {
    if (emit) z_iilf(t, (int)(x & 0xffffffff));
    len += 6;
  }
  return len;
}
1536 
1537 //=====================================================================
1538 //===     H I G H E R   L E V E L   B R A N C H   E M I T T E R S   ===
1539 //=====================================================================
1540 
1541 // Note: In the worst case, one of the scratch registers is destroyed!!!
1542 void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1543   // Right operand is constant.
1544   if (x2.is_constant()) {
1545     jlong value = x2.as_constant();
1546     compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true);
1547     return;
1548   }
1549 
1550   // Right operand is in register.
1551   compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true);
1552 }
1553 
1554 // Note: In the worst case, one of the scratch registers is destroyed!!!
1555 void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1556   // Right operand is constant.
1557   if (x2.is_constant()) {
1558     jlong value = x2.as_constant();
1559     compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false);
1560     return;
1561   }
1562 
1563   // Right operand is in register.
1564   compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false);
1565 }
1566 
1567 // Note: In the worst case, one of the scratch registers is destroyed!!!
1568 void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1569   // Right operand is constant.
1570   if (x2.is_constant()) {
1571     jlong value = x2.as_constant();
1572     compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true);
1573     return;
1574   }
1575 
1576   // Right operand is in register.
1577   compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true);
1578 }
1579 
1580 void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1581   // Right operand is constant.
1582   if (x2.is_constant()) {
1583     jlong value = x2.as_constant();
1584     compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false);
1585     return;
1586   }
1587 
1588   // Right operand is in register.
1589   compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false);
1590 }
1591 
1592 // Generate an optimal branch to the branch target.
1593 // Optimal means that a relative branch (brc or brcl) is used if the
1594 // branch distance is short enough. Loading the target address into a
1595 // register and branching via reg is used as fallback only.
1596 //
1597 // Used registers:
1598 //   Z_R1 - work reg. Holds branch target address.
1599 //          Used in fallback case only.
1600 //
1601 // This version of branch_optimized is good for cases where the target address is known
1602 // and constant, i.e. is never changed (no relocation, no patching).
1603 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) {
1604   address branch_origin = pc();
1605 
1606   if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
1607     z_brc(cond, branch_addr);
1608   } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) {
1609     z_brcl(cond, branch_addr);
1610   } else {
1611     load_const_optimized(Z_R1, branch_addr);  // CC must not get killed by load_const_optimized.
1612     z_bcr(cond, Z_R1);
1613   }
1614 }
1615 
1616 // This version of branch_optimized is good for cases where the target address
1617 // is potentially not yet known at the time the code is emitted.
1618 //
1619 // One very common case is a branch to an unbound label which is handled here.
// The caller might know (or hope) that the branch distance is short enough
// to be encoded in a 16bit relative address. In that case, the caller will
// pass a NearLabel branch_target.
1623 // Care must be taken with unbound labels. Each call to target(label) creates
1624 // an entry in the patch queue for that label to patch all references of the label
1625 // once it gets bound. Those recorded patch locations must be patchable. Otherwise,
1626 // an assertion fires at patch time.
1627 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) {
1628   if (branch_target.is_bound()) {
1629     address branch_addr = target(branch_target);
1630     branch_optimized(cond, branch_addr);
1631   } else if (branch_target.is_near()) {
1632     z_brc(cond, branch_target);  // Caller assures that the target will be in range for z_brc.
1633   } else {
1634     z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time.
1635   }
1636 }
1637 
1638 // Generate an optimal compare and branch to the branch target.
1639 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the
1640 // branch distance is short enough. Loading the target address into a
1641 // register and branching via reg is used as fallback only.
1642 //
1643 // Input:
1644 //   r1 - left compare operand
1645 //   r2 - right compare operand
void MacroAssembler::compare_and_branch_optimized(Register r1,
                                                  Register r2,
                                                  Assembler::branch_condition cond,
                                                  address  branch_addr,
                                                  bool     len64,
                                                  bool     has_sign) {
  // Encode (operand length, signedness) into a dispatch index:
  //   0 - 32 bit signed, 1 - 32 bit unsigned, 2 - 64 bit signed, 3 - 64 bit unsigned.
  unsigned int casenum = (len64?2:0)+(has_sign?0:1);

  address branch_origin = pc();
  // Fused compare-and-branch needs the compare/branch facility and a target
  // reachable with a 16-bit relative offset from the instruction start.
  if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
    switch (casenum) {
      case 0: z_crj( r1, r2, cond, branch_addr); break;
      case 1: z_clrj (r1, r2, cond, branch_addr); break;
      case 2: z_cgrj(r1, r2, cond, branch_addr); break;
      case 3: z_clgrj(r1, r2, cond, branch_addr); break;
      default: ShouldNotReachHere(); break;
    }
  } else {
    // Fallback: set CC with a separate compare, then emit an optimized branch
    // (which may use Z_R1 as scratch if the target is out of relative range).
    switch (casenum) {
      case 0: z_cr( r1, r2); break;
      case 1: z_clr(r1, r2); break;
      case 2: z_cgr(r1, r2); break;
      case 3: z_clgr(r1, r2); break;
      default: ShouldNotReachHere(); break;
    }
    branch_optimized(cond, branch_addr);
  }
}
1674 
1675 // Generate an optimal compare and branch to the branch target.
1676 // Optimal means that a relative branch (clgij, brc or brcl) is used if the
1677 // branch distance is short enough. Loading the target address into a
1678 // register and branching via reg is used as fallback only.
1679 //
1680 // Input:
1681 //   r1 - left compare operand (in register)
1682 //   x2 - right compare operand (immediate)
1683 void MacroAssembler::compare_and_branch_optimized(Register r1,
1684                                                   jlong    x2,
1685                                                   Assembler::branch_condition cond,
1686                                                   Label&   branch_target,
1687                                                   bool     len64,
1688                                                   bool     has_sign) {
1689   address      branch_origin = pc();
1690   bool         x2_imm8       = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2));
1691   bool         is_RelAddr16  = branch_target.is_near() ||
1692                                (branch_target.is_bound() &&
1693                                 RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin));
1694   unsigned int casenum       = (len64?2:0)+(has_sign?0:1);
1695 
1696   if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) {
1697     switch (casenum) {
1698       case 0: z_cij( r1, x2, cond, branch_target); break;
1699       case 1: z_clij(r1, x2, cond, branch_target); break;
1700       case 2: z_cgij(r1, x2, cond, branch_target); break;
1701       case 3: z_clgij(r1, x2, cond, branch_target); break;
1702       default: ShouldNotReachHere(); break;
1703     }
1704     return;
1705   }
1706 
1707   if (x2 == 0) {
1708     switch (casenum) {
1709       case 0: z_ltr(r1, r1); break;
1710       case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
1711       case 2: z_ltgr(r1, r1); break;
1712       case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
1713       default: ShouldNotReachHere(); break;
1714     }
1715   } else {
1716     if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) {
1717       switch (casenum) {
1718         case 0: z_chi(r1, x2); break;
1719         case 1: z_chi(r1, x2); break; // positive immediate < 2**15
1720         case 2: z_cghi(r1, x2); break;
1721         case 3: z_cghi(r1, x2); break; // positive immediate < 2**15
1722         default: break;
1723       }
1724     } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) {
1725       switch (casenum) {
1726         case 0: z_cfi( r1, x2); break;
1727         case 1: z_clfi(r1, x2); break;
1728         case 2: z_cgfi(r1, x2); break;
1729         case 3: z_clgfi(r1, x2); break;
1730         default: ShouldNotReachHere(); break;
1731       }
1732     } else {
1733       // No instruction with immediate operand possible, so load into register.
1734       Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1;
1735       load_const_optimized(scratch, x2);
1736       switch (casenum) {
1737         case 0: z_cr( r1, scratch); break;
1738         case 1: z_clr(r1, scratch); break;
1739         case 2: z_cgr(r1, scratch); break;
1740         case 3: z_clgr(r1, scratch); break;
1741         default: ShouldNotReachHere(); break;
1742       }
1743     }
1744   }
1745   branch_optimized(cond, branch_target);
1746 }
1747 
1748 // Generate an optimal compare and branch to the branch target.
1749 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the
1750 // branch distance is short enough. Loading the target address into a
1751 // register and branching via reg is used as fallback only.
1752 //
1753 // Input:
1754 //   r1 - left compare operand
1755 //   r2 - right compare operand
void MacroAssembler::compare_and_branch_optimized(Register r1,
                                                  Register r2,
                                                  Assembler::branch_condition cond,
                                                  Label&   branch_target,
                                                  bool     len64,
                                                  bool     has_sign) {
  // Dispatch index: 0 - 32 bit signed, 1 - 32 bit unsigned, 2 - 64 bit signed, 3 - 64 bit unsigned.
  unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 0 : 1);

  if (branch_target.is_bound()) {
    // Distance is known; the address-based variant picks the best encoding.
    address branch_addr = target(branch_target);
    compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign);
  } else {
    // Unbound label: a near label promises a 16-bit relative distance, which
    // permits the fused compare-and-branch forms (if the facility exists).
    if (VM_Version::has_CompareBranch() && branch_target.is_near()) {
      switch (casenum) {
        case 0: z_crj(  r1, r2, cond, branch_target); break;
        case 1: z_clrj( r1, r2, cond, branch_target); break;
        case 2: z_cgrj( r1, r2, cond, branch_target); break;
        case 3: z_clgrj(r1, r2, cond, branch_target); break;
        default: ShouldNotReachHere(); break;
      }
    } else {
      // Fallback: separate compare, then optimized branch.
      switch (casenum) {
        case 0: z_cr( r1, r2); break;
        case 1: z_clr(r1, r2); break;
        case 2: z_cgr(r1, r2); break;
        case 3: z_clgr(r1, r2); break;
        default: ShouldNotReachHere(); break;
      }
      branch_optimized(cond, branch_target);
    }
  }
}
1788 
1789 //===========================================================================
1790 //===   END     H I G H E R   L E V E L   B R A N C H   E M I T T E R S   ===
1791 //===========================================================================
1792 
1793 AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
1794   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1795   int index = oop_recorder()->allocate_metadata_index(obj);
1796   RelocationHolder rspec = metadata_Relocation::spec(index);
1797   return AddressLiteral((address)obj, rspec);
1798 }
1799 
1800 AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
1801   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1802   int index = oop_recorder()->find_index(obj);
1803   RelocationHolder rspec = metadata_Relocation::spec(index);
1804   return AddressLiteral((address)obj, rspec);
1805 }
1806 
1807 AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
1808   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1809   int oop_index = oop_recorder()->allocate_oop_index(obj);
1810   return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
1811 }
1812 
1813 AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
1814   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1815   int oop_index = oop_recorder()->find_index(obj);
1816   return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
1817 }
1818 
1819 // NOTE: destroys r
// Normalize the 32-bit value in r to a C boolean: 0 stays 0, anything else becomes 1.
// t is used as scratch.
void MacroAssembler::c2bool(Register r, Register t) {
  z_lcr(t, r);   // t = -r
  z_or(r, t);    // r = -r OR r; the sign bit is now set iff r was nonzero.
  z_srl(r, 31);  // Yields 0 if r was 0, 1 otherwise.
}
1825 
// Materialize *delayed_value_addr + offset as a RegisterOrConstant.
// If the value is already known (nonzero) at code generation time, return it
// as a constant. Otherwise emit code that loads it indirectly into tmp at
// run time and return tmp.
//   delayed_value_addr - location holding the (possibly not yet computed) value
//   tmp                - register to receive the value in the delayed case
//   offset             - constant added to the loaded value
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0) {
    // Value is available now; no code needs to be emitted.
    return RegisterOrConstant(value + offset);
  }

  BLOCK_COMMENT("delayed_value {");
  // Load indirectly to solve generation ordering problem.
  load_absolute_address(tmp, (address) delayed_value_addr); // tmp = a;
  z_lg(tmp, 0, tmp);                   // tmp = *tmp;

#ifdef ASSERT
  // By the time this code runs, the delayed value must have been filled in.
  NearLabel L;
  compare64_and_branch(tmp, (intptr_t)0L, Assembler::bcondNotEqual, L);
  z_illtrap();
  bind(L);
#endif

  if (offset != 0) {
    z_agfi(tmp, offset);               // tmp = tmp + offset;
  }

  BLOCK_COMMENT("} delayed_value");
  return RegisterOrConstant(tmp);
}
1853 
1854 // Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos'
1855 // and return the resulting instruction.
1856 // Dest_pos and inst_pos are 32 bit only. These parms can only designate
1857 // relative positions.
1858 // Use correct argument types. Do not pre-calculate distance.
unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) {
  int c = 0;                        // Case id, only used in the error report below.
  unsigned long patched_inst = 0;
  // Each case replaces the relative-offset field of `inst' with the offset from
  // inst_pos to dest_pos. Field position and width depend on the instruction format.
  if (is_call_pcrelative_short(inst) ||
      is_branch_pcrelative_short(inst) ||
      is_branchoncount_pcrelative_short(inst) ||
      is_branchonindex32_pcrelative_short(inst)) {
    // 4-byte instructions: 16-bit offset in the low halfword.
    c = 1;
    int m = fmask(15, 0);    // simm16(-1, 16, 32);
    int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32);
    patched_inst = (inst & ~m) | v;
  } else if (is_compareandbranch_pcrelative_short(inst)) {
    // 6-byte compare-and-branch: 16-bit offset "in the middle".
    c = 2;
    long m = fmask(31, 16);  // simm16(-1, 16, 48);
    long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else if (is_branchonindex64_pcrelative_short(inst)) {
    // 6-byte branch-on-index: 16-bit offset "in the middle".
    c = 3;
    long m = fmask(31, 16);  // simm16(-1, 16, 48);
    long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) {
    // 6-byte call/branch relative long: 32-bit offset in the low word.
    c = 4;
    long m = fmask(31, 0);  // simm32(-1, 16, 48);
    long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions.
    c = 5;
    long m = fmask(31, 0);  // simm32(-1, 16, 48);
    long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else {
    print_dbg_msg(tty, inst, "not a relative branch", 0);
    dump_code_range(tty, inst_pos, 32, "not a pcrelative branch");
    ShouldNotReachHere();
  }

  // Self-check: re-extract the offset from the patched instruction and verify
  // it matches the requested distance.
  long new_off = get_pcrel_offset(patched_inst);
  if (new_off != (dest_pos-inst_pos)) {
    tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off);
    print_dbg_msg(tty, inst,         "<- original instruction: branch patching error", 0);
    print_dbg_msg(tty, patched_inst, "<- patched  instruction: branch patching error", 0);
#ifdef LUCY_DBG
    VM_Version::z_SIGSEGV();
#endif
    ShouldNotReachHere();
  }
  return patched_inst;
}
1908 
1909 // Only called when binding labels (share/vm/asm/assembler.cpp)
1910 // Pass arguments as intended. Do not pre-calculate distance.
1911 void MacroAssembler::pd_patch_instruction(address branch, address target) {
1912   unsigned long stub_inst;
1913   int           inst_len = get_instruction(branch, &stub_inst);
1914 
1915   set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len);
1916 }
1917 
1918 
1919 // Extract relative address (aka offset).
1920 // inv_simm16 works for 4-byte instructions only.
1921 // compare and branch instructions are 6-byte and have a 16bit offset "in the middle".
long MacroAssembler::get_pcrel_offset(unsigned long inst) {

  if (MacroAssembler::is_pcrelative_short(inst)) {
    // Distinguish instruction length by where the bits sit in the right-aligned
    // `inst' value: a 4-byte instruction leaves the upper 32 bits zero and has
    // its 16-bit offset in the low halfword; a 6-byte instruction carries the
    // 16-bit offset "in the middle" (bits 16..31 of the instruction).
    if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) {
      return RelAddr::inv_pcrel_off16(inv_simm16(inst));
    } else {
      return RelAddr::inv_pcrel_off16(inv_simm16_48(inst));
    }
  }

  if (MacroAssembler::is_pcrelative_long(inst)) {
    // 6-byte instruction with a 32-bit relative offset.
    return RelAddr::inv_pcrel_off32(inv_simm32(inst));
  }

  // Neither short nor long pc-relative: report and fail.
  print_dbg_msg(tty, inst, "not a pcrelative instruction", 6);
#ifdef LUCY_DBG
  VM_Version::z_SIGSEGV();
#else
  ShouldNotReachHere();
#endif
  return -1;
}
1944 
// Extract the relative offset from the pc-relative instruction at `pc'.
long MacroAssembler::get_pcrel_offset(address pc) {
  unsigned long inst;
  // NOTE(review): len (the instruction length) is not used afterwards;
  // get_instruction is called for its `inst' out-parameter only.
  unsigned int  len = get_instruction(pc, &inst);

#ifdef ASSERT
  // Debug build: verify the instruction really is pc-relative before decoding,
  // and dump the surrounding code range if it is not.
  long offset;
  if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) {
    offset = get_pcrel_offset(inst);
  } else {
    offset = -1;
  }

  if (offset == -1) {
    dump_code_range(tty, pc, 32, "not a pcrelative instruction");
#ifdef LUCY_DBG
    VM_Version::z_SIGSEGV();
#else
    ShouldNotReachHere();
#endif
  }
  return offset;
#else
  return get_pcrel_offset(inst);
#endif // ASSERT
}
1970 
1971 // Get target address from pc-relative instructions.
address MacroAssembler::get_target_addr_pcrel(address pc) {
  assert(is_pcrelative_long(pc), "not a pcrelative instruction");
  // Target = instruction address + encoded relative offset.
  return pc + get_pcrel_offset(pc);
}
1976 
1977 // Patch pc relative load address.
1978 void MacroAssembler::patch_target_addr_pcrel(address pc, address con) {
1979   unsigned long inst;
1980   // Offset is +/- 2**32 -> use long.
1981   ptrdiff_t distance = con - pc;
1982 
1983   get_instruction(pc, &inst);
1984 
1985   if (is_pcrelative_short(inst)) {
1986     *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc);  // Instructions are at least 2-byte aligned, no test required.
1987 
1988     // Some extra safety net.
1989     if (!RelAddr::is_in_range_of_RelAddr16(distance)) {
1990       print_dbg_msg(tty, inst, "distance out of range (16bit)", 4);
1991       dump_code_range(tty, pc, 32, "distance out of range (16bit)");
1992       guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16");
1993     }
1994     return;
1995   }
1996 
1997   if (is_pcrelative_long(inst)) {
1998     *(int *)(pc+2)   = RelAddr::pcrel_off32(con, pc);
1999 
2000     // Some Extra safety net.
2001     if (!RelAddr::is_in_range_of_RelAddr32(distance)) {
2002       print_dbg_msg(tty, inst, "distance out of range (32bit)", 6);
2003       dump_code_range(tty, pc, 32, "distance out of range (32bit)");
2004       guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32");
2005     }
2006     return;
2007   }
2008 
2009   guarantee(false, "not a pcrelative instruction to patch!");
2010 }
2011 
2012 // "Current PC" here means the address just behind the basr instruction.
// Emit code that places the current PC into `result'.
// "Current PC" here means the address just behind the basr instruction.
address MacroAssembler::get_PC(Register result) {
  z_basr(result, Z_R0); // Don't branch, just save next instruction address in result.
  return pc();          // Buffer position right behind the basr.
}
2017 
2018 // Get current PC + offset.
2019 // Offset given in bytes, must be even!
2020 // "Current PC" here means the address of the larl instruction plus the given offset.
address MacroAssembler::get_PC(Register result, int64_t offset) {
  address here = pc();
  z_larl(result, offset/2); // larl takes the offset in halfwords; hence offset must be even.
  return here + offset;     // The address the emitted code computes into result.
}
2026 
// Emit code that computes the byte length of the instruction at [pc] into `size'.
// On z/Architecture the two most significant bits of the first instruction byte
// encode the length: 00 -> 2 bytes, 01/10 -> 4 bytes, 11 -> 6 bytes.
void MacroAssembler::instr_size(Register size, Register pc) {
  // Extract 2 most significant bits of current instruction.
  z_llgc(size, Address(pc));
  z_srl(size, 6);
  // Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6.
  z_ahi(size, 3);
  z_nill(size, 6);
}
2035 
2036 // Resize_frame with SP(new) = SP(old) - [offset].
void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp)
{
  assert_different_registers(offset, fp, Z_SP);
  // Optionally fetch the frame pointer (caller's SP) before SP moves.
  if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); }

  z_sgr(Z_SP, offset);                  // SP -= offset: grow the frame by [offset] bytes.
  z_stg(fp, _z_abi(callers_sp), Z_SP);  // Re-establish the stack linkage in the resized frame.
}
2045 
2046 // Resize_frame with SP(new) = [newSP] + offset.
2047 //   This emitter is useful if we already have calculated a pointer
2048 //   into the to-be-allocated stack space, e.g. with special alignment properties,
2049 //   but need some additional space, e.g. for spilling.
2050 //   newSP    is the pre-calculated pointer. It must not be modified.
2051 //   fp       holds, or is filled with, the frame pointer.
2052 //   offset   is the additional increment which is added to addr to form the new SP.
2053 //            Note: specify a negative value to reserve more space!
2054 //   load_fp == true  only indicates that fp is not pre-filled with the frame pointer.
2055 //                    It does not guarantee that fp contains the frame pointer at the end.
void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) {
  assert_different_registers(newSP, fp, Z_SP);

  // Optionally fetch the frame pointer (caller's SP) before SP is changed.
  if (load_fp) {
    z_lg(fp, _z_abi(callers_sp), Z_SP);
  }

  add2reg(Z_SP, offset, newSP);         // SP = newSP + offset.
  z_stg(fp, _z_abi(callers_sp), Z_SP);  // Re-establish the stack linkage.
}
2066 
2067 // Resize_frame with SP(new) = [newSP].
2068 //   load_fp == true  only indicates that fp is not pre-filled with the frame pointer.
2069 //                    It does not guarantee that fp contains the frame pointer at the end.
void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) {
  assert_different_registers(newSP, fp, Z_SP);

  // Optionally fetch the frame pointer (caller's SP) before SP is changed.
  if (load_fp) {
    z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store.
  }

  z_lgr(Z_SP, newSP);
  if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses.
    z_stg(fp, _z_abi(callers_sp), newSP);
  } else {
    // Z_R0 as base register encodes "no base" in the instruction; use Z_SP
    // (which now holds the same value) instead.
    z_stg(fp, _z_abi(callers_sp), Z_SP);
  }
}
2084 
2085 // Resize_frame with SP(new) = SP(old) + offset.
void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) {
  assert_different_registers(fp, Z_SP);

  // Optionally fetch the frame pointer (caller's SP) before SP moves.
  if (load_fp) {
    z_lg(fp, _z_abi(callers_sp), Z_SP);
  }
  add64(Z_SP, offset);                  // SP += offset (negative offset grows the frame).
  z_stg(fp, _z_abi(callers_sp), Z_SP);  // Re-establish the stack linkage.
}
2095 
// Push a new C frame of [bytes] size.
//   bytes    - frame size in bytes; with bytes_with_inverted_sign == true,
//              the register holds the negated size and is added instead.
//   old_sp   - receives (copy_sp) or must already hold (!copy_sp) the old SP.
//   copy_sp  - if true, capture the old SP into old_sp here.
void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) {
#ifdef ASSERT
  assert_different_registers(bytes, old_sp, Z_SP);
  // If the caller claims old_sp is pre-filled, verify it really equals Z_SP.
  if (!copy_sp) {
    z_cgr(old_sp, Z_SP);
    asm_assert_eq("[old_sp]!=[Z_SP]", 0x211);
  }
#endif
  if (copy_sp) { z_lgr(old_sp, Z_SP); }
  if (bytes_with_inverted_sign) {
    z_agr(Z_SP, bytes);
  } else {
    z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster.
  }
  // Store the old SP as stack linkage in the new frame.
  z_stg(old_sp, _z_abi(callers_sp), Z_SP);
}
2112 
// Push a new C frame of constant size; returns the aligned frame size actually pushed.
//   bytes   - requested frame size (aligned up to frame::alignment_in_bytes).
//   scratch - temporarily holds the old SP; see ASSERT note below.
unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) {
  long offset = Assembler::align(bytes, frame::alignment_in_bytes);
  assert(offset > 0, "should push a frame with positive size, size = %ld.", offset);
  assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset);

  // We must not write outside the current stack bounds (given by Z_SP).
  // Thus, we have to first update Z_SP and then store the previous SP as stack linkage.
  // We rely on Z_R0 by default to be available as scratch.
  z_lgr(scratch, Z_SP);
  add2reg(Z_SP, -offset);
  z_stg(scratch, _z_abi(callers_sp), Z_SP);
#ifdef ASSERT
  // Just make sure nobody uses the value in the default scratch register.
  // When another register is used, the caller might rely on it containing the frame pointer.
  if (scratch == Z_R0) {
    z_iihf(scratch, 0xbaadbabe);
    z_iilf(scratch, 0xdeadbeef);
  }
#endif
  return offset;
}
2134 
2135 // Push a frame of size `bytes' plus abi160 on top.
2136 unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) {
2137   BLOCK_COMMENT("push_frame_abi160 {");
2138   unsigned int res = push_frame(bytes + frame::z_abi_160_size);
2139   BLOCK_COMMENT("} push_frame_abi160");
2140   return res;
2141 }
2142 
2143 // Pop current C frame.
void MacroAssembler::pop_frame() {
  BLOCK_COMMENT("pop_frame:");
  // The caller's SP was saved as stack linkage; reloading it pops the frame.
  Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
}
2148 
2149 // Pop current C frame and restore return PC register (Z_R14).
void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) {
  BLOCK_COMMENT("pop_frame_restore_retPC:");
  int retPC_offset = _z_abi16(return_pc) + frame_size_in_bytes;
  // If possible, pop frame by add instead of load (a penny saved is a penny got :-).
  if (Displacement::is_validDisp(retPC_offset)) {
    // Return PC still addressable from the current SP: load it first, then pop.
    z_lg(Z_R14, retPC_offset, Z_SP);
    add2reg(Z_SP, frame_size_in_bytes);
  } else {
    // Offset too large for a displacement: pop first, then restore via helper.
    add2reg(Z_SP, frame_size_in_bytes);
    restore_return_pc();
  }
}
2162 
2163 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) {
2164   if (allow_relocation) {
2165     call_c(entry_point);
2166   } else {
2167     call_c_static(entry_point);
2168   }
2169 }
2170 
2171 void MacroAssembler::call_VM_leaf_base(address entry_point) {
2172   bool allow_relocation = true;
2173   call_VM_leaf_base(entry_point, allow_relocation);
2174 }
2175 
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register last_java_sp,
                                  address  entry_point,
                                  bool     allow_relocation,
                                  bool     check_exceptions) { // Defaults to true.
  // Allow_relocation indicates, if true, that the generated code shall
  // be fit for code relocation or referenced data relocation. In other
  // words: all addresses must be considered variable. PC-relative addressing
  // is not possible then.
  // On the other hand, if (allow_relocation == false), addresses and offsets
  // may be considered stable, enabling us to take advantage of some PC-relative
  // addressing tweaks. These might improve performance and reduce code size.

  // Determine last_java_sp register.
  if (!last_java_sp->is_valid()) {
    last_java_sp = Z_SP;  // Load Z_SP as SP.
  }

  // Record the top Java frame so the VM can walk the stack; Z_R1 serves as scratch.
  set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation);

  // ARG1 must hold thread address.
  z_lgr(Z_ARG1, Z_thread);

  address return_pc = NULL;
  if (allow_relocation) {
    return_pc = call_c(entry_point);
  } else {
    return_pc = call_c_static(entry_point);
  }

  reset_last_Java_frame(allow_relocation);

  // C++ interp handles this in the interpreter.
  check_and_handle_popframe(Z_thread);
  check_and_handle_earlyret(Z_thread);

  // Check for pending exceptions.
  if (check_exceptions) {
    // Check for pending exceptions (java_thread is set upon return).
    load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset()));

    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach.

    Label ok;
    z_bre(ok); // Bcondequal is the same as bcondZero.
    call_stub(StubRoutines::forward_exception_entry());
    bind(ok);
  }

  // Get oop result if there is one and reset the value in the thread.
  if (oop_result->is_valid()) {
    get_vm_result(oop_result);
  }

  _last_calls_return_pc = return_pc;  // Wipe out other (error handling) calls.
}
2234 
2235 void MacroAssembler::call_VM_base(Register oop_result,
2236                                   Register last_java_sp,
2237                                   address  entry_point,
2238                                   bool     check_exceptions) { // Defaults to true.
2239   bool allow_relocation = true;
2240   call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions);
2241 }
2242 
2243 // VM calls without explicit last_java_sp.
2244 
2245 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
2246   // Call takes possible detour via InterpreterMacroAssembler.
2247   call_VM_base(oop_result, noreg, entry_point, true, check_exceptions);
2248 }
2249 
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  lgr_if_needed(Z_ARG2, arg_1);  // Moves only if arg_1 is not already in Z_ARG2.
  call_VM(oop_result, entry_point, check_exceptions);
}
2255 
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  // Arguments are shuffled in order; the asserts guard against a later
  // argument being clobbered by an earlier move.
  lgr_if_needed(Z_ARG2, arg_1);
  assert(arg_2 != Z_ARG2, "smashed argument");
  lgr_if_needed(Z_ARG3, arg_2);
  call_VM(oop_result, entry_point, check_exceptions);
}
2263 
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
                             Register arg_3, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  // Arguments are shuffled in order; the asserts guard against a later
  // argument being clobbered by an earlier move.
  lgr_if_needed(Z_ARG2, arg_1);
  assert(arg_2 != Z_ARG2, "smashed argument");
  lgr_if_needed(Z_ARG3, arg_2);
  assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
  lgr_if_needed(Z_ARG4, arg_3);
  call_VM(oop_result, entry_point, check_exceptions);
}
2274 
2275 // VM static calls without explicit last_java_sp.
2276 
2277 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) {
2278   // Call takes possible detour via InterpreterMacroAssembler.
2279   call_VM_base(oop_result, noreg, entry_point, false, check_exceptions);
2280 }
2281 
void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2,
                                    Register arg_3, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  // Arguments are shuffled in order; the asserts guard against a later
  // argument being clobbered by an earlier move.
  lgr_if_needed(Z_ARG2, arg_1);
  assert(arg_2 != Z_ARG2, "smashed argument");
  lgr_if_needed(Z_ARG3, arg_2);
  assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
  lgr_if_needed(Z_ARG4, arg_3);
  call_VM_static(oop_result, entry_point, check_exceptions);
}
2292 
2293 // VM calls with explicit last_java_sp.
2294 
2295 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) {
2296   // Call takes possible detour via InterpreterMacroAssembler.
2297   call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions);
2298 }
2299 
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
   // Z_ARG1 is reserved for the thread.
   lgr_if_needed(Z_ARG2, arg_1);  // Moves only if arg_1 is not already in Z_ARG2.
   call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
}
2305 
// VM call with explicit last_java_sp and two arguments.
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
                             Register arg_2, bool check_exceptions) {
   // Z_ARG1 is reserved for the thread.
   lgr_if_needed(Z_ARG2, arg_1);
   assert(arg_2 != Z_ARG2, "smashed argument"); // arg_2 must still be live after the move above.
   lgr_if_needed(Z_ARG3, arg_2);
   call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
}
2314 
// VM call with explicit last_java_sp and three arguments.
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
                             Register arg_2, Register arg_3, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  lgr_if_needed(Z_ARG2, arg_1);
  assert(arg_2 != Z_ARG2, "smashed argument"); // arg_2 must still be live after the move above.
  lgr_if_needed(Z_ARG3, arg_2);
  assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); // arg_3 must survive both moves.
  lgr_if_needed(Z_ARG4, arg_3);
  call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
}
2325 
2326 // VM leaf calls.
2327 
// VM leaf call (no thread argument, no oop result) without arguments.
void MacroAssembler::call_VM_leaf(address entry_point) {
  // Call takes possible detour via InterpreterMacroAssembler.
  call_VM_leaf_base(entry_point, true);
}
2332 
// VM leaf call with one argument. Leaf calls pass arguments starting at
// Z_ARG1 (no thread argument). noreg means "argument already in place".
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  call_VM_leaf(entry_point);
}
2337 
// VM leaf call with two arguments. noreg means "argument already in place".
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  assert(arg_2 != Z_ARG1, "smashed argument"); // arg_2 must still be live after the move above.
  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
  call_VM_leaf(entry_point);
}
2344 
// VM leaf call with three arguments. noreg means "argument already in place".
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  assert(arg_2 != Z_ARG1, "smashed argument"); // arg_2 must still be live after the move above.
  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
  assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument"); // arg_3 must survive both moves.
  if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
  call_VM_leaf(entry_point);
}
2353 
2354 // Static VM leaf calls.
2355 // Really static VM leaf calls are never patched.
2356 
// Really static VM leaf call: the target is never patched (second argument
// false — presumably allow_relocation; TODO confirm against call_VM_leaf_base).
void MacroAssembler::call_VM_leaf_static(address entry_point) {
  // Call takes possible detour via InterpreterMacroAssembler.
  call_VM_leaf_base(entry_point, false);
}
2361 
// Static VM leaf call with one argument. noreg means "argument already in place".
void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  call_VM_leaf_static(entry_point);
}
2366 
// Static VM leaf call with two arguments. noreg means "argument already in place".
void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  assert(arg_2 != Z_ARG1, "smashed argument"); // arg_2 must still be live after the move above.
  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
  call_VM_leaf_static(entry_point);
}
2373 
// Static VM leaf call with three arguments. noreg means "argument already in place".
void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  assert(arg_2 != Z_ARG1, "smashed argument"); // arg_2 must still be live after the move above.
  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
  assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument"); // arg_3 must survive both moves.
  if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
  call_VM_leaf_static(entry_point);
}
2382 
2383 // Don't use detour via call_c(reg).
// Call a C function: materialize the absolute entry address in scratch
// register Z_R1 and branch via register. Returns the pc after the call
// (whatever call() returns).
address MacroAssembler::call_c(address function_entry) {
  load_const(Z_R1, function_entry);
  return call(Z_R1);
}
2388 
2389 // Variant for really static (non-relocatable) calls which are never patched.
// Variant for really static (non-relocatable) calls which are never patched.
// Uses load_absolute_address (no relocation) instead of load_const.
address MacroAssembler::call_c_static(address function_entry) {
  load_absolute_address(Z_R1, function_entry);
#if 0 // def ASSERT
  // Disabled self-check: re-materialize the entry address with a relocatable
  // load and trap if the two differ (i.e. if the call site moved).
  // Verify that call site did not move.
  load_const_optimized(Z_R0, function_entry);
  z_cgr(Z_R1, Z_R0);
  z_brc(bcondEqual, 3);
  z_illtrap(0xba);
#endif
  return call(Z_R1);
}
2401 
2402 address MacroAssembler::call_c_opt(address function_entry) {
2403   bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */);
2404   _last_calls_return_pc = success ? pc() : NULL;
2405   return _last_calls_return_pc;
2406 }
2407 
2408 // Identify a call_far_patchable instruction: LARL + LG + BASR
2409 //
2410 //    nop                   ; optionally, if required for alignment
2411 //    lgrl rx,A(TOC entry)  ; PC-relative access into constant pool
2412 //    basr Z_R14,rx         ; end of this instruction must be aligned to a word boundary
2413 //
2414 // Code pattern will eventually get patched into variant2 (see below for detection code).
2415 //
2416 bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) {
2417   address iaddr = instruction_addr;
2418 
2419   // Check for the actual load instruction.
2420   if (!is_load_const_from_toc(iaddr)) { return false; }
2421   iaddr += load_const_from_toc_size();
2422 
2423   // Check for the call (BASR) instruction, finally.
2424   assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch");
2425   return is_call_byregister(iaddr);
2426 }
2427 
2428 // Identify a call_far_patchable instruction: BRASL
2429 //
2430 // Code pattern to suits atomic patching:
2431 //    nop                       ; Optionally, if required for alignment.
2432 //    nop    ...                ; Multiple filler nops to compensate for size difference (variant0 is longer).
2433 //    nop                       ; For code pattern detection: Prepend each BRASL with a nop.
2434 //    brasl  Z_R14,<reladdr>    ; End of code must be 4-byte aligned !
2435 bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) {
2436   const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size());
2437 
2438   // Check for correct number of leading nops.
2439   address iaddr;
2440   for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) {
2441     if (!is_z_nop(iaddr)) { return false; }
2442   }
2443   assert(iaddr == call_addr, "sanity");
2444 
2445   // --> Check for call instruction.
2446   if (is_call_far_pcrelative(call_addr)) {
2447     assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch");
2448     return true;
2449   }
2450 
2451   return false;
2452 }
2453 
2454 // Emit a NOT mt-safely patchable 64 bit absolute call.
2455 // If toc_offset == -2, then the destination of the call (= target) is emitted
2456 //                      to the constant pool and a runtime_call relocation is added
2457 //                      to the code buffer.
2458 // If toc_offset != -2, target must already be in the constant pool at
2459 //                      _ctableStart+toc_offset (a caller can retrieve toc_offset
2460 //                      from the runtime_call relocation).
2461 // Special handling of emitting to scratch buffer when there is no constant pool.
2462 // Slightly changed code pattern. We emit an additional nop if we would
2463 // not end emitting at a word aligned address. This is to ensure
2464 // an atomically patchable displacement in brasl instructions.
2465 //
2466 // A call_far_patchable comes in different flavors:
2467 //  - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register)
2468 //  - LGRL(CP) / BR          (address in constant pool, pc-relative accesss)
2469 //  - BRASL                  (relative address of call target coded in instruction)
2470 // All flavors occupy the same amount of space. Length differences are compensated
2471 // by leading nops, such that the instruction sequence always ends at the same
2472 // byte offset. This is required to keep the return offset constant.
2473 // Furthermore, the return address (the end of the instruction sequence) is forced
2474 // to be on a 4-byte boundary. This is required for atomic patching, should we ever
2475 // need to patch the call target of the BRASL flavor.
2476 // RETURN value: false, if no constant pool entry could be allocated, true otherwise.
bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) {
  // Get current pc and ensure word alignment for end of instr sequence.
  const address start_pc = pc();
  const intptr_t       start_off = offset();
  assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address");
  const ptrdiff_t      dist      = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop.
  const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit();
  const bool emit_relative_call  = !emit_target_to_pool &&
                                   RelAddr::is_in_range_of_RelAddr32(dist) &&
                                   ReoptimizeCallSequences &&
                                   !code_section()->scratch_emit();

  if (emit_relative_call) {
    // BRASL flavor: target is in 32-bit pc-relative range.
    // Add padding to get the same size as below.
    const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size();
    unsigned int current_padding;
    for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); }
    assert(current_padding == padding, "sanity");

    // relative call: len = 2(nop) + 6 (brasl)
    // CodeBlob resize cannot occur in this case because
    // this call is emitted into pre-existing space.
    z_nop(); // Prepend each BRASL with a nop.
    z_brasl(Z_R14, target);
  } else {
    // absolute call: Get address from TOC.
    // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8}
    if (emit_target_to_pool) {
      // When emitting the call for the first time, we do not need to use
      // the pc-relative version. It will be patched anyway, when the code
      // buffer is copied.
      // Relocation is not needed when !ReoptimizeCallSequences.
      relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none;
      AddressLiteral dest(target, rt);
      // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills
      // inst_mark(). Reset if possible.
      bool reset_mark = (inst_mark() == pc());
      tocOffset = store_oop_in_toc(dest);
      if (reset_mark) { set_inst_mark(); }
      if (tocOffset == -1) {
        return false; // Couldn't create constant pool entry.
      }
    }
    assert(offset() == start_off, "emit no code before this point!");

    // Resolve the TOC slot address: relative to pc for patched code,
    // relative to the constants section when emitting for the first time.
    address tocPos = pc() + tocOffset;
    if (emit_target_to_pool) {
      tocPos = code()->consts()->start() + tocOffset;
    }
    load_long_pcrelative(Z_R14, tocPos);
    z_basr(Z_R14, Z_R14);
  }

#ifdef ASSERT
  // Assert that we can identify the emitted call.
  assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call");
  assert(offset() == start_off+call_far_patchable_size(), "wrong size");

  if (emit_target_to_pool) {
    assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target,
           "wrong encoding of dest address");
  }
#endif
  return true; // success
}
2542 
2543 // Identify a call_far_patchable instruction.
2544 // For more detailed information see header comment of call_far_patchable.
2545 bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) {
2546   return is_call_far_patchable_variant2_at(instruction_addr)  || // short version: BRASL
2547          is_call_far_patchable_variant0_at(instruction_addr);    // long version LARL + LG + BASR
2548 }
2549 
2550 // Does the call_far_patchable instruction use a pc-relative encoding
2551 // of the call destination?
2552 bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) {
2553   // Variant 2 is pc-relative.
2554   return is_call_far_patchable_variant2_at(instruction_addr);
2555 }
2556 
2557 bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) {
2558   // Prepend each BRASL with a nop.
2559   return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size());  // Match at position after one nop required.
2560 }
2561 
2562 // Set destination address of a call_far_patchable instruction.
// Set destination address of a call_far_patchable instruction.
// Re-runs the emitter in place over the existing instruction bytes; the
// emitter picks the BRASL flavor when circumstances permit (target in range,
// ReoptimizeCallSequences), otherwise re-emits the TOC-based flavor.
void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) {
  ResourceMark rm;

  // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit).
  int code_size = MacroAssembler::call_far_patchable_size();
  CodeBuffer buf(instruction_addr, code_size);
  MacroAssembler masm(&buf);
  masm.call_far_patchable(dest, tocOffset);
  ICache::invalidate_range(instruction_addr, code_size); // Empty on z.
}
2573 
2574 // Get dest address of a call_far_patchable instruction.
// Get dest address of a call_far_patchable instruction.
// Decodes either flavor; dumps diagnostics and traps if neither matches.
address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) {
  // Dynamic TOC: absolute address in constant pool.
  // Check variant2 first, it is more frequent.

  // Relative address encoded in call instruction.
  if (is_call_far_patchable_variant2_at(instruction_addr)) {
    return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop.

  // Absolute address in constant pool.
  } else if (is_call_far_patchable_variant0_at(instruction_addr)) {
    address iaddr = instruction_addr;

    // The TOC slot holds the absolute target address; read it directly.
    long    tocOffset = get_load_const_from_toc_offset(iaddr);
    address tocLoc    = iaddr + tocOffset;
    return *(address *)(tocLoc);
  } else {
    // Neither pattern matched: dump raw bytes and a disassembly, then trap.
    fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr);
    fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n",
            *(unsigned long*)instruction_addr,
            *(unsigned long*)(instruction_addr+8),
            call_far_patchable_size());
    Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size());
    ShouldNotReachHere();
    return NULL;
  }
}
2601 
2602 void MacroAssembler::align_call_far_patchable(address pc) {
2603   if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); }
2604 }
2605 
// Intentionally empty on this platform; the hook exists for ports that
// need early-return handling after VM calls.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
2608 
// Intentionally empty on this platform; the hook exists for ports that
// need pop-frame handling after VM calls.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
2611 
2612 // Read from the polling page.
2613 // Use TM or TMY instruction, depending on read offset.
2614 //   offset = 0: Use TM, safepoint polling.
2615 //   offset < 0: Use TMY, profiling safepoint polling.
void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) {
  if (Immediate::is_uimm12(offset)) {
    // Short form fits: 4-byte TM with unsigned 12-bit displacement.
    z_tm(offset, polling_page_address, mask_safepoint);
  } else {
    // Negative (profiling) offsets need the 6-byte TMY with signed 20-bit displacement.
    z_tmy(offset, polling_page_address, mask_profiling);
  }
}
2623 
2624 // Check whether z_instruction is a read access to the polling page
2625 // which was emitted by load_from_polling_page(..).
2626 bool MacroAssembler::is_load_from_polling_page(address instr_loc) {
2627   unsigned long z_instruction;
2628   unsigned int  ilen = get_instruction(instr_loc, &z_instruction);
2629 
2630   if (ilen == 2) { return false; } // It's none of the allowed instructions.
2631 
2632   if (ilen == 4) {
2633     if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail.
2634 
2635     int ms = inv_mask(z_instruction,8,32);  // mask
2636     int ra = inv_reg(z_instruction,16,32);  // base register
2637     int ds = inv_uimm12(z_instruction);     // displacement
2638 
2639     if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) {
2640       return false; // It's not a z_tm(0, ra, mask_safepoint). Fail.
2641     }
2642 
2643   } else { /* if (ilen == 6) */
2644 
2645     assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y).");
2646 
2647     if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail.
2648 
2649     int ms = inv_mask(z_instruction,8,48);  // mask
2650     int ra = inv_reg(z_instruction,16,48);  // base register
2651     int ds = inv_simm20(z_instruction);     // displacement
2652   }
2653 
2654   return true;
2655 }
2656 
2657 // Extract poll address from instruction and ucontext.
2658 address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) {
2659   assert(ucontext != NULL, "must have ucontext");
2660   ucontext_t* uc = (ucontext_t*) ucontext;
2661   unsigned long z_instruction;
2662   unsigned int ilen = get_instruction(instr_loc, &z_instruction);
2663 
2664   if (ilen == 4 && is_z_tm(z_instruction)) {
2665     int ra = inv_reg(z_instruction, 16, 32);  // base register
2666     int ds = inv_uimm12(z_instruction);       // displacement
2667     address addr = (address)uc->uc_mcontext.gregs[ra];
2668     return addr + ds;
2669   } else if (ilen == 6 && is_z_tmy(z_instruction)) {
2670     int ra = inv_reg(z_instruction, 16, 48);  // base register
2671     int ds = inv_simm20(z_instruction);       // displacement
2672     address addr = (address)uc->uc_mcontext.gregs[ra];
2673     return addr + ds;
2674   }
2675 
2676   ShouldNotReachHere();
2677   return NULL;
2678 }
2679 
2680 // Extract poll register from instruction.
2681 uint MacroAssembler::get_poll_register(address instr_loc) {
2682   unsigned long z_instruction;
2683   unsigned int ilen = get_instruction(instr_loc, &z_instruction);
2684 
2685   if (ilen == 4 && is_z_tm(z_instruction)) {
2686     return (uint)inv_reg(z_instruction, 16, 32);  // base register
2687   } else if (ilen == 6 && is_z_tmy(z_instruction)) {
2688     return (uint)inv_reg(z_instruction, 16, 48);  // base register
2689   }
2690 
2691   ShouldNotReachHere();
2692   return 0;
2693 }
2694 
// Not used on this platform; callers must not reach this.
bool MacroAssembler::is_memory_serialization(int instruction, JavaThread* thread, void* ucontext) {
  ShouldNotCallThis();
  return false;
}
2699 
2700 // Write serialization page so VM thread can do a pseudo remote membar
2701 // We use the current thread pointer to calculate a thread specific
2702 // offset to write to within the page. This minimizes bus traffic
2703 // due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
  assert_different_registers(tmp1, tmp2);
  // tmp2 = thread-specific offset into the serialization page.
  z_sllg(tmp2, thread, os::get_serialize_page_shift_count());
  // tmp1 = base address of the serialization page.
  load_const_optimized(tmp1, (long) os::get_memory_serialize_page());

  // Mask the offset into the page, choosing the AND-immediate form by mask width,
  // then zero-extend the result.
  int mask = os::get_serialize_page_mask();
  if (Immediate::is_uimm16(mask)) {
    z_nill(tmp2, mask);
    z_llghr(tmp2, tmp2);
  } else {
    z_nilf(tmp2, mask);
    z_llgfr(tmp2, tmp2);
  }

  // Release barrier, then store to page to trigger the pseudo membar.
  z_release();
  z_st(Z_R0, 0, tmp2, tmp1);
}
2721 
// Emit a safepoint poll. Branches to slow_path if a safepoint is pending.
// Thread-local polling tests the poll bit in the thread's polling page word;
// global polling compares SafepointSynchronize state via temp_reg.
void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) {
  if (SafepointMechanism::uses_thread_local_poll()) {
    const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
    // Armed page has poll_bit set.
    z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
    z_brnaz(slow_path);
  } else {
    load_const_optimized(temp_reg, SafepointSynchronize::address_of_state());
    z_cli(/*SafepointSynchronize::sz_state()*/4-1, temp_reg, SafepointSynchronize::_not_synchronized);
    z_brne(slow_path);
  }
}
2734 
2735 // Don't rely on register locking, always use Z_R1 as scratch register instead.
// Don't rely on register locking, always use Z_R1 as scratch register instead.
// Touch the stack page at SP - offset with a TM(Y) read to trigger a fault
// if the page is protected (stack overflow detection).
void MacroAssembler::bang_stack_with_offset(int offset) {
  // Stack grows down, caller passes positive offset.
  assert(offset > 0, "must bang with positive offset");
  if (Displacement::is_validDisp(-offset)) {
    // -offset fits the signed 20-bit TMY displacement.
    z_tmy(-offset, Z_SP, mask_stackbang);
  } else {
    // Too far for a displacement: compute the address in Z_R1 first.
    add2reg(Z_R1, -offset, Z_SP);    // Do not destroy Z_SP!!!
    z_tm(0, Z_R1, mask_stackbang);  // Just banging.
  }
}
2746 
// Check whether SP has grown into the thread's reserved stack zone;
// if so, re-enable the zone via the runtime and jump to the delayed
// StackOverflowError stub (which expects the return pc in Z_R14).
void MacroAssembler::reserved_stack_check(Register return_pc) {
  // Test if reserved zone needs to be enabled.
  Label no_reserved_zone_enabling;
  assert(return_pc == Z_R14, "Return pc must be in R14 before z_br() to StackOverflow stub.");
  BLOCK_COMMENT("reserved_stack_check {");

  // SP still below the reserved-zone activation boundary? Then nothing to do.
  z_clg(Z_SP, Address(Z_thread, JavaThread::reserved_stack_activation_offset()));
  z_brl(no_reserved_zone_enabling);

  // Enable reserved zone again, throw stack overflow exception.
  save_return_pc();
  push_frame_abi160(0);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), Z_thread);
  pop_frame();
  restore_return_pc();

  load_const_optimized(Z_R1, StubRoutines::throw_delayed_StackOverflowError_entry());
  // Don't use call() or z_basr(), they will invalidate Z_R14 which contains the return pc.
  z_br(Z_R1);

  should_not_reach_here();

  bind(no_reserved_zone_enabling);
  BLOCK_COMMENT("} reserved_stack_check");
}
2772 
2773 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Allocate an object of (con|var)_size_in_bytes from the current thread's
// TLAB. On success obj holds the start of the new object and tlab top is
// bumped; on overflow, branches to slow_case with the TLAB unchanged.
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t1;
  Register thread = Z_thread;

  // obj = current TLAB top; end = obj + size (constant or register form).
  z_lg(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    z_lay(end, Address(obj, con_size_in_bytes));
  } else {
    z_lay(end, Address(obj, var_size_in_bytes));
  }
  // New end past the TLAB's fast path end? Take the slow path.
  z_cg(end, Address(thread, JavaThread::tlab_fast_path_end_offset()));
  branch_optimized(bcondHigh, slow_case);

  // Update the tlab top pointer.
  z_stg(end, Address(thread, JavaThread::tlab_top_offset()));

  // Recover var_size_in_bytes if necessary (caller passed t1 == var_size_in_bytes).
  if (var_size_in_bytes == end) {
    z_sgr(var_size_in_bytes, obj);
  }
}
2800 
2801 // Emitter for interface method lookup.
2802 //   input: recv_klass, intf_klass, itable_index
2803 //   output: method_result
2804 //   kills: itable_index, temp1_reg, Z_R0, Z_R1
2805 // TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs.
2806 // If the register is still not needed then, remove it.
void MacroAssembler::lookup_interface_method(Register           recv_klass,
                                             Register           intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register           method_result,
                                             Register           temp1_reg,
                                             Label&             no_such_interface,
                                             bool               return_method) {

  const Register vtable_len = temp1_reg;    // Used to compute itable_entry_addr.
  const Register itable_entry_addr = Z_R1_scratch;
  const Register itable_interface = Z_R0_scratch;

  BLOCK_COMMENT("lookup_interface_method {");

  // Load start of itable entries into itable_entry_addr.
  // The itable starts right after the vtable, so scale vtable_length by the
  // per-entry size to skip over it.
  z_llgf(vtable_len, Address(recv_klass, Klass::vtable_length_offset()));
  z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes()));

  // Loop over all itable entries until desired interfaceOop(Rinterface) found.
  const int vtable_base_offset = in_bytes(Klass::vtable_start_offset());

  // itable_entry_addr = recv_klass + vtable_base_offset + scaled vtable_len
  //                   + offset of the interface field within an itable entry.
  add2reg_with_index(itable_entry_addr,
                     vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(),
                     recv_klass, vtable_len);

  const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize;
  Label     search;

  bind(search);

  // Handle IncompatibleClassChangeError.
  // If the entry is NULL then we've reached the end of the table
  // without finding the expected interface, so throw an exception.
  load_and_test_long(itable_interface, Address(itable_entry_addr));
  z_bre(no_such_interface);

  // Advance to the next entry; loop while the current entry doesn't match.
  add2reg(itable_entry_addr, itable_offset_search_inc);
  z_cgr(itable_interface, intf_klass);
  z_brne(search);

  // Entry found and itable_entry_addr points to it, get offset of vtable for interface.
  if (return_method) {
    // itable_entry_addr has already been advanced past the matching entry,
    // hence the -itable_offset_search_inc correction below.
    const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() -
                                      itableOffsetEntry::interface_offset_in_bytes()) -
                                     itable_offset_search_inc;

    // Compute itableMethodEntry and get method and entry point
    // we use addressing with index and displacement, since the formula
    // for computing the entry's offset has a fixed and a dynamic part,
    // the latter depending on the matched interface entry and on the case,
    // that the itable index has been passed as a register, not a constant value.
    int method_offset = itableMethodEntry::method_offset_in_bytes();
                             // Fixed part (displacement), common operand.
    Register itable_offset = method_result;  // Dynamic part (index register).

    if (itable_index.is_register()) {
       // Compute the method's offset in that register, for the formula, see the
       // else-clause below.
       z_sllg(itable_offset, itable_index.as_register(), exact_log2(itableMethodEntry::size() * wordSize));
       z_agf(itable_offset, vtable_offset_offset, itable_entry_addr);
    } else {
      // Displacement increases.
      method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant();

      // Load index from itable.
      z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr);
    }

    // Finally load the method's oop.
    z_lg(method_result, method_offset, itable_offset, recv_klass);
  }
  BLOCK_COMMENT("} lookup_interface_method");
}
2880 
2881 // Lookup for virtual method invocation.
// Lookup for virtual method invocation.
// Loads the Method* at vtable slot vtable_index of recv_klass into
// method_result. Note: the register variant destroys vtable_index (shifted in place).
void MacroAssembler::lookup_virtual_method(Register           recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register           method_result) {
  assert_different_registers(recv_klass, vtable_index.register_or_noreg());
  assert(vtableEntry::size() * wordSize == wordSize,
         "else adjust the scaling in the code below");

  BLOCK_COMMENT("lookup_virtual_method {");

  const int base = in_bytes(Klass::vtable_start_offset());

  if (vtable_index.is_constant()) {
    // Load with base + disp.
    Address vtable_entry_addr(recv_klass,
                              vtable_index.as_constant() * wordSize +
                              base +
                              vtableEntry::method_offset_in_bytes());

    z_lg(method_result, vtable_entry_addr);
  } else {
    // Shift index properly and load with base + index + disp.
    Register vindex = vtable_index.as_register();
    Address  vtable_entry_addr(recv_klass, vindex,
                               base + vtableEntry::method_offset_in_bytes());

    z_sllg(vindex, vindex, exact_log2(wordSize));
    z_lg(method_result, vtable_entry_addr);
  }
  BLOCK_COMMENT("} lookup_virtual_method");
}
2912 
2913 // Factor out code to call ic_miss_handler.
2914 // Generate code to call the inline cache miss handler.
2915 //
2916 // In most cases, this code will be generated out-of-line.
2917 // The method parameters are intended to provide some variability.
2918 //   ICM          - Label which has to be bound to the start of useful code (past any traps).
2919 //   trapMarker   - Marking byte for the generated illtrap instructions (if any).
2920 //                  Any value except 0x00 is supported.
2921 //                  = 0x00 - do not generate illtrap instructions.
2922 //                         use nops to fill ununsed space.
2923 //   requiredSize - required size of the generated code. If the actually
2924 //                  generated code is smaller, use padding instructions to fill up.
2925 //                  = 0 - no size requirement, no padding.
2926 //   scratch      - scratch register to hold branch target address.
2927 //
2928 //  The method returns the code offset of the bound label.
unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) {
  intptr_t startOffset = offset();

  // Prevent entry at content_begin().
  if (trapMarker != 0) {
    z_illtrap(trapMarker);
  }

  // Load address of inline cache miss code into scratch register
  // and branch to cache miss handler.
  BLOCK_COMMENT("IC miss handler {");
  BIND(ICM);
  unsigned int   labelOffset = offset();
  AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub());

  load_const_optimized(scratch, icmiss);
  z_br(scratch);

  // Fill unused space.
  // Pad with illtraps (or nops when trapMarker == 0) until the requested
  // code size is reached.
  if (requiredSize > 0) {
    while ((offset() - startOffset) < requiredSize) {
      if (trapMarker == 0) {
        z_nop();
      } else {
        z_illtrap(trapMarker);
      }
    }
  }
  BLOCK_COMMENT("} IC miss handler");
  return labelOffset; // Code offset of the bound ICM label.
}
2960 
// Emit the unverified entry point (UEP) of an nmethod: null-check the
// receiver (Z_ARG1) if implicit null checks can't cover the klass load,
// then compare the receiver's klass against the inline cache and branch
// to ic_miss on mismatch.
void MacroAssembler::nmethod_UEP(Label& ic_miss) {
  Register ic_reg       = as_Register(Matcher::inline_cache_reg_encode());
  int      klass_offset = oopDesc::klass_offset_in_bytes();
  if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
    // Explicit null check of the receiver, using compare-and-branch if available.
    if (VM_Version::has_CompareBranch()) {
      z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss);
    } else {
      z_ltgr(Z_ARG1, Z_ARG1);
      z_bre(ic_miss);
    }
  }
  // Compare cached class against klass from receiver.
  compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false);
  z_brne(ic_miss);
}
2976 
// Emit the fast path of a "is sub_klass a subtype of super_klass?" check.
// Control is transferred according to the outcome:
//   *L_success   - subtype relation established decisively.
//   *L_failure   - subtype relation ruled out decisively.
//   *L_slow_path - undecided; the secondary supers array must be searched
//                  (see check_klass_subtype_slow_path).
// Any of the label pointers may be NULL, meaning "fall through" for that
// outcome (at most one NULL normally; see assert below).
// super_check_offset: offset of the super-check word inside sub_klass. May
//   be a register, a constant, or the constant -1, in which case the offset
//   is loaded here from super_klass (temp1_reg is required and killed then).
void MacroAssembler::check_klass_subtype_fast_path(Register   sub_klass,
                                                   Register   super_klass,
                                                   Register   temp1_reg,
                                                   Label*     L_success,
                                                   Label*     L_failure,
                                                   Label*     L_slow_path,
                                                   RegisterOrConstant super_check_offset) {

  const int sc_offset  = in_bytes(Klass::secondary_super_cache_offset());
  const int sco_offset = in_bytes(Klass::super_check_offset_offset());

  // -1 is the sentinel requesting that the offset be loaded from super_klass.
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  // A slow path is only reachable if the check may hit the secondary super
  // cache slot (offset unknown at compile time, or known to be sc_offset).
  bool need_slow_path = (must_load_sco ||
                         super_check_offset.constant_or_zero() == sc_offset);

  // Input registers must not overlap.
  assert_different_registers(sub_klass, super_klass, temp1_reg);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp1_reg != noreg, "supply either a temp or a register offset");
  }

  const Register Rsuper_check_offset = temp1_reg;

  NearLabel L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1 ||
         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
         "at most one NULL in the batch, usually");

  BLOCK_COMMENT("check_klass_subtype_fast_path {");
  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success);

  // Check the supertype display, which is uint.
  if (must_load_sco) {
    // Load (zero-extended) super check offset from super_klass.
    z_llgf(Rsuper_check_offset, sco_offset, super_klass);
    super_check_offset = RegisterOrConstant(Rsuper_check_offset);
  }
  Address super_check_addr(sub_klass, super_check_offset, 0);
  z_cg(super_klass, super_check_addr); // compare w/ displayed supertype
  // CC == EQ now iff super_klass is found at the displayed slot.

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/

  if (super_check_offset.is_register()) {
    // Offset only known at runtime: success on match; otherwise decide
    // between slow path (offset was the cache slot) and hard failure.
    branch_optimized(Assembler::bcondEqual, *L_success);
    z_cfi(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      branch_optimized(Assembler::bcondEqual, *L_slow_path);
    } else {
      branch_optimized(Assembler::bcondNotEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      branch_optimized(Assembler::bcondEqual, *L_success);
    } else {
      branch_optimized(Assembler::bcondNotEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      branch_optimized(Assembler::bcondEqual, *L_success);
    } else {
      branch_optimized(Assembler::bcondNotEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);
  // NOTE(review): local_brc is not defined in this function; the #undef is
  // harmless, but verify it is not meant for a macro defined earlier in the file.
#undef local_brc
#undef final_jmp
  BLOCK_COMMENT("} check_klass_subtype_fast_path");
  // fallthru (to slow path)
}
3079 
// Emit the slow path of a subtype check: linearly scan the secondary
// supers array of Rsubklass for Rsuperklass. On a hit, the result is
// stored into the secondary super cache and control goes to *L_success;
// on a miss, control goes to *L_failure. A NULL label means "fall through".
// Rarray_ptr and Rlength are temps and are killed; Z_R1 is used implicitly
// as the loop increment/limit register (see asserts below).
void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
                                                   Register Rsuperklass,
                                                   Register Rarray_ptr,  // tmp
                                                   Register Rlength,     // tmp
                                                   Label* L_success,
                                                   Label* L_failure) {
  // Input registers must not overlap.
  // Also check for R1 which is explicitely used here.
  assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength);
  NearLabel L_fallthrough, L_loop;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  const int ss_offset = in_bytes(Klass::secondary_supers_offset());
  const int sc_offset = in_bytes(Klass::secondary_super_cache_offset());

  const int length_offset = Array<Klass*>::length_offset_in_bytes();
  const int base_offset   = Array<Klass*>::base_offset_in_bytes();

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/

  NearLabel loop_iterate, loop_count, match;

  BLOCK_COMMENT("check_klass_subtype_slow_path {");
  // Rarray_ptr = Rsubklass->secondary_supers (an Array<Klass*>).
  z_lg(Rarray_ptr, ss_offset, Rsubklass);

  // Empty array => immediate failure.
  load_and_test_int(Rlength, Address(Rarray_ptr, length_offset));
  branch_optimized(Assembler::bcondZero, *L_failure);

  // Oops in table are NO MORE compressed.
  z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match.
  z_bre(match);                               // Shortcut for array length = 1.

  // No match yet, so we must walk the array's elements.
  // Loop setup: iterate via a negative index that counts up towards zero,
  // so the loop can be driven by a single branch-on-index instruction.
  z_lngfr(Rlength, Rlength);
  z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array
  z_llill(Z_R1, BytesPerWord);               // Set increment/end index.
  add2reg(Rlength, 2 * BytesPerWord);        // start index  = -(n-2)*BytesPerWord
  z_slgr(Rarray_ptr, Rlength);               // start addr: +=  (n-2)*BytesPerWord
  z_bru(loop_count);

  BIND(loop_iterate);
  z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match.
  z_bre(match);
  BIND(loop_count);
  // Branch-on-index: add Z_R1 (BytesPerWord) to Rlength; loop while low/equal.
  z_brxlg(Rlength, Z_R1, loop_iterate);

  // Rsuperklass not found among secondary super classes -> failure.
  branch_optimized(Assembler::bcondAlways, *L_failure);

  // Got a hit. Return success (zero result). Set cache.
  // Cache load doesn't happen here. For speed it is directly emitted by the compiler.

  BIND(match);

  z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache.

  final_jmp(*L_success);

  // Exit to the surrounding code.
  BIND(L_fallthrough);
  // NOTE(review): local_brc is not defined in this function; #undef is harmless.
#undef local_brc
#undef final_jmp
  BLOCK_COMMENT("} check_klass_subtype_slow_path");
}
3150 
// Emitter for combining fast and slow path of a subtype check.
// Branches to L_success if sub_klass is a subtype of super_klass; falls
// through (to the 'failure' label bound here) otherwise. The fast path's
// undecided case falls through into the slow path (slow-path label NULL),
// and a slow-path miss falls through past it (failure label NULL there).
// temp1_reg and temp2_reg are killed.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp1_reg,
                                         Register temp2_reg,
                                         Label&   L_success) {
  NearLabel failure;
  BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name()));
  check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg,
                                &L_success, &failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass,
                                temp1_reg, temp2_reg, &L_success, NULL);
  BIND(failure);
  BLOCK_COMMENT("} check_klass_subtype");
}
3166 
// Increment a counter at counter_address when the eq condition code is
// set. Kills registers tmp1_reg and tmp2_reg and preserves the condition code.
void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) {
  Label l;
  z_brne(l);                                   // Skip increment if CC != EQ.
  load_const(tmp1_reg, counter_address);       // Materialize counter address.
  add2mem_32(Address(tmp1_reg), 1, tmp2_reg);  // 32-bit increment in memory.
  z_cr(tmp1_reg, tmp1_reg); // Set cc to eq.
  bind(l);
}
3177 
// Semantics are dependent on the slow_case label:
//   If the slow_case label is not NULL, failure to biased-lock the object
//   transfers control to the location of the slow_case label. If the
//   object could be biased-locked, control is transferred to the done label.
//   The condition code is unpredictable.
//
//   If the slow_case label is NULL, failure to biased-lock the object results
//   in a transfer of control to the done label with a condition code of not_equal.
//   If the biased-lock could be successfully obtained, control is transfered to
//   the done label with a condition code of equal.
//   It is mandatory to react on the condition code At the done label.
//
// Falls through to cas_label (end of this emitter) when the object is not
// biasable, so the caller's regular CAS locking code follows directly.
// mark_reg must contain the object's mark word on entry; temp_reg and
// temp2_reg are killed.
void MacroAssembler::biased_locking_enter(Register  obj_reg,
                                          Register  mark_reg,
                                          Register  temp_reg,
                                          Register  temp2_reg,    // May be Z_RO!
                                          Label    &done,
                                          Label    *slow_case) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg);

  Label cas_label; // Try, if implemented, CAS locking. Fall thru to slow path otherwise.

  BLOCK_COMMENT("biased_locking_enter {");

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid.
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits.
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits,
         "biased locking makes assumptions about bit layout");
  // Check the low mark-word bits for the biased-lock pattern.
  z_lr(temp_reg, mark_reg);
  z_nilf(temp_reg, markOopDesc::biased_lock_mask_in_place);
  z_chi(temp_reg, markOopDesc::biased_lock_pattern);
  z_brne(cas_label);  // Try cas if object is not biased, i.e. cannot be biased locked.

  // temp_reg := (prototype_header | thread) ^ mark, with age bits masked out.
  // Result is zero iff the mark is already biased to us in the current epoch.
  load_prototype_header(temp_reg, obj_reg);
  load_const_optimized(temp2_reg, ~((int) markOopDesc::age_mask_in_place));

  z_ogr(temp_reg, Z_thread);
  z_xgr(temp_reg, mark_reg);
  z_ngr(temp_reg, temp2_reg);
  if (PrintBiasedLockingStatistics) {
    increment_counter_eq((address) BiasedLocking::biased_lock_entry_count_addr(), mark_reg, temp2_reg);
    // Restore mark_reg.
    z_lg(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
  }
  branch_optimized(Assembler::bcondEqual, done);  // Biased lock obtained, return success.

  Label try_revoke_bias;
  Label try_rebias;
  Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());

  //----------------------------------------------------------------------------
  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  z_tmll(temp_reg, markOopDesc::biased_lock_mask_in_place);
  z_brnaz(try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  z_tmll(temp_reg, markOopDesc::epoch_mask_in_place);
  z_brnaz(try_rebias);

  //----------------------------------------------------------------------------
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  z_nilf(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place |
         markOopDesc::epoch_mask_in_place);
  z_lgr(temp_reg, Z_thread);
  z_llgfr(mark_reg, mark_reg);  // Zero-extend: keep only the low mark bits computed above.
  z_ogr(temp_reg, mark_reg);    // New (biased-to-us) header.

  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

  // CAS: expect unbiased header (mark_reg), install biased header (temp_reg).
  z_csg(mark_reg, temp_reg, 0, obj_reg);

  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.

  if (PrintBiasedLockingStatistics) {
    increment_counter_eq((address) BiasedLocking::anonymously_biased_lock_entry_count_addr(),
                         temp_reg, temp2_reg);
  }
  if (slow_case != NULL) {
    branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way.
  }
  branch_optimized(Assembler::bcondAlways, done);           // Biased lock status given in condition code.

  //----------------------------------------------------------------------------
  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.

  z_nilf(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  load_prototype_header(temp_reg, obj_reg);
  z_llgfr(mark_reg, mark_reg);

  z_ogr(temp_reg, Z_thread);  // New header: prototype (fresh epoch) | our thread.

  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

  // CAS: expect current (stale-epoch) header, install rebias to us.
  z_csg(mark_reg, temp_reg, 0, obj_reg);

  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.

  if (PrintBiasedLockingStatistics) {
    increment_counter_eq((address) BiasedLocking::rebiased_lock_entry_count_addr(), temp_reg, temp2_reg);
  }
  if (slow_case != NULL) {
    branch_optimized(Assembler::bcondNotEqual, *slow_case);  // Biased lock not obtained, need to go the long way.
  }
  z_bru(done);           // Biased lock status given in condition code.

  //----------------------------------------------------------------------------
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  load_prototype_header(temp_reg, obj_reg);

  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

  // CAS: expect biased header, install unbiased prototype header.
  z_csg(mark_reg, temp_reg, 0, obj_reg);

  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    // z_cgr(mark_reg, temp2_reg);
    increment_counter_eq((address) BiasedLocking::revoked_lock_entry_count_addr(), temp_reg, temp2_reg);
  }

  bind(cas_label);
  BLOCK_COMMENT("} biased_locking_enter");
}
3347 
// Emit the biased-locking unlock fast path: if the object's mark word has
// the biased-lock pattern, unlocking is a no-op and control branches to
// 'done' (CC == EQ). Otherwise falls through to the regular unlock code.
// mark_addr holds the object (mark word is loaded from offset 0);
// temp_reg is killed.
void MacroAssembler::biased_locking_exit(Register mark_addr, Register temp_reg, Label& done) {
  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  BLOCK_COMMENT("biased_locking_exit {");

  // Isolate the biased-lock bits of the mark word.
  z_lg(temp_reg, 0, mark_addr);
  z_nilf(temp_reg, markOopDesc::biased_lock_mask_in_place);

  z_chi(temp_reg, markOopDesc::biased_lock_pattern);
  z_bre(done);
  BLOCK_COMMENT("} biased_locking_exit");
}
3364 
// Fast-path monitor enter as emitted by the compiler.
// oop  - the object to lock; box - the on-stack BasicLock of the frame.
// temp1 and temp2 are killed (note: temp1 serves double duty, see below).
// On exit, CC == EQ indicates success; on NE the caller branches to the
// runtime (_complete_monitor_locking_Java).
void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) {
  // displacedHeader and currentHeader deliberately alias temp1: the CSG
  // below leaves the current header in the register that held the
  // displaced header (see assert further down).
  Register displacedHeader = temp1;
  Register currentHeader = temp1;
  Register temp = temp2;
  NearLabel done, object_has_monitor;

  BLOCK_COMMENT("compiler_fast_lock_object {");

  // Load markOop from oop into mark.
  z_lg(displacedHeader, 0, oop);

  if (try_bias) {
    biased_locking_enter(oop, displacedHeader, temp, Z_R0, done);
  }

  // Handle existing monitor.
  if ((EmitSync & 0x01) == 0) {
    // The object has an existing monitor iff (mark & monitor_value) != 0.
    guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
    z_lr(temp, displacedHeader);
    z_nill(temp, markOopDesc::monitor_value);
    z_brne(object_has_monitor);
  }

  // Set mark to markOop | markOopDesc::unlocked_value.
  z_oill(displacedHeader, markOopDesc::unlocked_value);

  // Load Compare Value application register.

  // Initialize the box (must happen before we update the object mark).
  z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box);

  // Memory Fence (in cmpxchgd)
  // Compare object markOop with mark and if equal exchange scratch1 with object markOop.

  // If the compare-and-swap succeeded, then we found an unlocked object and we
  // have now locked it.
  z_csg(displacedHeader, box, 0, oop);
  assert(currentHeader==displacedHeader, "must be same register"); // Identified two registers from z/Architecture.
  z_bre(done);

  // We did not see an unlocked object so try the fast recursive case.

  // Recursive check: is the current header a stack address within our frame?
  // (currentHeader - SP) masked by ~(page_size-1) | lock bits yields 0 iff so.
  z_sgr(currentHeader, Z_SP);
  load_const_optimized(temp, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));

  z_ngr(currentHeader, temp);
  //   z_brne(done);
  //   z_release();
  // Store result (0 == recursive success) into the box; the store also
  // leaves the CC from z_ngr intact for the caller.
  z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box);

  z_bru(done);

  if ((EmitSync & 0x01) == 0) {
    Register zero = temp;
    Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
    bind(object_has_monitor);
    // The object's monitor m is unlocked iff m->owner == NULL,
    // otherwise m->owner may contain a thread or a stack address.
    //
    // Try to CAS m->owner from NULL to current thread.
    z_lghi(zero, 0);
    // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
    z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
    // Store a non-null value into the box.
    z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
#ifdef ASSERT
      z_brne(done);
      // We've acquired the monitor, check some invariants.
      // Invariant 1: _recursions should be 0.
      asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
                              "monitor->_recursions should be 0", -1);
      z_ltgr(zero, zero); // Set CR=EQ.
#endif
  }
  bind(done);

  BLOCK_COMMENT("} compiler_fast_lock_object");
  // If locking was successful, CR should indicate 'EQ'.
  // The compiler or the native wrapper generates a branch to the runtime call
  // _complete_monitor_locking_Java.
}
3447 
// Fast-path monitor exit as emitted by the compiler.
// oop  - the object to unlock; box - the on-stack BasicLock of the frame.
// temp1 and temp2 are killed (note the register aliasing below).
// On exit, CC == EQ indicates success; NE indicates failure and the caller
// branches to the runtime.
void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) {
  Register displacedHeader = temp1;
  Register currentHeader = temp2;
  Register temp = temp1;     // Aliases displacedHeader.
  Register monitor = temp2;  // Aliases currentHeader.

  Label done, object_has_monitor;

  BLOCK_COMMENT("compiler_fast_unlock_object {");

  if (try_bias) {
    biased_locking_exit(oop, currentHeader, done);
  }

  // Find the lock address and load the displaced header from the stack.
  // if the displaced header is zero, we have a recursive unlock.
  load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes()));
  z_bre(done);

  // Handle existing monitor.
  if ((EmitSync & 0x02) == 0) {
    // The object has an existing monitor iff (mark & monitor_value) != 0.
    z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
    guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
    z_nill(currentHeader, markOopDesc::monitor_value);
    z_brne(object_has_monitor);
  }

  // Check if it is still a light weight lock, this is true if we see
  // the stack address of the basicLock in the markOop of the object
  // copy box to currentHeader such that csg does not kill it.
  z_lgr(currentHeader, box);
  z_csg(currentHeader, displacedHeader, 0, oop);
  z_bru(done); // Csg sets CR as desired.

  // Handle existing monitor.
  if ((EmitSync & 0x02) == 0) {
    bind(object_has_monitor);
    z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);    // CurrentHeader is tagged with monitor_value set.
    // The monitor can only be released here if it has no recursions, we own
    // it, and no other thread is queued (EntryList and cxq empty). Any
    // non-zero field sends us to the runtime (CC == NE).
    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
    z_brne(done);
    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    z_brne(done);
    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
    z_brne(done);
    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
    z_brne(done);
    z_release();  // Order prior accesses before releasing the monitor.
    z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
  }

  bind(done);

  BLOCK_COMMENT("} compiler_fast_unlock_object");
  // flag == EQ indicates success
  // flag == NE indicates failure
}
3505 
// Write to card table for modification at store_addr - register is destroyed afterwards.
// Computes the card index from store_addr and marks the corresponding card
// table entry dirty by storing byte 0. tmp is killed as well.
void MacroAssembler::card_write_barrier_post(Register store_addr, Register tmp) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
  CardTable* ct = ctbs->card_table();
  assert(bs->kind() == BarrierSet::CardTableBarrierSet, "wrong barrier");
  assert_different_registers(store_addr, tmp);
  z_srlg(store_addr, store_addr, CardTable::card_shift);  // addr -> card index.
  load_absolute_address(tmp, (address)ct->byte_map_base());
  z_agr(store_addr, tmp);                                 // card table entry address.
  z_mvi(0, store_addr, 0); // Store byte 0.
}
3518 
// Resolve a jobject/jweak handle in 'value' to the referenced oop.
// A NULL handle resolves to NULL. tmp1 receives a copy of the tagged
// handle (needed for the jweak-tag test under G1); tmp2 is only used by
// the G1 pre-barrier.
void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
  NearLabel Ldone;
  z_ltgr(tmp1, value);   // Also saves the tagged handle for the weak test below.
  z_bre(Ldone);          // Use NULL result as-is.

  // Clear the tag bits, then load the oop from the (untagged) handle.
  z_nill(value, ~JNIHandles::weak_tag_mask);
  z_lg(value, 0, value); // Resolve (untagged) jobject.

#if INCLUDE_ALL_GCS
  if (UseG1GC) {
    // A jweak resolved under G1 must be recorded via the SATB pre-barrier
    // so concurrent marking does not miss the referent.
    NearLabel Lnot_weak;
    z_tmll(tmp1, JNIHandles::weak_tag_mask); // Test for jweak tag.
    z_braz(Lnot_weak);
    verify_oop(value);
    g1_write_barrier_pre(noreg /* obj */,
                         noreg /* offset */,
                         value /* pre_val */,
                         noreg /* val */,
                         tmp1  /* tmp1 */,
                         tmp2  /* tmp2 */,
                         true  /* pre_val_needed */);
    bind(Lnot_weak);
  }
#endif // INCLUDE_ALL_GCS
  verify_oop(value);
  bind(Ldone);
}
3546 
3547 #if INCLUDE_ALL_GCS
3548 
3549 //------------------------------------------------------
3550 // General G1 pre-barrier generator.
3551 // Purpose: record the previous value if it is not null.
3552 // All non-tmps are preserved.
3553 //------------------------------------------------------
3554 // Note: Rpre_val needs special attention.
3555 //   The flag pre_val_needed indicated that the caller of this emitter function
3556 //   relies on Rpre_val containing the correct value, that is:
3557 //     either the value it contained on entry to this code segment
3558 //     or the value that was loaded into the register from (Robj+offset).
3559 //
3560 //   Independent from this requirement, the contents of Rpre_val must survive
3561 //   the push_frame() operation. push_frame() uses Z_R0_scratch by default
3562 //   to temporarily remember the frame pointer.
3563 //   If Rpre_val is assigned Z_R0_scratch by the caller, code must be emitted to
3564 //   save it's value.
3565 void MacroAssembler::g1_write_barrier_pre(Register           Robj,
3566                                           RegisterOrConstant offset,
3567                                           Register           Rpre_val,      // Ideally, this is a non-volatile register.
3568                                           Register           Rval,          // Will be preserved.
3569                                           Register           Rtmp1,         // If Rpre_val is volatile, either Rtmp1
3570                                           Register           Rtmp2,         // or Rtmp2 has to be non-volatile..
3571                                           bool               pre_val_needed // Save Rpre_val across runtime call, caller uses it.
3572                                        ) {
3573   Label callRuntime, filtered;
3574   const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active());
3575   const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_buf());
3576   const int index_offset  = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_index());
3577   assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp<i> must be Z_R0!!
3578   assert_different_registers(Robj, Z_R0_scratch);         // Used for addressing. Furthermore, push_frame destroys Z_R0!!
3579   assert_different_registers(Rval, Z_R0_scratch);         // push_frame destroys Z_R0!!
3580 
3581 #ifdef ASSERT
3582   // make sure the register is not Z_R0. Used for addressing. Furthermore, would be destroyed by push_frame.
3583   if (offset.is_register() && offset.as_register()->encoding() == 0) {
3584     tty->print_cr("Roffset(g1_write_barrier_pre)  = %%r%d", offset.as_register()->encoding());
3585     assert(false, "bad register for offset");
3586   }
3587 #endif
3588 
3589   BLOCK_COMMENT("g1_write_barrier_pre {");
3590 
3591   // Is marking active?
3592   // Note: value is loaded for test purposes only. No further use here.
3593   if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
3594     load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
3595   } else {
3596     guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
3597     load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
3598   }
3599   z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
3600 
3601   assert(Rpre_val != noreg, "must have a real register");
3602 
3603 
3604   // If an object is given, we need to load the previous value into Rpre_val.
3605   if (Robj != noreg) {
3606     // Load the previous value...
3607     Register ixReg = offset.is_register() ? offset.register_or_noreg() : Z_R0;
3608     if (UseCompressedOops) {
3609       z_llgf(Rpre_val, offset.constant_or_zero(), ixReg, Robj);
3610     } else {
3611       z_lg(Rpre_val, offset.constant_or_zero(), ixReg, Robj);
3612     }
3613   }
3614 
3615   // Is the previous value NULL?
3616   // If so, we don't need to record it and we're done.
3617   // Note: pre_val is loaded, decompressed and stored (directly or via runtime call).
3618   //       Register contents is preserved across runtime call if caller requests to do so.
3619   z_ltgr(Rpre_val, Rpre_val);
3620   z_bre(filtered); // previous value is NULL, so we don't need to record it.
3621 
3622   // Decode the oop now. We know it's not NULL.
3623   if (Robj != noreg && UseCompressedOops) {
3624     oop_decoder(Rpre_val, Rpre_val, /*maybeNULL=*/false);
3625   }
3626 
3627   // OK, it's not filtered, so we'll need to call enqueue.
3628 
3629   // We can store the original value in the thread's buffer
3630   // only if index > 0. Otherwise, we need runtime to handle.
3631   // (The index field is typed as size_t.)
3632   Register Rbuffer = Rtmp1, Rindex = Rtmp2;
3633   assert_different_registers(Rbuffer, Rindex, Rpre_val);
3634 
3635   z_lg(Rbuffer, buffer_offset, Z_thread);
3636 
3637   load_and_test_long(Rindex, Address(Z_thread, index_offset));
3638   z_bre(callRuntime); // If index == 0, goto runtime.
3639 
3640   add2reg(Rindex, -wordSize); // Decrement index.
3641   z_stg(Rindex, index_offset, Z_thread);
3642 
3643   // Record the previous value.
3644   z_stg(Rpre_val, 0, Rbuffer, Rindex);
3645   z_bru(filtered);  // We are done.
3646 
3647   Rbuffer = noreg;  // end of life
3648   Rindex  = noreg;  // end of life
3649 
3650   bind(callRuntime);
3651 
3652   // Save some registers (inputs and result) over runtime call
3653   // by spilling them into the top frame.
3654   if (Robj != noreg && Robj->is_volatile()) {
3655     z_stg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
3656   }
3657   if (offset.is_register() && offset.as_register()->is_volatile()) {
3658     Register Roff = offset.as_register();
3659     z_stg(Roff, Roff->encoding()*BytesPerWord, Z_SP);
3660   }
3661   if (Rval != noreg && Rval->is_volatile()) {
3662     z_stg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
3663   }
3664 
3665   // Save Rpre_val (result) over runtime call.
3666   Register Rpre_save = Rpre_val;
3667   if ((Rpre_val == Z_R0_scratch) || (pre_val_needed && Rpre_val->is_volatile())) {
3668     guarantee(!Rtmp1->is_volatile() || !Rtmp2->is_volatile(), "oops!");
3669     Rpre_save = !Rtmp1->is_volatile() ? Rtmp1 : Rtmp2;
3670   }
3671   lgr_if_needed(Rpre_save, Rpre_val);
3672 
3673   // Push frame to protect top frame with return pc and spilled register values.
3674   save_return_pc();
3675   push_frame_abi160(0); // Will use Z_R0 as tmp.
3676 
3677   // Rpre_val may be destroyed by push_frame().
3678   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_save, Z_thread);
3679 
3680   pop_frame();
3681   restore_return_pc();
3682 
3683   // Restore spilled values.
3684   if (Robj != noreg && Robj->is_volatile()) {
3685     z_lg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
3686   }
3687   if (offset.is_register() && offset.as_register()->is_volatile()) {
3688     Register Roff = offset.as_register();
3689     z_lg(Roff, Roff->encoding()*BytesPerWord, Z_SP);
3690   }
3691   if (Rval != noreg && Rval->is_volatile()) {
3692     z_lg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
3693   }
3694   if (pre_val_needed && Rpre_val->is_volatile()) {
3695     lgr_if_needed(Rpre_val, Rpre_save);
3696   }
3697 
3698   bind(filtered);
3699   BLOCK_COMMENT("} g1_write_barrier_pre");
3700 }
3701 
// General G1 post-barrier generator.
// Purpose: record the card of a store that created a cross-region oop reference.
//
// Rstore_addr - Address the oop was stored to.
// Rnew_val    - The oop value that was stored. Conceptually dead after the
//               region filter / NULL check (set to noreg below).
// Rtmp1..3    - Temp registers, clobbered. May partially alias Rnew_val
//               (see the assert below).
void MacroAssembler::g1_write_barrier_post(Register Rstore_addr,
                                           Register Rnew_val,
                                           Register Rtmp1,
                                           Register Rtmp2,
                                           Register Rtmp3) {
  Label callRuntime, filtered;

  assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); // Most probably, Rnew_val == Rtmp3.

  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(Universe::heap()->barrier_set());
  CardTable* ct = bs->card_table();
  assert(bs->kind() == BarrierSet::G1BarrierSet, "wrong barrier");

  BLOCK_COMMENT("g1_write_barrier_post {");

  // Does store cross heap regions?
  // It does if the two addresses specify different grain addresses.
  if (G1RSBarrierRegionFilter) {
    if (VM_Version::has_DistinctOpnds()) {
      z_xgrk(Rtmp1, Rstore_addr, Rnew_val);  // Rtmp1 = Rstore_addr ^ Rnew_val (non-destructive 3-operand form).
    } else {
      z_lgr(Rtmp1, Rstore_addr);
      z_xgr(Rtmp1, Rnew_val);
    }
    // If both addresses lie in the same region, all bits above the grain size
    // cancel out and the shifted XOR is zero.
    z_srag(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes);
    z_bre(filtered);
  }

  // Crosses regions, storing NULL?
#ifdef ASSERT
  z_ltgr(Rnew_val, Rnew_val);
  asm_assert_ne("null oop not allowed (G1)", 0x255); // TODO: also on z? Checked by caller on PPC64, so following branch is obsolete:
  z_bre(filtered);  // Safety net: don't break if we have a NULL oop.
#endif
  Rnew_val = noreg; // end of lifetime

  // Storing region crossing non-NULL, is card already dirty?
  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
  assert_different_registers(Rtmp1, Rtmp2, Rtmp3);
  // Make sure not to use Z_R0 for any of these registers.
  Register Rcard_addr = (Rtmp1 != Z_R0_scratch) ? Rtmp1 : Rtmp3;
  Register Rbase      = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp3;

  // calculate address of card
  load_const_optimized(Rbase, (address)ct->byte_map_base());      // Card table base.
  z_srlg(Rcard_addr, Rstore_addr, CardTable::card_shift);         // Index into card table.
  z_algr(Rcard_addr, Rbase);                                      // Explicit calculation needed for cli.
  Rbase = noreg; // end of lifetime

  // Filter young.
  assert((unsigned int)G1CardTable::g1_young_card_val() <= 255, "otherwise check this code");
  z_cli(0, Rcard_addr, (int)G1CardTable::g1_young_card_val());
  z_bre(filtered);

  // Check the card value. If dirty, we're done.
  // This also avoids false sharing of the (already dirty) card.
  z_sync(); // Required to support concurrent cleaning.
  assert((unsigned int)CardTable::dirty_card_val() <= 255, "otherwise check this code");
  z_cli(0, Rcard_addr, CardTable::dirty_card_val()); // Reload after membar.
  z_bre(filtered);

  // Storing a region crossing, non-NULL oop, card is clean.
  // Dirty card and log.
  z_mvi(0, Rcard_addr, CardTable::dirty_card_val());

  // Try to enqueue the card address into the thread-local dirty card queue;
  // fall back to the runtime if the queue is full (index == 0).
  Register Rcard_addr_x = Rcard_addr;
  Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1;
  Register Rqueue_buf   = (Rtmp3 != Z_R0_scratch) ? Rtmp3 : Rtmp1;
  const int qidx_off    = in_bytes(JavaThread::dirty_card_queue_offset() + SATBMarkQueue::byte_offset_of_index());
  const int qbuf_off    = in_bytes(JavaThread::dirty_card_queue_offset() + SATBMarkQueue::byte_offset_of_buf());
  if ((Rcard_addr == Rqueue_buf) || (Rcard_addr == Rqueue_index)) {
    Rcard_addr_x = Z_R0_scratch;  // Register shortage. We have to use Z_R0.
  }
  lgr_if_needed(Rcard_addr_x, Rcard_addr);

  load_and_test_long(Rqueue_index, Address(Z_thread, qidx_off));
  z_bre(callRuntime); // Index == 0 then jump to runtime.

  z_lg(Rqueue_buf, qbuf_off, Z_thread);

  add2reg(Rqueue_index, -wordSize); // Decrement index.
  z_stg(Rqueue_index, qidx_off, Z_thread);

  z_stg(Rcard_addr_x, 0, Rqueue_index, Rqueue_buf); // Store card.
  z_bru(filtered);

  bind(callRuntime);

  // TODO: do we need a frame? Introduced to be on the safe side.
  bool needs_frame = true;
  lgr_if_needed(Rcard_addr, Rcard_addr_x); // copy back asap. push_frame will destroy Z_R0_scratch!

  // The VM leaf call needs a protecting frame (see TODO above).
  if (needs_frame) {
    save_return_pc();
    push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
  }

  // Save the live input values.
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, Z_thread);

  if (needs_frame) {
    pop_frame();
    restore_return_pc();
  }

  bind(filtered);

  BLOCK_COMMENT("} g1_write_barrier_post");
}
3814 #endif // INCLUDE_ALL_GCS
3815 
// Publish the current frame as the thread's last Java frame (frame anchor).
// Last_Java_sp must comply to the rules in frame_s390.hpp.
// last_Java_pc may be noreg, in which case only last_Java_sp is stored.
void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
  BLOCK_COMMENT("set_last_Java_frame {");

  // Always set last_Java_pc and flags first because once last_Java_sp
  // is visible has_last_Java_frame is true and users will look at the
  // rest of the fields. (Note: flags should always be zero before we
  // get here so doesn't need to be set.)

  // Verify that last_Java_pc was zeroed on return to Java.
  // The static variant must be used when the code may not be relocated
  // (e.g. when generated into the non-relocatable parts of the code cache).
  if (allow_relocation) {
    asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
                            Z_thread,
                            "last_Java_pc not zeroed before leaving Java",
                            0x200);
  } else {
    asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
                                   Z_thread,
                                   "last_Java_pc not zeroed before leaving Java",
                                   0x200);
  }

  // When returning from calling out from Java mode the frame anchor's
  // last_Java_pc will always be set to NULL. It is set here so that
  // if we are doing a call to native (not VM) that we capture the
  // known pc and don't have to rely on the native call having a
  // standard frame linkage where we can find the pc.
  if (last_Java_pc!=noreg) {
    z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
  }

  // This membar release is not required on z/Architecture, since the sequence of stores
  // is maintained. Nevertheless, we leave it in to document the required ordering.
  // The implementation of z_release() should be empty.
  // z_release();

  // Store last_Java_sp last: it makes the frame anchor visible (see note above).
  z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
  BLOCK_COMMENT("} set_last_Java_frame");
}
3855 
3856 void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
3857   BLOCK_COMMENT("reset_last_Java_frame {");
3858 
3859   if (allow_relocation) {
3860     asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
3861                                Z_thread,
3862                                "SP was not set, still zero",
3863                                0x202);
3864   } else {
3865     asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()),
3866                                       Z_thread,
3867                                       "SP was not set, still zero",
3868                                       0x202);
3869   }
3870 
3871   // _last_Java_sp = 0
3872   // Clearing storage must be atomic here, so don't use clear_mem()!
3873   store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);
3874 
3875   // _last_Java_pc = 0
3876   store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);
3877 
3878   BLOCK_COMMENT("} reset_last_Java_frame");
3879   return;
3880 }
3881 
// Record the top interpreter Java frame (at the given sp) as the thread's
// last Java frame. The current PC is captured into tmp1 and stored as
// last_Java_pc by set_last_Java_frame().
void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
  assert_different_registers(sp, tmp1);

  // We cannot trust that code generated by the C++ compiler saves R14
  // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
  // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
  // Therefore we load the PC into tmp1 and let set_last_Java_frame() save
  // it into the frame anchor.
  get_PC(tmp1);
  set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation);
}
3893 
// Store new_state into the thread's state field.
// A release barrier orders all preceding memory accesses before the
// state change becomes visible to other threads.
void MacroAssembler::set_thread_state(JavaThreadState new_state) {
  z_release();

  assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction");
  assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int");
  store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false);
}
3901 
3902 void MacroAssembler::get_vm_result(Register oop_result) {
3903   verify_thread();
3904 
3905   z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset()));
3906   clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*));
3907 
3908   verify_oop(oop_result);
3909 }
3910 
3911 void MacroAssembler::get_vm_result_2(Register result) {
3912   verify_thread();
3913 
3914   z_lg(result, Address(Z_thread, JavaThread::vm_result_2_offset()));
3915   clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(void*));
3916 }
3917 
// Store oop_result into JavaThread::vm_result for retrieval by the caller.
// We require that C code which does not return a value in vm_result will
// leave it undisturbed.
void MacroAssembler::set_vm_result(Register oop_result) {
  z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_offset()));
}
3923 
// Explicit null checks (used for method handle code).
// With ImplicitNullChecks disabled, emits an explicit compare and a branch
// to the interpreter's NullPointerException entry (clobbering reg for the
// branch target). Otherwise, a dummy load from M[reg] is emitted only when
// the offset is too large for the OS signal mechanism to catch.
void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) {
  if (!ImplicitNullChecks) {
    NearLabel ok;

    compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok);

    // We just put the address into reg if it was 0 (tmp==Z_R0 is allowed so we can't use it for the address).
    address exception_entry = Interpreter::throw_NullPointerException_entry();
    load_absolute_address(reg, exception_entry);
    z_br(reg);

    bind(ok);
  } else {
    if (needs_explicit_null_check((intptr_t)offset)) {
      // Provoke OS NULL exception if reg = NULL by
      // accessing M[reg] w/o changing any registers.
      z_lg(tmp, 0, reg);
    }
    // else
      // Nothing to do, (later) access of M[reg + offset]
      // will provoke OS NULL exception if reg = NULL.
  }
}
3948 
3949 //-------------------------------------
3950 //  Compressed Klass Pointers
3951 //-------------------------------------
3952 
// Klass oop manipulations if compressed.
// Encode (compress) the klass pointer: subtract the encoding base (if any),
// then shift right by the klass alignment shift.
// src may be noreg, in which case the klass is taken from (and the result
// left in) dst. src == dst is also allowed. Z_R0 may be used as scratch
// when the base has set bits in both halves.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible.
  address  base    = Universe::narrow_klass_base();
  int      shift   = Universe::narrow_klass_shift();
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");

  BLOCK_COMMENT("cKlass encoder {");

#ifdef ASSERT
  Label ok;
  z_tmll(current, KlassAlignmentInBytes-1); // Check alignment.
  z_brc(Assembler::bcondAllZero, ok);
  // The plain disassembler does not recognize illtrap. It instead displays
  // a 32-bit value. Issueing two illtraps assures the disassembler finds
  // the proper beginning of the next instruction.
  z_illtrap(0xee);
  z_illtrap(0xee);
  bind(ok);
#endif

  // Subtract the base, choosing the cheapest instruction sequence for its bit pattern.
  if (base != NULL) {
    unsigned int base_h = ((unsigned long)base)>>32;
    unsigned int base_l = (unsigned int)((unsigned long)base);
    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
      lgr_if_needed(dst, current);
      z_aih(dst, -((int)base_h));     // Base has no set bits in lower half.
    } else if ((base_h == 0) && (base_l != 0)) {
      lgr_if_needed(dst, current);
      z_agfi(dst, -(int)base_l);
    } else {
      load_const(Z_R0, base);         // Base has set bits everywhere.
      lgr_if_needed(dst, current);
      z_sgr(dst, Z_R0);
    }
    current = dst;
  }
  if (shift != 0) {
    assert (LogKlassAlignmentInBytes == shift, "decode alg wrong");
    z_srlg(dst, current, shift);
    current = dst;
  }
  lgr_if_needed(dst, current); // Move may be required (if neither base nor shift != 0).

  BLOCK_COMMENT("} cKlass encoder");
}
3999 
// This function calculates the size of the code generated by
//   decode_klass_not_null(register dst, Register src)
// when (Universe::heap() != NULL). Hence, if the instructions
// it generates change, then this method needs to be updated.
// Returns the total byte size; decode_klass_not_null(Register) asserts
// that its actual emitted size matches this value.
int MacroAssembler::instr_size_for_decode_klass_not_null() {
  address  base    = Universe::narrow_klass_base();
  int shift_size   = Universe::narrow_klass_shift() == 0 ? 0 : 6; /* sllg */
  int addbase_size = 0;
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");

  // Mirror the base-addition instruction selection in decode_klass_not_null().
  if (base != NULL) {
    unsigned int base_h = ((unsigned long)base)>>32;
    unsigned int base_l = (unsigned int)((unsigned long)base);
    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
      addbase_size += 6; /* aih */
    } else if ((base_h == 0) && (base_l != 0)) {
      addbase_size += 6; /* algfi */
    } else {
      addbase_size += load_const_size();
      addbase_size += 4; /* algr */
    }
  }
#ifdef ASSERT
  // Debug builds additionally emit the alignment check (tmll + brc + illtraps).
  addbase_size += 10;
  addbase_size += 2; // Extra sigill.
#endif
  return addbase_size + shift_size;
}
4028 
// !!! If the instructions that get generated here change
//     then function instr_size_for_decode_klass_not_null()
//     needs to get updated.
// This variant of decode_klass_not_null() must generate predictable code!
// The code must only depend on globally known parameters.
// Decodes (decompresses) the narrow klass in dst in place:
// shift left, then add the encoding base (if any).
void MacroAssembler::decode_klass_not_null(Register dst) {
  address  base    = Universe::narrow_klass_base();
  int      shift   = Universe::narrow_klass_shift();
  int      beg_off = offset(); // Remember start, to verify emitted size below.
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");

  BLOCK_COMMENT("cKlass decoder (const size) {");

  if (shift != 0) { // Shift required?
    z_sllg(dst, dst, shift);
  }
  // Add the base, choosing the cheapest instruction sequence for its bit pattern.
  if (base != NULL) {
    unsigned int base_h = ((unsigned long)base)>>32;
    unsigned int base_l = (unsigned int)((unsigned long)base);
    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
      z_aih(dst, base_h);     // Base has no set bits in lower half.
    } else if ((base_h == 0) && (base_l != 0)) {
      z_algfi(dst, base_l);   // Base has no set bits in upper half.
    } else {
      load_const(Z_R0, base); // Base has set bits everywhere.
      z_algr(dst, Z_R0);
    }
  }

#ifdef ASSERT
  Label ok;
  z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
  z_brc(Assembler::bcondAllZero, ok);
  // The plain disassembler does not recognize illtrap. It instead displays
  // a 32-bit value. Issueing two illtraps assures the disassembler finds
  // the proper beginning of the next instruction.
  z_illtrap(0xd1);
  z_illtrap(0xd1);
  bind(ok);
#endif
  assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch.");

  BLOCK_COMMENT("} cKlass decoder (const size)");
}
4073 
// This variant of decode_klass_not_null() is for cases where
//  1) the size of the generated instructions may vary
//  2) the result is (potentially) stored in a register different from the source.
// Decodes the narrow klass in src (or dst if src == noreg) into dst:
// shift left, then add the encoding base (if any).
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  address base  = Universe::narrow_klass_base();
  int     shift = Universe::narrow_klass_shift();
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");

  BLOCK_COMMENT("cKlass decoder {");

  if (src == noreg) src = dst;

  if (shift != 0) { // Shift or at least move required?
    z_sllg(dst, src, shift);
  } else {
    lgr_if_needed(dst, src);
  }

  // Add the base, choosing the cheapest instruction sequence for its bit pattern.
  if (base != NULL) {
    unsigned int base_h = ((unsigned long)base)>>32;
    unsigned int base_l = (unsigned int)((unsigned long)base);
    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
      z_aih(dst, base_h);     // Base has no set bits in lower half.
    } else if ((base_h == 0) && (base_l != 0)) {
      z_algfi(dst, base_l);   // Base has no set bits in upper half.
    } else {
      load_const_optimized(Z_R0, base); // Base has set bits everywhere.
      z_algr(dst, Z_R0);
    }
  }

#ifdef ASSERT
  Label ok;
  z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
  z_brc(Assembler::bcondAllZero, ok);
  // The plain disassembler does not recognize illtrap. It instead displays
  // a 32-bit value. Issueing two illtraps assures the disassembler finds
  // the proper beginning of the next instruction.
  z_illtrap(0xd2);
  z_illtrap(0xd2);
  bind(ok);
#endif
  BLOCK_COMMENT("} cKlass decoder");
}
4118 
4119 void MacroAssembler::load_klass(Register klass, Address mem) {
4120   if (UseCompressedClassPointers) {
4121     z_llgf(klass, mem);
4122     // Attention: no null check here!
4123     decode_klass_not_null(klass);
4124   } else {
4125     z_lg(klass, mem);
4126   }
4127 }
4128 
4129 void MacroAssembler::load_klass(Register klass, Register src_oop) {
4130   if (UseCompressedClassPointers) {
4131     z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop);
4132     // Attention: no null check here!
4133     decode_klass_not_null(klass);
4134   } else {
4135     z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop);
4136   }
4137 }
4138 
// Load the prototype header of Rsrc_oop's klass into Rheader.
// Clobbers Rheader (first holds the klass, then the prototype header).
void MacroAssembler::load_prototype_header(Register Rheader, Register Rsrc_oop) {
  assert_different_registers(Rheader, Rsrc_oop);
  load_klass(Rheader, Rsrc_oop);
  z_lg(Rheader, Address(Rheader, Klass::prototype_header_offset()));
}
4144 
// Store the klass pointer 'klass' into the object 'dst_oop'.
// With compressed class pointers, 'ck' (if given) receives the encoded
// klass; if ck == noreg, 'klass' itself is encoded in place and stored.
void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) {
  if (UseCompressedClassPointers) {
    assert_different_registers(dst_oop, klass, Z_R0);
    if (ck == noreg) ck = klass;
    encode_klass_not_null(ck, klass);
    z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
  } else {
    z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
  }
}
4155 
4156 void MacroAssembler::store_klass_gap(Register s, Register d) {
4157   if (UseCompressedClassPointers) {
4158     assert(s != d, "not enough registers");
4159     // Support s = noreg.
4160     if (s != noreg) {
4161       z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
4162     } else {
4163       z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0);
4164     }
4165   }
4166 }
4167 
// Compare klass ptr in memory against klass ptr in register.
//
// Rop1            - klass in register, always uncompressed.
// disp            - Offset of klass in memory, compressed/uncompressed, depending on runtime flag.
// Rbase           - Base address of cKlass in memory.
// maybeNULL       - True if Rop1 possibly is a NULL.
//
// Sets the condition code for a subsequent branch. Z_R0 (and, in the
// HeapBased case, Z_R1) are used as scratch.
void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL) {

  BLOCK_COMMENT("compare klass ptr {");

  if (UseCompressedClassPointers) {
    const int shift = Universe::narrow_klass_shift();
    address   base  = Universe::narrow_klass_base();

    assert((shift == 0) || (shift == LogKlassAlignmentInBytes), "cKlass encoder detected bad shift");
    assert_different_registers(Rop1, Z_R0);
    assert_different_registers(Rop1, Rbase, Z_R1);

    // First encode register oop and then compare with cOop in memory.
    // This sequence saves an unnecessary cOop load and decode.
    if (base == NULL) {
      if (shift == 0) {
        z_cl(Rop1, disp, Rbase);     // Unscaled
      } else {
        z_srlg(Z_R0, Rop1, shift);   // ZeroBased
        z_cl(Z_R0, disp, Rbase);
      }
    } else {                         // HeapBased
#ifdef ASSERT
      // Conservatively treat both scratch registers as used; they get
      // poisoned below so stale values are never silently relied upon.
      bool     used_R0 = true;
      bool     used_R1 = true;
#endif
      Register current = Rop1;
      Label    done;

      if (maybeNULL) {       // NULL ptr must be preserved!
        z_ltgr(Z_R0, current);
        z_bre(done);
        current = Z_R0;
      }

      // Subtract the base, choosing the cheapest instruction sequence for its bit pattern.
      unsigned int base_h = ((unsigned long)base)>>32;
      unsigned int base_l = (unsigned int)((unsigned long)base);
      if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
        lgr_if_needed(Z_R0, current);
        z_aih(Z_R0, -((int)base_h));     // Base has no set bits in lower half.
      } else if ((base_h == 0) && (base_l != 0)) {
        lgr_if_needed(Z_R0, current);
        z_agfi(Z_R0, -(int)base_l);
      } else {
        int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
        add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement.
      }

      if (shift != 0) {
        z_srlg(Z_R0, Z_R0, shift);
      }
      bind(done);
      z_cl(Z_R0, disp, Rbase);
#ifdef ASSERT
      if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
      if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
#endif
    }
  } else {
    z_clg(Rop1, disp, Z_R0, Rbase);
  }
  BLOCK_COMMENT("} compare klass ptr");
}
4237 
4238 //---------------------------
4239 //  Compressed oops
4240 //---------------------------
4241 
// Compress the oop in register 'oop' in place; the value may be NULL.
void MacroAssembler::encode_heap_oop(Register oop) {
  oop_encoder(oop, oop, true /*maybe null*/);
}
4245 
// Compress the oop in register 'oop' in place; the value must not be NULL.
void MacroAssembler::encode_heap_oop_not_null(Register oop) {
  oop_encoder(oop, oop, false /*not null*/);
}
4249 
4250 // Called with something derived from the oop base. e.g. oop_base>>3.
4251 int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) {
4252   unsigned int oop_base_ll = ((unsigned int)(oop_base >>  0)) & 0xffff;
4253   unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff;
4254   unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff;
4255   unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff;
4256   unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1)
4257                                + (oop_base_lh == 0 ? 0:1)
4258                                + (oop_base_hl == 0 ? 0:1)
4259                                + (oop_base_hh == 0 ? 0:1);
4260 
4261   assert(oop_base != 0, "This is for HeapBased cOops only");
4262 
4263   if (n_notzero_parts != 1) { //  Check if oop_base is just a few pages shy of a power of 2.
4264     uint64_t pow2_offset = 0x10000 - oop_base_ll;
4265     if (pow2_offset < 0x8000) {  // This might not be necessary.
4266       uint64_t oop_base2 = oop_base + pow2_offset;
4267 
4268       oop_base_ll = ((unsigned int)(oop_base2 >>  0)) & 0xffff;
4269       oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff;
4270       oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff;
4271       oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff;
4272       n_notzero_parts = (oop_base_ll == 0 ? 0:1) +
4273                         (oop_base_lh == 0 ? 0:1) +
4274                         (oop_base_hl == 0 ? 0:1) +
4275                         (oop_base_hh == 0 ? 0:1);
4276       if (n_notzero_parts == 1) {
4277         assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register");
4278         return -pow2_offset;
4279       }
4280     }
4281   }
4282   return 0;
4283 }
4284 
// Load (an approximation of) the oop base into Rbase.
// If base address is offset from a straight power of two by just a few pages,
// return this offset to the caller for a possible later composite add.
// TODO/FIX: will only work correctly for 4k pages.
int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) {
  int pow2_offset = get_oop_base_pow2_offset(oop_base);

  load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible.

  return pow2_offset;
}
4295 
// Like get_oop_base(), but leave the two's complement (negation) of the
// loaded base in Rbase and return the negated pow2 offset. This lets
// callers subtract the base by adding Rbase plus the returned offset.
int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) {
  int offset = get_oop_base(Rbase, oop_base);
  z_lcgr(Rbase, Rbase); // Negate the loaded base.
  return -offset;
}
4301 
// Compare compressed oop in memory against oop in register.
// Rop1            - Oop in register.
// disp            - Offset of cOop in memory.
// Rbase           - Base address of cOop in memory.
// maybeNULL       - True if Rop1 possibly is a NULL.
// maybeNULLtarget - Branch target for Rop1 == NULL, if flow control shall NOT continue with compare instruction.
//
// Sets the condition code for a subsequent branch. Z_R0 and Z_R1 are used
// as scratch (and poisoned in debug builds).
void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybeNULL) {
  Register Rbase  = mem.baseOrR0();
  Register Rindex = mem.indexOrR0();
  int64_t  disp   = mem.disp();

  const int shift = Universe::narrow_oop_shift();
  address   base  = Universe::narrow_oop_base();

  assert(UseCompressedOops, "must be on to call this method");
  assert(Universe::heap() != NULL, "java heap must be initialized to call this method");
  assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
  assert_different_registers(Rop1, Z_R0);
  assert_different_registers(Rop1, Rbase, Z_R1);
  assert_different_registers(Rop1, Rindex, Z_R1);

  BLOCK_COMMENT("compare heap oop {");

  // First encode register oop and then compare with cOop in memory.
  // This sequence saves an unnecessary cOop load and decode.
  if (base == NULL) {
    if (shift == 0) {
      z_cl(Rop1, disp, Rindex, Rbase);  // Unscaled
    } else {
      z_srlg(Z_R0, Rop1, shift);        // ZeroBased
      z_cl(Z_R0, disp, Rindex, Rbase);
    }
  } else {                              // HeapBased
#ifdef ASSERT
    // Conservatively treat both scratch registers as used; they get
    // poisoned below so stale values are never silently relied upon.
    bool  used_R0 = true;
    bool  used_R1 = true;
#endif
    Label done;
    int   pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));

    if (maybeNULL) {       // NULL ptr must be preserved!
      z_ltgr(Z_R0, Rop1);
      z_bre(done);
    }

    // Subtract base by adding its complement, then scale down.
    add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1);
    z_srlg(Z_R0, Z_R0, shift); // No-op if shift == 0.

    bind(done);
    z_cl(Z_R0, disp, Rindex, Rbase);
#ifdef ASSERT
    if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
    if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
#endif
  }
  BLOCK_COMMENT("} compare heap oop");
}
4359 
4360 // Load heap oop and decompress, if necessary.
4361 void  MacroAssembler::load_heap_oop(Register dest, const Address &a) {
4362   if (UseCompressedOops) {
4363     z_llgf(dest, a.disp(), a.indexOrR0(), a.baseOrR0());
4364     oop_decoder(dest, dest, true);
4365   } else {
4366     z_lg(dest, a.disp(), a.indexOrR0(), a.baseOrR0());
4367   }
4368 }
4369 
4370 // Load heap oop and decompress, if necessary.
4371 void MacroAssembler::load_heap_oop(Register dest, int64_t disp, Register base) {
4372   if (UseCompressedOops) {
4373     z_llgf(dest, disp, base);
4374     oop_decoder(dest, dest, true);
4375   } else {
4376     z_lg(dest, disp, base);
4377   }
4378 }
4379 
4380 // Load heap oop and decompress, if necessary.
4381 void MacroAssembler::load_heap_oop_not_null(Register dest, int64_t disp, Register base) {
4382   if (UseCompressedOops) {
4383     z_llgf(dest, disp, base);
4384     oop_decoder(dest, dest, false);
4385   } else {
4386     z_lg(dest, disp, base);
4387   }
4388 }
4389 
4390 // Compress, if necessary, and store oop to heap.
4391 void MacroAssembler::store_heap_oop(Register Roop, RegisterOrConstant offset, Register base) {
4392   Register Ridx = offset.is_register() ? offset.register_or_noreg() : Z_R0;
4393   if (UseCompressedOops) {
4394     assert_different_registers(Roop, offset.register_or_noreg(), base);
4395     encode_heap_oop(Roop);
4396     z_st(Roop, offset.constant_or_zero(), Ridx, base);
4397   } else {
4398     z_stg(Roop, offset.constant_or_zero(), Ridx, base);
4399   }
4400 }
4401 
4402 // Compress, if necessary, and store oop to heap. Oop is guaranteed to be not NULL.
4403 void MacroAssembler::store_heap_oop_not_null(Register Roop, RegisterOrConstant offset, Register base) {
4404   Register Ridx = offset.is_register() ? offset.register_or_noreg() : Z_R0;
4405   if (UseCompressedOops) {
4406     assert_different_registers(Roop, offset.register_or_noreg(), base);
4407     encode_heap_oop_not_null(Roop);
4408     z_st(Roop, offset.constant_or_zero(), Ridx, base);
4409   } else {
4410     z_stg(Roop, offset.constant_or_zero(), Ridx, base);
4411   }
4412 }
4413 
4414 // Store NULL oop to heap.
4415 void MacroAssembler::store_heap_oop_null(Register zero, RegisterOrConstant offset, Register base) {
4416   Register Ridx = offset.is_register() ? offset.register_or_noreg() : Z_R0;
4417   if (UseCompressedOops) {
4418     z_st(zero, offset.constant_or_zero(), Ridx, base);
4419   } else {
4420     z_stg(zero, offset.constant_or_zero(), Ridx, base);
4421   }
4422 }
4423 
4424 //-------------------------------------------------
4425 // Encode compressed oop. Generally usable encoder.
4426 //-------------------------------------------------
4427 // Rsrc - contains regular oop on entry. It remains unchanged.
4428 // Rdst - contains compressed oop on exit.
// Rdst and Rsrc may indicate the same register, in which case Rsrc is modified by the encoding.
4430 //
4431 // Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality.
4432 // Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance.
4433 //
4434 // only32bitValid is set, if later code only uses the lower 32 bits. In this
4435 // case we must not fix the upper 32 bits.
void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL,
                                 Register Rbase, int pow2_offset, bool only32bitValid) {

  // Encoding parameters of the current compressed-oops mode.
  const address oop_base  = Universe::narrow_oop_base();
  const int     oop_shift = Universe::narrow_oop_shift();
  const bool    disjoint  = Universe::narrow_oop_base_disjoint();

  assert(UseCompressedOops, "must be on to call this method");
  assert(Universe::heap() != NULL, "java heap must be initialized to call this encoder");
  assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");

  // Fast path: zero base or disjoint base. No base subtraction is needed;
  // shifting out the alignment bits (if any) is all that is required.
  if (disjoint || (oop_base == NULL)) {
    BLOCK_COMMENT("cOop encoder zeroBase {");
    if (oop_shift == 0) {
      if (oop_base != NULL && !only32bitValid) {
        z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again.
      } else {
        lgr_if_needed(Rdst, Rsrc);
      }
    } else {
      z_srlg(Rdst, Rsrc, oop_shift);
      if (oop_base != NULL && !only32bitValid) {
        z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
      }
    }
    BLOCK_COMMENT("} cOop encoder zeroBase");
    return;
  }

  // General case: the heap base must be subtracted.
  // Track scratch register use so debug builds can poison them afterwards
  // (see the ASSERT block at the end of this function).
  bool used_R0 = false;
  bool used_R1 = false;

  BLOCK_COMMENT("cOop encoder general {");
  assert_different_registers(Rdst, Z_R1);
  assert_different_registers(Rsrc, Rbase);
  if (maybeNULL) {
    Label done;
    // We reorder shifting and subtracting, so that we can compare
    // and shift in parallel:
    //
    // cycle 0:  potential LoadN, base = <const>
    // cycle 1:  base = !base     dst = src >> 3,    cmp cr = (src != 0)
    // cycle 2:  if (cr) br,      dst = dst + base + offset

    // Get oop_base components. pow2_offset == -1 means the base has not been
    // preloaded; materialize its (shifted) complement into Rbase, resolving
    // register clashes with Rdst first.
    if (pow2_offset == -1) {
      if (Rdst == Rbase) {
        if (Rdst == Z_R1 || Rsrc == Z_R1) {
          Rbase = Z_R0;
          used_R0 = true;
        } else {
          Rdst = Z_R1;
          used_R1 = true;
        }
      }
      if (Rbase == Z_R1) {
        used_R1 = true;
      }
      // Load -(base >> shift) so the "subtraction" below becomes an addition.
      pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift);
    }
    assert_different_registers(Rdst, Rbase);

    // Check for NULL oop (must be left alone) and shift.
    if (oop_shift != 0) {  // Shift out alignment bits
      if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set.
        z_srag(Rdst, Rsrc, oop_shift);  // Arithmetic shift sets the condition code.
      } else {
        z_srlg(Rdst, Rsrc, oop_shift);
        z_ltgr(Rsrc, Rsrc);  // This is the recommended way of testing for zero.
        // This probably is faster, as it does not write a register. No!
        // z_cghi(Rsrc, 0);
      }
    } else {
      z_ltgr(Rdst, Rsrc);   // Move NULL to result register.
    }
    z_bre(done);            // Oop is NULL: result already holds NULL, skip base subtraction.

    // Subtract oop_base components (add the complement materialized above).
    if ((Rdst == Z_R0) || (Rbase == Z_R0)) {
      z_algr(Rdst, Rbase);  // LAY cannot be used when Z_R0 is involved (R0 means "no register" as index/base).
      if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); }
    } else {
      add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst);
    }
    if (!only32bitValid) {
      z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
    }
    bind(done);

  } else {  // not null
    // Get oop_base components.
    if (pow2_offset == -1) {
      // Load the unshifted complement; shifting happens after the subtraction below.
      pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base);
    }

    // Subtract oop_base components and shift.
    if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) {
      // Don't use lay instruction.
      if (Rdst == Rsrc) {
        z_algr(Rdst, Rbase);
      } else {
        lgr_if_needed(Rdst, Rbase);
        z_algr(Rdst, Rsrc);
      }
      if (pow2_offset != 0) add2reg(Rdst, pow2_offset);
    } else {
      add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc);
    }
    if (oop_shift != 0) {   // Shift out alignment bits.
      z_srlg(Rdst, Rdst, oop_shift);
    }
    if (!only32bitValid) {
      z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
    }
  }
#ifdef ASSERT
  // Poison the scratch registers we clobbered to expose accidental reliance on them.
  if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); }
  if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); }
#endif
  BLOCK_COMMENT("} cOop encoder general");
}
4557 
4558 //-------------------------------------------------
4559 // decode compressed oop. Generally usable decoder.
4560 //-------------------------------------------------
4561 // Rsrc - contains compressed oop on entry.
4562 // Rdst - contains regular oop on exit.
4563 // Rdst and Rsrc may indicate same register.
// Rdst must not be the same register as Rbase if Rbase was preloaded before the call
// (pow2_offset >= 0). Otherwise, Rdst may equal Rbase; then either Z_R0 or Z_R1 must
// be available as scratch.
4566 // Rbase - register to use for the base
4567 // pow2_offset - offset of base to nice value. If -1, base must be loaded.
4568 // For performance, it is good to
4569 //  - avoid Z_R0 for any of the argument registers.
4570 //  - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance.
4571 //  - avoid Z_R1 for Rdst if Rdst == Rbase.
void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL, Register Rbase, int pow2_offset) {

  // Decoding parameters of the current compressed-oops mode.
  const address oop_base  = Universe::narrow_oop_base();
  const int     oop_shift = Universe::narrow_oop_shift();
  const bool    disjoint  = Universe::narrow_oop_base_disjoint();

  assert(UseCompressedOops, "must be on to call this method");
  assert(Universe::heap() != NULL, "java heap must be initialized to call this decoder");
  assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes),
         "cOop encoder detected bad shift");

  // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary.

  if (oop_base != NULL) {
    // Split the upper 32 bits of the base into halfword components to see
    // whether the cheap OR-based decode (disjoint base) is applicable.
    unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff;
    unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff;
    unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff;
    if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) {
      BLOCK_COMMENT("cOop decoder disjointBase {");
      // We do not need to load the base. Instead, we can install the upper bits
      // with an OR instead of an ADD.
      Label done;

      // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
      if (maybeNULL) {  // NULL ptr must be preserved!
        z_slag(Rdst, Rsrc, oop_shift);  // Arithmetic shift sets the condition code.
        z_bre(done);                    // NULL stays NULL: skip installing the base bits.
      } else {
        z_sllg(Rdst, Rsrc, oop_shift);  // Logical shift leaves condition code alone.
      }
      // OR the non-zero upper halfword(s) of the base into the shifted offset.
      if ((oop_base_hl != 0) && (oop_base_hh != 0)) {
        z_oihf(Rdst, oop_base_hf);
      } else if (oop_base_hl != 0) {
        z_oihl(Rdst, oop_base_hl);
      } else {
        assert(oop_base_hh != 0, "not heapbased mode");
        z_oihh(Rdst, oop_base_hh);
      }
      bind(done);
      BLOCK_COMMENT("} cOop decoder disjointBase");
    } else {
      BLOCK_COMMENT("cOop decoder general {");
      // There are three decode steps:
      //   scale oop offset (shift left)
      //   get base (in reg) and pow2_offset (constant)
      //   add base, pow2_offset, and oop offset
      // The following register overlap situations may exist:
      // Rdst == Rsrc,  Rbase any other
      //   not a problem. Scaling in-place leaves Rbase undisturbed.
      //   Loading Rbase does not impact the scaled offset.
      // Rdst == Rbase, Rsrc  any other
      //   scaling would destroy a possibly preloaded Rbase. Loading Rbase
      //   would destroy the scaled offset.
      //   Remedy: use Rdst_tmp if Rbase has been preloaded.
      //           use Rbase_tmp if base has to be loaded.
      // Rsrc == Rbase, Rdst  any other
      //   Only possible without preloaded Rbase.
      //   Loading Rbase does not destroy compressed oop because it was scaled into Rdst before.
      // Rsrc == Rbase, Rdst == Rbase
      //   Only possible without preloaded Rbase.
      //   Loading Rbase would destroy compressed oop. Scaling in-place is ok.
      //   Remedy: use Rbase_tmp.
      //
      Label    done;
      Register Rdst_tmp       = Rdst;
      Register Rbase_tmp      = Rbase;
      bool     used_R0        = false;  // NOTE(review): never set to true below; the ASSERT poisoning at the end is currently dead.
      bool     used_R1        = false;  // NOTE(review): never set to true below; the ASSERT poisoning at the end is currently dead.
      bool     base_preloaded = pow2_offset >= 0;  // pow2_offset >= 0 signals a caller-preloaded Rbase.
      guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller");
      assert(oop_shift != 0, "room for optimization");

      // Check if we need to use scratch registers (see overlap discussion above).
      if (Rdst == Rbase) {
        assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg");
        if (Rdst != Rsrc) {
          if (base_preloaded) { Rdst_tmp  = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
          else                { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
        } else {
          Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1;
        }
      }
      // Move the preloaded base out of harm's way (no-op if Rbase_tmp == Rbase).
      if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase);

      // Scale oop and check for NULL.
      // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
      if (maybeNULL) {  // NULL ptr must be preserved!
        z_slag(Rdst_tmp, Rsrc, oop_shift);  // Arithmetic shift sets the condition code.
        z_bre(done);                        // NULL stays NULL: skip the base addition.
      } else {
        z_sllg(Rdst_tmp, Rsrc, oop_shift);  // Logical shift leaves condition code alone.
      }

      // Get oop_base components.
      if (!base_preloaded) {
        pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base);
      }

      // Add up all components.
      if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) {
        z_algr(Rdst_tmp, Rbase_tmp);  // Indexed address generation cannot be used with Z_R0.
        if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); }
      } else {
        add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp);
      }

      bind(done);
      lgr_if_needed(Rdst, Rdst_tmp);  // Copy result back if a scratch destination was used.
#ifdef ASSERT
      if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); }
      if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); }
#endif
      BLOCK_COMMENT("} cOop decoder general");
    }
  } else {
    // Zero base: decoding is just undoing the shift (if any). NULL needs no
    // special treatment; shifting zero yields zero.
    BLOCK_COMMENT("cOop decoder zeroBase {");
    if (oop_shift == 0) {
      lgr_if_needed(Rdst, Rsrc);
    } else {
      z_sllg(Rdst, Rsrc, oop_shift);
    }
    BLOCK_COMMENT("} cOop decoder zeroBase");
  }
}
4696 
// ((OopHandle)result).resolve();
// 'result' holds an OopHandle on entry; on exit it holds the oop the handle
// refers to. The handle value itself is overwritten.
void MacroAssembler::resolve_oop_handle(Register result) {
  // OopHandle::resolve is an indirection: load the oop the handle points to.
  z_lg(result, 0, result);
}
4702 
// Load the java mirror of the given method's holder class into 'mirror',
// walking the chain: Method* -> ConstMethod* -> ConstantPool* -> holder
// Klass* -> mirror OopHandle, then resolving the handle.
// 'mirror' is reused as scratch at every step; 'method' is not modified.
void MacroAssembler::load_mirror(Register mirror, Register method) {
  mem2reg_opt(mirror, Address(method, Method::const_offset()));                      // ConstMethod*
  mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset()));             // ConstantPool*
  mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes())); // holder Klass*
  mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset()));                 // mirror OopHandle
  resolve_oop_handle(mirror);                                                        // dereference the handle
}
4710 
4711 //---------------------------------------------------------------
4712 //---  Operations on arrays.
4713 //---------------------------------------------------------------
4714 
// Compiler ensures base is doubleword aligned and cnt is #doublewords.
// Emitter does not KILL cnt and base arguments, since they need to be copied to
// work registers anyway.
// Actually, only r0, r1, and r5 are killed.
// Returns the size of the generated code in bytes.
unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len) {
  // src_addr/src_len must form the even/odd register pair required by MVCLE:
  // Src_addr is the even register, src_len the odd one.

  int      block_start = offset();
  Register tmp_reg  = src_len; // Holds target instr addr for EX.
  Register dst_len  = Z_R1;    // Holds dst len  for MVCLE.
  Register dst_addr = Z_R0;    // Holds dst addr for MVCLE.

  Label doXC, doMVCLE, done;

  BLOCK_COMMENT("Clear_Array {");

  // Check for zero len and convert to long.
  z_ltgfr(src_len, cnt_arg);      // Sign-extend cnt to 64 bit; sets the condition code for the zero test.
  z_bre(done);                    // Nothing to do if len == 0.

  // Prefetch data to be cleared.
  if (VM_Version::has_Prefetch()) {
    z_pfd(0x02,   0, Z_R0, base_pointer_arg);  // Prefetch for store: first cache line ...
    z_pfd(0x02, 256, Z_R0, base_pointer_arg);  // ... and the next one.
  }

  z_sllg(dst_len, src_len, 3);    // #bytes to clear.
  z_cghi(src_len, 32);            // Check for len <= 256 bytes (<=32 DW).
  z_brnh(doXC);                   // If so, use executed XC to clear.

  // MVCLE: initialize long arrays (general case).
  // A zero source length makes MVCLE fill the destination with the pad
  // byte (0), i.e. it clears dst_len bytes.
  bind(doMVCLE);
  z_lgr(dst_addr, base_pointer_arg);
  clear_reg(src_len, true, false); // Src len of MVCLE is zero.

  MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
  z_bru(done);

  // XC: initialize short arrays.
  // XC of a storage range onto itself zeroes it; the length is patched in
  // at runtime via EXECUTE, so one template instruction covers all lengths.
  Label XC_template; // Instr template, never exec directly!
    bind(XC_template);
    z_xc(0,0,base_pointer_arg,0,base_pointer_arg);

  bind(doXC);
    add2reg(dst_len, -1);             // Get #bytes-1 for EXECUTE.
    if (VM_Version::has_ExecuteExtensions()) {
      z_exrl(dst_len, XC_template);   // Execute XC with var. len.
    } else {
      z_larl(tmp_reg, XC_template);
      z_ex(dst_len,0,Z_R0,tmp_reg);   // Execute XC with var. len.
    }
    // z_bru(done);      // fallthru

  bind(done);

  BLOCK_COMMENT("} Clear_Array");

  int block_end = offset();
  return block_end - block_start;
}
4776 
// Compiler ensures base is doubleword aligned and cnt is count of doublewords.
// Emitter does not KILL any arguments nor work registers.
// Emitter generates up to 16 XC instructions, depending on the array length.
// Limited to arrays of at most 4 KB (see assert below).
// Returns the size of the generated code in bytes.
unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) {
  int  block_start    = offset();
  int  off;
  int  lineSize_Bytes = AllocatePrefetchStepSize;
  int  lineSize_DW    = AllocatePrefetchStepSize>>LogBytesPerWord;  // NOTE(review): unused.
  bool doPrefetch     = VM_Version::has_Prefetch();
  int  XC_maxlen      = 256;  // One XC instruction clears at most 256 bytes.
  int  numXCInstr     = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0;

  BLOCK_COMMENT("Clear_Array_Const {");
  assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only");

  // Do less prefetching for very short arrays.
  if (numXCInstr > 0) {
    // Prefetch only some cache lines, then begin clearing.
    if (doPrefetch) {
      if (cnt*BytesPerWord <= lineSize_Bytes/4) {  // If less than 1/4 of a cache line to clear,
        z_pfd(0x02, 0, Z_R0, base);                // prefetch just the first cache line.
      } else {
        assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines");
        for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) {
          z_pfd(0x02, off*lineSize_Bytes, Z_R0, base);
        }
      }
    }

    // Clear all full 256-byte chunks with XC-to-self.
    for (off=0; off<(numXCInstr-1); off++) {
      z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base);

      // Prefetch some cache lines in advance.
      if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) {
        z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base);
      }
    }
    // Clear the remaining tail (shorter than 256 bytes), if any.
    if (off*XC_maxlen < cnt*BytesPerWord) {
      z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base);
    }
  }
  BLOCK_COMMENT("} Clear_Array_Const");

  int block_end = offset();
  return block_end - block_start;
}
4823 
// Compiler ensures base is doubleword aligned and cnt is #doublewords.
// Emitter does not KILL cnt and base arguments, since they need to be copied to
// work registers anyway.
// Actually, only r0, r1, r4, and r5 (which are work registers) are killed.
//
// For very large arrays, exploit MVCLE H/W support.
// MVCLE instruction automatically exploits H/W-optimized page mover.
// - Bytes up to next page boundary are cleared with a series of XC to self.
// - All full pages are cleared with the page mover H/W assist.
// - Remaining bytes are again cleared by a series of XC to self.
//
// Returns the size of the generated code in bytes.
unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len) {
  // src_addr/src_len must form the even/odd register pair required by MVCLE:
  // Src_addr is the even register, src_len the odd one.

  int      block_start = offset();
  Register dst_len  = Z_R1;      // Holds dst len  for MVCLE.
  Register dst_addr = Z_R0;      // Holds dst addr for MVCLE.

  BLOCK_COMMENT("Clear_Array_Const_Big {");

  // Get len to clear.
  load_const_optimized(dst_len, (long)cnt*8L);  // in Bytes = #DW*8

  // Prepare other args to MVCLE.
  z_lgr(dst_addr, base_pointer_arg);
  // Indicate unused result.
  // A zero source length makes MVCLE fill the destination with the pad byte (0).
  (void) clear_reg(src_len, true, false);  // Src len of MVCLE is zero.

  // Clear.
  MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
  BLOCK_COMMENT("} Clear_Array_Const_Big");

  int block_end = offset();
  return block_end - block_start;
}
4860 
// Allocator.
// Copy cnt_reg doublewords from src_reg to dst_reg. Per the function name,
// the caller guarantees the ranges are aligned and disjoint.
// Short copies (<= 256 bytes) use an EXECUTEd MVC; longer copies use MVCLE.
// Returns the size of the generated code in bytes.
unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
                                                           Register cnt_reg,
                                                           Register tmp1_reg, Register tmp2_reg) {
  // tmp2_reg/tmp1_reg must form the even/odd register pair required by MVCLE:
  // Tmp1 is the odd register, Tmp2 the even one.

  int block_start = offset();
  Label doMVC, doMVCLE, done, MVC_template;

  BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {");

  // Check for zero len and convert to long.
  z_ltgfr(cnt_reg, cnt_reg);      // Sign-extend cnt to 64 bit; sets the condition code for the zero test.
  z_bre(done);                    // Nothing to do if len == 0.

  z_sllg(Z_R1, cnt_reg, 3);       // Dst len in bytes. calc early to have the result ready.

  z_cghi(cnt_reg, 32);            // Check for len <= 256 bytes (<=32 DW).
  z_brnh(doMVC);                  // If so, use executed MVC to copy.

  bind(doMVCLE);                  // A lot of data (more than 256 bytes).
  // Prep dest reg pair.
  z_lgr(Z_R0, dst_reg);           // dst addr
  // Dst len already in Z_R1.
  // Prep src reg pair.
  z_lgr(tmp2_reg, src_reg);       // src addr
  z_lgr(tmp1_reg, Z_R1);          // Src len same as dst len.

  // Do the copy.
  move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache.
  z_bru(done);                         // All done.

  bind(MVC_template);             // Just some data (not more than 256 bytes).
  z_mvc(0, 0, dst_reg, 0, src_reg);  // Template only; length patched in via EXECUTE, never run directly.

  bind(doMVC);

  if (VM_Version::has_ExecuteExtensions()) {
    add2reg(Z_R1, -1);            // #bytes-1 for EXECUTE; EXRL uses Z_R1 directly.
  } else {
    add2reg(tmp1_reg, -1, Z_R1);  // tmp1 = #bytes-1; Z_R1 is needed for the template address.
    z_larl(Z_R1, MVC_template);
  }

  if (VM_Version::has_Prefetch()) {
    z_pfd(1,  0,Z_R0,src_reg);    // Prefetch for load ...
    z_pfd(2,  0,Z_R0,dst_reg);    // ... and for store.
    //    z_pfd(1,256,Z_R0,src_reg);    // Assume very short copy.
    //    z_pfd(2,256,Z_R0,dst_reg);
  }

  if (VM_Version::has_ExecuteExtensions()) {
    z_exrl(Z_R1, MVC_template);
  } else {
    z_ex(tmp1_reg, 0, Z_R0, Z_R1);
  }

  bind(done);

  BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint");

  int block_end = offset();
  return block_end - block_start;
}
4926 
4927 //------------------------------------------------------
4928 //   Special String Intrinsics. Implementation
4929 //------------------------------------------------------
4930 
4931 // Intrinsics for CompactStrings
4932 
4933 // Compress char[] to byte[].
4934 //   Restores: src, dst
4935 //   Uses:     cnt
4936 //   Kills:    tmp, Z_R0, Z_R1.
4937 //   Early clobber: result.
4938 // Note:
4939 //   cnt is signed int. Do not rely on high word!
4940 //       counts # characters, not bytes.
4941 // The result is the number of characters copied before the first incompatible character was found.
4942 // If precise is true, the processing stops exactly at this point. Otherwise, the result may be off
4943 // by a few bytes. The result always indicates the number of copied characters.
4944 // When used as a character index, the returned value points to the first incompatible character.
4945 //
4946 // Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
4947 // - Different number of characters may have been written to dead array (if precise is false).
4948 // - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
4949 unsigned int MacroAssembler::string_compress(Register result, Register src, Register dst, Register cnt,
4950                                              Register tmp,    bool precise) {
4951   assert_different_registers(Z_R0, Z_R1, result, src, dst, cnt, tmp);
4952 
4953   if (precise) {
4954     BLOCK_COMMENT("encode_iso_array {");
4955   } else {
4956     BLOCK_COMMENT("string_compress {");
4957   }
4958   int  block_start = offset();
4959 
4960   Register       Rsrc  = src;
4961   Register       Rdst  = dst;
4962   Register       Rix   = tmp;
4963   Register       Rcnt  = cnt;
4964   Register       Rmask = result;  // holds incompatibility check mask until result value is stored.
4965   Label          ScalarShortcut, AllDone;
4966 
4967   z_iilf(Rmask, 0xFF00FF00);
4968   z_iihf(Rmask, 0xFF00FF00);
4969 
4970 #if 0  // Sacrifice shortcuts for code compactness
4971   {
4972     //---<  shortcuts for short strings (very frequent)   >---
    //   Strings with 4 and 8 characters were found to occur very frequently.
4974     //   Therefore, we handle them right away with minimal overhead.
4975     Label     skipShortcut, skip4Shortcut, skip8Shortcut;
4976     Register  Rout = Z_R0;
4977     z_chi(Rcnt, 4);
4978     z_brne(skip4Shortcut);                 // 4 characters are very frequent
4979       z_lg(Z_R0, 0, Rsrc);                 // Treat exactly 4 characters specially.
4980       if (VM_Version::has_DistinctOpnds()) {
4981         Rout = Z_R0;
4982         z_ngrk(Rix, Z_R0, Rmask);
4983       } else {
4984         Rout = Rix;
4985         z_lgr(Rix, Z_R0);
4986         z_ngr(Z_R0, Rmask);
4987       }
4988       z_brnz(skipShortcut);
4989       z_stcmh(Rout, 5, 0, Rdst);
4990       z_stcm(Rout,  5, 2, Rdst);
4991       z_lgfr(result, Rcnt);
4992       z_bru(AllDone);
4993     bind(skip4Shortcut);
4994 
4995     z_chi(Rcnt, 8);
4996     z_brne(skip8Shortcut);                 // There's more to do...
4997       z_lmg(Z_R0, Z_R1, 0, Rsrc);          // Treat exactly 8 characters specially.
4998       if (VM_Version::has_DistinctOpnds()) {
4999         Rout = Z_R0;
5000         z_ogrk(Rix, Z_R0, Z_R1);
5001         z_ngr(Rix, Rmask);
5002       } else {
5003         Rout = Rix;
5004         z_lgr(Rix, Z_R0);
5005         z_ogr(Z_R0, Z_R1);
5006         z_ngr(Z_R0, Rmask);
5007       }
5008       z_brnz(skipShortcut);
5009       z_stcmh(Rout, 5, 0, Rdst);
5010       z_stcm(Rout,  5, 2, Rdst);
5011       z_stcmh(Z_R1, 5, 4, Rdst);
5012       z_stcm(Z_R1,  5, 6, Rdst);
5013       z_lgfr(result, Rcnt);
5014       z_bru(AllDone);
5015 
5016     bind(skip8Shortcut);
5017     clear_reg(Z_R0, true, false);          // #characters already processed (none). Precond for scalar loop.
5018     z_brl(ScalarShortcut);                 // Just a few characters
5019 
5020     bind(skipShortcut);
5021   }
5022 #endif
5023   clear_reg(Z_R0);                         // make sure register is properly initialized.
5024 
5025   if (VM_Version::has_VectorFacility()) {
5026     const int  min_vcnt     = 32;          // Minimum #characters required to use vector instructions.
5027                                            // Otherwise just do nothing in vector mode.
5028                                            // Must be multiple of 2*(vector register length in chars (8 HW = 128 bits)).
5029     const int  log_min_vcnt = exact_log2(min_vcnt);
5030     Label      VectorLoop, VectorDone, VectorBreak;
5031 
5032     VectorRegister Vtmp1      = Z_V16;
5033     VectorRegister Vtmp2      = Z_V17;
5034     VectorRegister Vmask      = Z_V18;
5035     VectorRegister Vzero      = Z_V19;
5036     VectorRegister Vsrc_first = Z_V20;
5037     VectorRegister Vsrc_last  = Z_V23;
5038 
5039     assert((Vsrc_last->encoding() - Vsrc_first->encoding() + 1) == min_vcnt/8, "logic error");
5040     assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
5041     z_srak(Rix, Rcnt, log_min_vcnt);       // # vector loop iterations
5042     z_brz(VectorDone);                     // not enough data for vector loop
5043 
5044     z_vzero(Vzero);                        // all zeroes
5045     z_vgmh(Vmask, 0, 7);                   // generate 0xff00 mask for all 2-byte elements
5046     z_sllg(Z_R0, Rix, log_min_vcnt);       // remember #chars that will be processed by vector loop
5047 
5048     bind(VectorLoop);
5049       z_vlm(Vsrc_first, Vsrc_last, 0, Rsrc);
5050       add2reg(Rsrc, min_vcnt*2);
5051 
5052       //---<  check for incompatible character  >---
5053       z_vo(Vtmp1, Z_V20, Z_V21);
5054       z_vo(Vtmp2, Z_V22, Z_V23);
5055       z_vo(Vtmp1, Vtmp1, Vtmp2);
5056       z_vn(Vtmp1, Vtmp1, Vmask);
5057       z_vceqhs(Vtmp1, Vtmp1, Vzero);       // high half of all chars must be zero for successful compress.
5058       z_bvnt(VectorBreak);                 // break vector loop if not all vector elements compare eq -> incompatible character found.
5059                                            // re-process data from current iteration in break handler.
5060 
5061       //---<  pack & store characters  >---
5062       z_vpkh(Vtmp1, Z_V20, Z_V21);         // pack (src1, src2) -> tmp1
5063       z_vpkh(Vtmp2, Z_V22, Z_V23);         // pack (src3, src4) -> tmp2
5064       z_vstm(Vtmp1, Vtmp2, 0, Rdst);       // store packed string
5065       add2reg(Rdst, min_vcnt);
5066 
5067       z_brct(Rix, VectorLoop);
5068 
5069     z_bru(VectorDone);
5070 
5071     bind(VectorBreak);
5072       add2reg(Rsrc, -min_vcnt*2);          // Fix Rsrc. Rsrc was already updated, but Rdst and Rix are not.
5073       z_sll(Rix, log_min_vcnt);            // # chars processed so far in VectorLoop, excl. current iteration.
5074       z_sr(Z_R0, Rix);                     // correct # chars processed in total.
5075 
5076     bind(VectorDone);
5077   }
5078 
5079   {
5080     const int  min_cnt     =  8;           // Minimum #characters required to use unrolled loop.
5081                                            // Otherwise just do nothing in unrolled loop.
5082                                            // Must be multiple of 8.
5083     const int  log_min_cnt = exact_log2(min_cnt);
5084     Label      UnrolledLoop, UnrolledDone, UnrolledBreak;
5085 
5086     if (VM_Version::has_DistinctOpnds()) {
5087       z_srk(Rix, Rcnt, Z_R0);              // remaining # chars to compress in unrolled loop
5088     } else {
5089       z_lr(Rix, Rcnt);
5090       z_sr(Rix, Z_R0);
5091     }
5092     z_sra(Rix, log_min_cnt);             // unrolled loop count
5093     z_brz(UnrolledDone);
5094 
5095     bind(UnrolledLoop);
5096       z_lmg(Z_R0, Z_R1, 0, Rsrc);
5097       if (precise) {
5098         z_ogr(Z_R1, Z_R0);                 // check all 8 chars for incompatibility
5099         z_ngr(Z_R1, Rmask);
5100         z_brnz(UnrolledBreak);
5101 
5102         z_lg(Z_R1, 8, Rsrc);               // reload destroyed register
5103         z_stcmh(Z_R0, 5, 0, Rdst);
5104         z_stcm(Z_R0,  5, 2, Rdst);
5105       } else {
5106         z_stcmh(Z_R0, 5, 0, Rdst);
5107         z_stcm(Z_R0,  5, 2, Rdst);
5108 
5109         z_ogr(Z_R0, Z_R1);
5110         z_ngr(Z_R0, Rmask);
5111         z_brnz(UnrolledBreak);
5112       }
5113       z_stcmh(Z_R1, 5, 4, Rdst);
5114       z_stcm(Z_R1,  5, 6, Rdst);
5115 
5116       add2reg(Rsrc, min_cnt*2);
5117       add2reg(Rdst, min_cnt);
5118       z_brct(Rix, UnrolledLoop);
5119 
5120     z_lgfr(Z_R0, Rcnt);                    // # chars processed in total after unrolled loop.
5121     z_nilf(Z_R0, ~(min_cnt-1));
5122     z_tmll(Rcnt, min_cnt-1);
5123     z_brnaz(ScalarShortcut);               // if all bits zero, there is nothing left to do for scalar loop.
5124                                            // Rix == 0 in all cases.
5125     z_sllg(Z_R1, Rcnt, 1);                 // # src bytes already processed. Only lower 32 bits are valid!
5126                                            //   Z_R1 contents must be treated as unsigned operand! For huge strings,
5127                                            //   (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
5128     z_lgfr(result, Rcnt);                  // all characters processed.
5129     z_slgfr(Rdst, Rcnt);                   // restore ptr
5130     z_slgfr(Rsrc, Z_R1);                   // restore ptr, double the element count for Rsrc restore
5131     z_bru(AllDone);
5132 
5133     bind(UnrolledBreak);
5134     z_lgfr(Z_R0, Rcnt);                    // # chars processed in total after unrolled loop
5135     z_nilf(Z_R0, ~(min_cnt-1));
5136     z_sll(Rix, log_min_cnt);               // # chars not yet processed in UnrolledLoop (due to break), broken iteration not included.
5137     z_sr(Z_R0, Rix);                       // fix # chars processed OK so far.
5138     if (!precise) {
5139       z_lgfr(result, Z_R0);
5140       z_sllg(Z_R1, Z_R0, 1);               // # src bytes already processed. Only lower 32 bits are valid!
5141                                            //   Z_R1 contents must be treated as unsigned operand! For huge strings,
5142                                            //   (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
5143       z_aghi(result, min_cnt/2);           // min_cnt/2 characters have already been written
5144                                            // but ptrs were not updated yet.
5145       z_slgfr(Rdst, Z_R0);                 // restore ptr
5146       z_slgfr(Rsrc, Z_R1);                 // restore ptr, double the element count for Rsrc restore
5147       z_bru(AllDone);
5148     }
5149     bind(UnrolledDone);
5150   }
5151 
5152   {
5153     Label     ScalarLoop, ScalarDone, ScalarBreak;
5154 
5155     bind(ScalarShortcut);
5156     z_ltgfr(result, Rcnt);
5157     z_brz(AllDone);
5158 
5159 #if 0  // Sacrifice shortcuts for code compactness
5160     {
5161       //---<  Special treatment for very short strings (one or two characters)  >---
5162       //   For these strings, we are sure that the above code was skipped.
5163       //   Thus, no registers were modified, register restore is not required.
5164       Label     ScalarDoit, Scalar2Char;
5165       z_chi(Rcnt, 2);
5166       z_brh(ScalarDoit);
5167       z_llh(Z_R1,  0, Z_R0, Rsrc);
5168       z_bre(Scalar2Char);
5169       z_tmll(Z_R1, 0xff00);
5170       z_lghi(result, 0);                   // cnt == 1, first char invalid, no chars successfully processed
5171       z_brnaz(AllDone);
5172       z_stc(Z_R1,  0, Z_R0, Rdst);
5173       z_lghi(result, 1);
5174       z_bru(AllDone);
5175 
5176       bind(Scalar2Char);
5177       z_llh(Z_R0,  2, Z_R0, Rsrc);
5178       z_tmll(Z_R1, 0xff00);
5179       z_lghi(result, 0);                   // cnt == 2, first char invalid, no chars successfully processed
5180       z_brnaz(AllDone);
5181       z_stc(Z_R1,  0, Z_R0, Rdst);
5182       z_tmll(Z_R0, 0xff00);
5183       z_lghi(result, 1);                   // cnt == 2, second char invalid, one char successfully processed
5184       z_brnaz(AllDone);
5185       z_stc(Z_R0,  1, Z_R0, Rdst);
5186       z_lghi(result, 2);
5187       z_bru(AllDone);
5188 
5189       bind(ScalarDoit);
5190     }
5191 #endif
5192 
5193     if (VM_Version::has_DistinctOpnds()) {
5194       z_srk(Rix, Rcnt, Z_R0);              // remaining # chars to compress in unrolled loop
5195     } else {
5196       z_lr(Rix, Rcnt);
5197       z_sr(Rix, Z_R0);
5198     }
5199     z_lgfr(result, Rcnt);                  // # processed characters (if all runs ok).
5200     z_brz(ScalarDone);                     // uses CC from Rix calculation
5201 
5202     bind(ScalarLoop);
5203       z_llh(Z_R1, 0, Z_R0, Rsrc);
5204       z_tmll(Z_R1, 0xff00);
5205       z_brnaz(ScalarBreak);
5206       z_stc(Z_R1, 0, Z_R0, Rdst);
5207       add2reg(Rsrc, 2);
5208       add2reg(Rdst, 1);
5209       z_brct(Rix, ScalarLoop);
5210 
5211     z_bru(ScalarDone);
5212 
5213     bind(ScalarBreak);
5214     z_sr(result, Rix);
5215 
5216     bind(ScalarDone);
5217     z_sgfr(Rdst, result);                  // restore ptr
5218     z_sgfr(Rsrc, result);                  // restore ptr, double the element count for Rsrc restore
5219     z_sgfr(Rsrc, result);
5220   }
5221   bind(AllDone);
5222 
5223   if (precise) {
5224     BLOCK_COMMENT("} encode_iso_array");
5225   } else {
5226     BLOCK_COMMENT("} string_compress");
5227   }
5228   return offset() - block_start;
5229 }
5230 
5231 // Inflate byte[] to char[].
5232 unsigned int MacroAssembler::string_inflate_trot(Register src, Register dst, Register cnt, Register tmp) {
5233   int block_start = offset();
5234 
5235   BLOCK_COMMENT("string_inflate {");
5236 
5237   Register stop_char = Z_R0;
5238   Register table     = Z_R1;
5239   Register src_addr  = tmp;
5240 
5241   assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt);
5242   assert(dst->encoding()%2 == 0, "must be even reg");
5243   assert(cnt->encoding()%2 == 1, "must be odd reg");
5244   assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair");
5245 
5246   StubRoutines::zarch::generate_load_trot_table_addr(this, table);  // kills Z_R0 (if ASSERT)
5247   clear_reg(stop_char);  // Stop character. Not used here, but initialized to have a defined value.
5248   lgr_if_needed(src_addr, src);
5249   z_llgfr(cnt, cnt);     // # src characters, must be a positive simm32.
5250 
5251   translate_ot(dst, src_addr, /* mask = */ 0x0001);
5252 
5253   BLOCK_COMMENT("} string_inflate");
5254 
5255   return offset() - block_start;
5256 }
5257 
5258 // Inflate byte[] to char[].
5259 //   Restores: src, dst
5260 //   Uses:     cnt
5261 //   Kills:    tmp, Z_R0, Z_R1.
5262 // Note:
5263 //   cnt is signed int. Do not rely on high word!
5264 //       counts # characters, not bytes.
unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
  assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp);

  BLOCK_COMMENT("string_inflate {");
  int block_start = offset();

  Register   Rcnt = cnt;   // # characters (src: bytes, dst: char (2-byte)), remaining after current loop.
  Register   Rix  = tmp;   // loop index
  Register   Rsrc = src;   // addr(src array)
  Register   Rdst = dst;   // addr(dst array)
  Label      ScalarShortcut, AllDone;

#if 0  // Sacrifice shortcuts for code compactness
  {
    //---<  shortcuts for short strings (very frequent)   >---
    Label   skipShortcut, skip4Shortcut;
    z_ltr(Rcnt, Rcnt);                     // absolutely nothing to do for strings of len == 0.
    z_brz(AllDone);
    clear_reg(Z_R0);                       // make sure registers are properly initialized.
    clear_reg(Z_R1);
    z_chi(Rcnt, 4);
    z_brne(skip4Shortcut);                 // 4 characters are very frequent
      z_icm(Z_R0, 5,    0, Rsrc);          // Treat exactly 4 characters specially.
      z_icm(Z_R1, 5,    2, Rsrc);
      z_stm(Z_R0, Z_R1, 0, Rdst);
      z_bru(AllDone);
    bind(skip4Shortcut);

    z_chi(Rcnt, 8);
    z_brh(skipShortcut);                   // There's a lot to do...
    z_lgfr(Z_R0, Rcnt);                    // remaining #characters (<= 8). Precond for scalar loop.
                                           // This does not destroy the "register cleared" state of Z_R0.
    z_brl(ScalarShortcut);                 // Just a few characters
      z_icmh(Z_R0, 5, 0, Rsrc);            // Treat exactly 8 characters specially.
      z_icmh(Z_R1, 5, 4, Rsrc);
      z_icm(Z_R0,  5, 2, Rsrc);
      z_icm(Z_R1,  5, 6, Rsrc);
      z_stmg(Z_R0, Z_R1, 0, Rdst);
      z_bru(AllDone);
    bind(skipShortcut);
  }
#endif
  clear_reg(Z_R0);                         // make sure register is properly initialized.
                                           // Z_R0 tracks the # chars consumed by the loops below;
                                           // it must be 0 if the vector loop is skipped entirely.

  //---<  Phase 1: vector loop, 32 characters per iteration  >---
  if (VM_Version::has_VectorFacility()) {
    const int  min_vcnt     = 32;          // Minimum #characters required to use vector instructions.
                                           // Otherwise just do nothing in vector mode.
                                           // Must be multiple of vector register length (16 bytes = 128 bits).
    const int  log_min_vcnt = exact_log2(min_vcnt);
    Label      VectorLoop, VectorDone;

    assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
    z_srak(Rix, Rcnt, log_min_vcnt);       // calculate # vector loop iterations
    z_brz(VectorDone);                     // skip if none

    z_sllg(Z_R0, Rix, log_min_vcnt);       // remember #chars that will be processed by vector loop

    bind(VectorLoop);
      z_vlm(Z_V20, Z_V21, 0, Rsrc);        // get next 32 characters (single-byte)
      add2reg(Rsrc, min_vcnt);

      z_vuplhb(Z_V22, Z_V20);              // V2 <- (expand) V0(high)
      z_vupllb(Z_V23, Z_V20);              // V3 <- (expand) V0(low)
      z_vuplhb(Z_V24, Z_V21);              // V4 <- (expand) V1(high)
      z_vupllb(Z_V25, Z_V21);              // V5 <- (expand) V1(low)
      z_vstm(Z_V22, Z_V25, 0, Rdst);       // store next 32 bytes
      add2reg(Rdst, min_vcnt*2);

      z_brct(Rix, VectorLoop);

    bind(VectorDone);
  }

  //---<  Phase 2: unrolled scalar loop, 8 characters per iteration  >---
  const int  min_cnt     =  8;             // Minimum #characters required to use unrolled scalar loop.
                                           // Otherwise just do nothing in unrolled scalar mode.
                                           // Must be multiple of 8.
  {
    const int  log_min_cnt = exact_log2(min_cnt);
    Label      UnrolledLoop, UnrolledDone;


    if (VM_Version::has_DistinctOpnds()) {
      z_srk(Rix, Rcnt, Z_R0);              // remaining # chars to process in unrolled loop
    } else {
      z_lr(Rix, Rcnt);
      z_sr(Rix, Z_R0);
    }
    z_sra(Rix, log_min_cnt);               // unrolled loop count
    z_brz(UnrolledDone);

    clear_reg(Z_R0);                       // icm(h) only fills the masked byte positions;
    clear_reg(Z_R1);                       // the gaps must be zero to yield 0x00nn chars.

    bind(UnrolledLoop);
      z_icmh(Z_R0, 5, 0, Rsrc);            // insert bytes 0/1 into the high halfwords of each word
      z_icmh(Z_R1, 5, 4, Rsrc);
      z_icm(Z_R0,  5, 2, Rsrc);            // insert bytes 2/3 into the low word, same pattern
      z_icm(Z_R1,  5, 6, Rsrc);
      add2reg(Rsrc, min_cnt);

      z_stmg(Z_R0, Z_R1, 0, Rdst);         // store 8 inflated chars (16 bytes) at once

      add2reg(Rdst, min_cnt*2);
      z_brct(Rix, UnrolledLoop);

    bind(UnrolledDone);
    z_lgfr(Z_R0, Rcnt);                    // # chars left over after unrolled loop.
    z_nilf(Z_R0, min_cnt-1);
    z_brnz(ScalarShortcut);                // Leftover chars remain: handle them in the scalar code.
                                           // Otherwise fall through to the pointer fixup below.
                                           // Rix == 0 in all cases.
    z_sgfr(Z_R0, Rcnt);                    // negative # characters the ptrs have been advanced previously.
    z_agr(Rdst, Z_R0);                     // restore ptr, double the element count for Rdst restore.
    z_agr(Rdst, Z_R0);
    z_agr(Rsrc, Z_R0);                     // restore ptr.
    z_bru(AllDone);
  }

  //---<  Phase 3: scalar tail, at most 7 characters, handled by a computed jump  >---
  {
    bind(ScalarShortcut);
    // Z_R0 must contain remaining # characters as 64-bit signed int here.
    //      register contents is preserved over scalar processing (for register fixup).

#if 0  // Sacrifice shortcuts for code compactness
    {
      Label      ScalarDefault;
      z_chi(Rcnt, 2);
      z_brh(ScalarDefault);
      z_llc(Z_R0,  0, Z_R0, Rsrc);     // 6 bytes
      z_sth(Z_R0,  0, Z_R0, Rdst);     // 4 bytes
      z_brl(AllDone);
      z_llc(Z_R0,  1, Z_R0, Rsrc);     // 6 bytes
      z_sth(Z_R0,  2, Z_R0, Rdst);     // 4 bytes
      z_bru(AllDone);
      bind(ScalarDefault);
    }
#endif

    Label   CodeTable;
    // The llc/sth pairs below form a jump table: each pair is 10 bytes of code
    // (llc is a 6-byte RXY instruction, sth a 4-byte RX instruction). Branching
    // to CodeTable - 10*k executes exactly the last k pairs, copying chars
    // k-1 .. 0. Rix is therefore set to -10 * (remaining char count).
    // Some comments on Rix calculation:
    //  - Rcnt is small, therefore no bits shifted out of low word (sll(g) instructions).
    //  - high word of both Rix and Rcnt may contain garbage
    //  - the final lngfr takes care of that garbage, extending the sign to high word
    z_sllg(Rix, Z_R0, 2);                // calculate 10*Rix = (4*Rix + Rix)*2
    z_ar(Rix, Z_R0);
    z_larl(Z_R1, CodeTable);
    z_sll(Rix, 1);
    z_lngfr(Rix, Rix);      // ix range: [0..7], after inversion & mult: [-(7*10)..(0*10)].
    z_bc(Assembler::bcondAlways, 0, Rix, Z_R1);

    z_llc(Z_R1,  6, Z_R0, Rsrc);  // 6 bytes
    z_sth(Z_R1, 12, Z_R0, Rdst);  // 4 bytes

    z_llc(Z_R1,  5, Z_R0, Rsrc);
    z_sth(Z_R1, 10, Z_R0, Rdst);

    z_llc(Z_R1,  4, Z_R0, Rsrc);
    z_sth(Z_R1,  8, Z_R0, Rdst);

    z_llc(Z_R1,  3, Z_R0, Rsrc);
    z_sth(Z_R1,  6, Z_R0, Rdst);

    z_llc(Z_R1,  2, Z_R0, Rsrc);
    z_sth(Z_R1,  4, Z_R0, Rdst);

    z_llc(Z_R1,  1, Z_R0, Rsrc);
    z_sth(Z_R1,  2, Z_R0, Rdst);

    z_llc(Z_R1,  0, Z_R0, Rsrc);
    z_sth(Z_R1,  0, Z_R0, Rdst);
    bind(CodeTable);

    z_chi(Rcnt, 8);                        // no fixup for small strings. Rdst, Rsrc were not modified.
    z_brl(AllDone);

    z_sgfr(Z_R0, Rcnt);                    // # characters the ptrs have been advanced previously.
    z_agr(Rdst, Z_R0);                     // restore ptr, double the element count for Rdst restore.
    z_agr(Rdst, Z_R0);
    z_agr(Rsrc, Z_R0);                     // restore ptr.
  }
  bind(AllDone);

  BLOCK_COMMENT("} string_inflate");
  return offset() - block_start;
}
5449 
5450 // Inflate byte[] to char[], length known at compile time.
5451 //   Restores: src, dst
5452 //   Kills:    tmp, Z_R0, Z_R1.
5453 // Note:
5454 //   len is signed int. Counts # characters, not bytes.
5455 unsigned int MacroAssembler::string_inflate_const(Register src, Register dst, Register tmp, int len) {
5456   assert_different_registers(Z_R0, Z_R1, src, dst, tmp);
5457 
5458   BLOCK_COMMENT("string_inflate_const {");
5459   int block_start = offset();
5460 
5461   Register   Rix  = tmp;   // loop index
5462   Register   Rsrc = src;   // addr(src array)
5463   Register   Rdst = dst;   // addr(dst array)
5464   Label      ScalarShortcut, AllDone;
5465   int        nprocessed = 0;
5466   int        src_off    = 0;  // compensate for saved (optimized away) ptr advancement.
5467   int        dst_off    = 0;  // compensate for saved (optimized away) ptr advancement.
5468   bool       restore_inputs = false;
5469   bool       workreg_clear  = false;
5470 
5471   if ((len >= 32) && VM_Version::has_VectorFacility()) {
5472     const int  min_vcnt     = 32;          // Minimum #characters required to use vector instructions.
5473                                            // Otherwise just do nothing in vector mode.
5474                                            // Must be multiple of vector register length (16 bytes = 128 bits).
5475     const int  log_min_vcnt = exact_log2(min_vcnt);
5476     const int  iterations   = (len - nprocessed) >> log_min_vcnt;
5477     nprocessed             += iterations << log_min_vcnt;
5478     Label      VectorLoop;
5479 
5480     if (iterations == 1) {
5481       z_vlm(Z_V20, Z_V21, 0+src_off, Rsrc);  // get next 32 characters (single-byte)
5482       z_vuplhb(Z_V22, Z_V20);                // V2 <- (expand) V0(high)
5483       z_vupllb(Z_V23, Z_V20);                // V3 <- (expand) V0(low)
5484       z_vuplhb(Z_V24, Z_V21);                // V4 <- (expand) V1(high)
5485       z_vupllb(Z_V25, Z_V21);                // V5 <- (expand) V1(low)
5486       z_vstm(Z_V22, Z_V25, 0+dst_off, Rdst); // store next 32 bytes
5487 
5488       src_off += min_vcnt;
5489       dst_off += min_vcnt*2;
5490     } else {
5491       restore_inputs = true;
5492 
5493       z_lgfi(Rix, len>>log_min_vcnt);
5494       bind(VectorLoop);
5495         z_vlm(Z_V20, Z_V21, 0, Rsrc);        // get next 32 characters (single-byte)
5496         add2reg(Rsrc, min_vcnt);
5497 
5498         z_vuplhb(Z_V22, Z_V20);              // V2 <- (expand) V0(high)
5499         z_vupllb(Z_V23, Z_V20);              // V3 <- (expand) V0(low)
5500         z_vuplhb(Z_V24, Z_V21);              // V4 <- (expand) V1(high)
5501         z_vupllb(Z_V25, Z_V21);              // V5 <- (expand) V1(low)
5502         z_vstm(Z_V22, Z_V25, 0, Rdst);       // store next 32 bytes
5503         add2reg(Rdst, min_vcnt*2);
5504 
5505         z_brct(Rix, VectorLoop);
5506     }
5507   }
5508 
5509   if (((len-nprocessed) >= 16) && VM_Version::has_VectorFacility()) {
5510     const int  min_vcnt     = 16;          // Minimum #characters required to use vector instructions.
5511                                            // Otherwise just do nothing in vector mode.
5512                                            // Must be multiple of vector register length (16 bytes = 128 bits).
5513     const int  log_min_vcnt = exact_log2(min_vcnt);
5514     const int  iterations   = (len - nprocessed) >> log_min_vcnt;
5515     nprocessed             += iterations << log_min_vcnt;
5516     assert(iterations == 1, "must be!");
5517 
5518     z_vl(Z_V20, 0+src_off, Z_R0, Rsrc);    // get next 16 characters (single-byte)
5519     z_vuplhb(Z_V22, Z_V20);                // V2 <- (expand) V0(high)
5520     z_vupllb(Z_V23, Z_V20);                // V3 <- (expand) V0(low)
5521     z_vstm(Z_V22, Z_V23, 0+dst_off, Rdst); // store next 32 bytes
5522 
5523     src_off += min_vcnt;
5524     dst_off += min_vcnt*2;
5525   }
5526 
5527   if ((len-nprocessed) > 8) {
5528     const int  min_cnt     =  8;           // Minimum #characters required to use unrolled scalar loop.
5529                                            // Otherwise just do nothing in unrolled scalar mode.
5530                                            // Must be multiple of 8.
5531     const int  log_min_cnt = exact_log2(min_cnt);
5532     const int  iterations  = (len - nprocessed) >> log_min_cnt;
5533     nprocessed     += iterations << log_min_cnt;
5534 
5535     //---<  avoid loop overhead/ptr increment for small # iterations  >---
5536     if (iterations <= 2) {
5537       clear_reg(Z_R0);
5538       clear_reg(Z_R1);
5539       workreg_clear = true;
5540 
5541       z_icmh(Z_R0, 5, 0+src_off, Rsrc);
5542       z_icmh(Z_R1, 5, 4+src_off, Rsrc);
5543       z_icm(Z_R0,  5, 2+src_off, Rsrc);
5544       z_icm(Z_R1,  5, 6+src_off, Rsrc);
5545       z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
5546 
5547       src_off += min_cnt;
5548       dst_off += min_cnt*2;
5549     }
5550 
5551     if (iterations == 2) {
5552       z_icmh(Z_R0, 5, 0+src_off, Rsrc);
5553       z_icmh(Z_R1, 5, 4+src_off, Rsrc);
5554       z_icm(Z_R0,  5, 2+src_off, Rsrc);
5555       z_icm(Z_R1,  5, 6+src_off, Rsrc);
5556       z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
5557 
5558       src_off += min_cnt;
5559       dst_off += min_cnt*2;
5560     }
5561 
5562     if (iterations > 2) {
5563       Label      UnrolledLoop;
5564       restore_inputs  = true;
5565 
5566       clear_reg(Z_R0);
5567       clear_reg(Z_R1);
5568       workreg_clear = true;
5569 
5570       z_lgfi(Rix, iterations);
5571       bind(UnrolledLoop);
5572         z_icmh(Z_R0, 5, 0, Rsrc);
5573         z_icmh(Z_R1, 5, 4, Rsrc);
5574         z_icm(Z_R0,  5, 2, Rsrc);
5575         z_icm(Z_R1,  5, 6, Rsrc);
5576         add2reg(Rsrc, min_cnt);
5577 
5578         z_stmg(Z_R0, Z_R1, 0, Rdst);
5579         add2reg(Rdst, min_cnt*2);
5580 
5581         z_brct(Rix, UnrolledLoop);
5582     }
5583   }
5584 
5585   if ((len-nprocessed) > 0) {
5586     switch (len-nprocessed) {
5587       case 8:
5588         if (!workreg_clear) {
5589           clear_reg(Z_R0);
5590           clear_reg(Z_R1);
5591         }
5592         z_icmh(Z_R0, 5, 0+src_off, Rsrc);
5593         z_icmh(Z_R1, 5, 4+src_off, Rsrc);
5594         z_icm(Z_R0,  5, 2+src_off, Rsrc);
5595         z_icm(Z_R1,  5, 6+src_off, Rsrc);
5596         z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
5597         break;
5598       case 7:
5599         if (!workreg_clear) {
5600           clear_reg(Z_R0);
5601           clear_reg(Z_R1);
5602         }
5603         clear_reg(Rix);
5604         z_icm(Z_R0,  5, 0+src_off, Rsrc);
5605         z_icm(Z_R1,  5, 2+src_off, Rsrc);
5606         z_icm(Rix,   5, 4+src_off, Rsrc);
5607         z_stm(Z_R0,  Z_R1, 0+dst_off, Rdst);
5608         z_llc(Z_R0,  6+src_off, Z_R0, Rsrc);
5609         z_st(Rix,    8+dst_off, Z_R0, Rdst);
5610         z_sth(Z_R0, 12+dst_off, Z_R0, Rdst);
5611         break;
5612       case 6:
5613         if (!workreg_clear) {
5614           clear_reg(Z_R0);
5615           clear_reg(Z_R1);
5616         }
5617         clear_reg(Rix);
5618         z_icm(Z_R0, 5, 0+src_off, Rsrc);
5619         z_icm(Z_R1, 5, 2+src_off, Rsrc);
5620         z_icm(Rix,  5, 4+src_off, Rsrc);
5621         z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
5622         z_st(Rix,   8+dst_off, Z_R0, Rdst);
5623         break;
5624       case 5:
5625         if (!workreg_clear) {
5626           clear_reg(Z_R0);
5627           clear_reg(Z_R1);
5628         }
5629         z_icm(Z_R0, 5, 0+src_off, Rsrc);
5630         z_icm(Z_R1, 5, 2+src_off, Rsrc);
5631         z_llc(Rix,  4+src_off, Z_R0, Rsrc);
5632         z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
5633         z_sth(Rix,  8+dst_off, Z_R0, Rdst);
5634         break;
5635       case 4:
5636         if (!workreg_clear) {
5637           clear_reg(Z_R0);
5638           clear_reg(Z_R1);
5639         }
5640         z_icm(Z_R0, 5, 0+src_off, Rsrc);
5641         z_icm(Z_R1, 5, 2+src_off, Rsrc);
5642         z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
5643         break;
5644       case 3:
5645         if (!workreg_clear) {
5646           clear_reg(Z_R0);
5647         }
5648         z_llc(Z_R1, 2+src_off, Z_R0, Rsrc);
5649         z_icm(Z_R0, 5, 0+src_off, Rsrc);
5650         z_sth(Z_R1, 4+dst_off, Z_R0, Rdst);
5651         z_st(Z_R0,  0+dst_off, Rdst);
5652         break;
5653       case 2:
5654         z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
5655         z_llc(Z_R1, 1+src_off, Z_R0, Rsrc);
5656         z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
5657         z_sth(Z_R1, 2+dst_off, Z_R0, Rdst);
5658         break;
5659       case 1:
5660         z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
5661         z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
5662         break;
5663       default:
5664         guarantee(false, "Impossible");
5665         break;
5666     }
5667     src_off   +=  len-nprocessed;
5668     dst_off   += (len-nprocessed)*2;
5669     nprocessed = len;
5670   }
5671 
5672   //---< restore modified input registers  >---
5673   if ((nprocessed > 0) && restore_inputs) {
5674     z_agfi(Rsrc, -(nprocessed-src_off));
5675     if (nprocessed < 1000000000) { // avoid int overflow
5676       z_agfi(Rdst, -(nprocessed*2-dst_off));
5677     } else {
5678       z_agfi(Rdst, -(nprocessed-dst_off));
5679       z_agfi(Rdst, -nprocessed);
5680     }
5681   }
5682 
5683   BLOCK_COMMENT("} string_inflate_const");
5684   return offset() - block_start;
5685 }
5686 
5687 // Kills src.
5688 unsigned int MacroAssembler::has_negatives(Register result, Register src, Register cnt,
5689                                            Register odd_reg, Register even_reg, Register tmp) {
5690   int block_start = offset();
5691   Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone;
5692   const Register addr = src, mask = tmp;
5693 
5694   BLOCK_COMMENT("has_negatives {");
5695 
5696   z_llgfr(Z_R1, cnt);      // Number of bytes to read. (Must be a positive simm32.)
5697   z_llilf(mask, 0x80808080);
5698   z_lhi(result, 1);        // Assume true.
5699   // Last possible addr for fast loop.
5700   z_lay(odd_reg, -16, Z_R1, src);
5701   z_chi(cnt, 16);
5702   z_brl(Lslow);
5703 
5704   // ind1: index, even_reg: index increment, odd_reg: index limit
5705   z_iihf(mask, 0x80808080);
5706   z_lghi(even_reg, 16);
5707 
5708   bind(Lloop1); // 16 bytes per iteration.
5709   z_lg(Z_R0, Address(addr));
5710   z_lg(Z_R1, Address(addr, 8));
5711   z_ogr(Z_R0, Z_R1);
5712   z_ngr(Z_R0, mask);
5713   z_brne(Ldone);           // If found return 1.
5714   z_brxlg(addr, even_reg, Lloop1);
5715 
5716   bind(Lslow);
5717   z_aghi(odd_reg, 16-1);   // Last possible addr for slow loop.
5718   z_lghi(even_reg, 1);
5719   z_cgr(addr, odd_reg);
5720   z_brh(Lnotfound);
5721 
5722   bind(Lloop2); // 1 byte per iteration.
5723   z_cli(Address(addr), 0x80);
5724   z_brnl(Ldone);           // If found return 1.
5725   z_brxlg(addr, even_reg, Lloop2);
5726 
5727   bind(Lnotfound);
5728   z_lhi(result, 0);
5729 
5730   bind(Ldone);
5731 
5732   BLOCK_COMMENT("} has_negatives");
5733 
5734   return offset() - block_start;
5735 }
5736 
5737 // kill: cnt1, cnt2, odd_reg, even_reg; early clobber: result
unsigned int MacroAssembler::string_compare(Register str1, Register str2,
                                            Register cnt1, Register cnt2,
                                            Register odd_reg, Register even_reg, Register result, int ae) {
  int block_start = offset();

  assert_different_registers(str1, cnt1, cnt2, odd_reg, even_reg, result);
  assert_different_registers(str2, cnt1, cnt2, odd_reg, even_reg, result);

  // If strings are equal up to min length, return the length difference.
  const Register diff = result, // Pre-set result with length difference.
                 min  = cnt1,   // min number of bytes
                 tmp  = cnt2;

  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
  // we interchange str1 and str2 in the UL case and negate the result.
  // Like this, str1 is always latin1 encoded, except for the UU case.
  // In addition, we need 0 (or sign which is 0) extend when using 64 bit register.
  const bool used_as_LU = (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL);

  BLOCK_COMMENT("string_compare {");

  if (used_as_LU) {
    z_srl(cnt2, 1);  // Halve cnt2 so both counts use the same unit for the mixed
                     // encoding case (presumably cnt2 arrives in bytes for the
                     // UTF16 operand — confirm against the intrinsic expansion).
  }

  // See if the lengths are different, and calculate min in cnt1.
  // Save diff in case we need it for a tie-breaker.

  // diff = cnt1 - cnt2
  if (VM_Version::has_DistinctOpnds()) {
    z_srk(diff, cnt1, cnt2);
  } else {
    z_lr(diff, cnt1);
    z_sr(diff, cnt2);
  }
  if (str1 != str2) {
    // CC from the subtraction above: high means cnt1 > cnt2, so min must be cnt2.
    if (VM_Version::has_LoadStoreConditional()) {
      z_locr(min, cnt2, Assembler::bcondHigh);
    } else {
      Label Lskip;
      z_brl(Lskip);    // min ok if cnt1 < cnt2
      z_lr(min, cnt2); // min = cnt2
      bind(Lskip);
    }
  }

  if (ae == StrIntrinsicNode::UU) {
    z_sra(diff, 1);    // Scale byte difference down to a char difference.
  }
  if (str1 != str2) {
    Label Ldone;
    if (used_as_LU) {
      // Loop which searches the first difference character by character.
      // str1 is latin1 (1 byte/char, stride 1), str2 is UTF16 (2 bytes/char, stride 2).
      Label Lloop;
      const Register ind1 = Z_R1,
                     ind2 = min;
      int stride1 = 1, stride2 = 2; // See comment above.

      // ind1: index, even_reg: index increment, odd_reg: index limit
      z_llilf(ind1, (unsigned int)(-stride1)); // Pre-decremented; first brxh steps to index 0.
      z_lhi(even_reg, stride1);
      add2reg(odd_reg, -stride1, min);
      clear_reg(ind2); // kills min

      bind(Lloop);
      z_brxh(ind1, even_reg, Ldone);   // ind1 += stride1; exit once all min chars compared equal.
      z_llc(tmp, Address(str1, ind1)); // Zero-extended latin1 char.
      z_llh(Z_R0, Address(str2, ind2)); // Zero-extended UTF16 char.
      z_ahi(ind2, stride2);
      z_sr(tmp, Z_R0);
      z_bre(Lloop);

      // Mismatch: tmp holds char1 - char2 of the first differing position.
      z_lr(result, tmp);

    } else {
      // Use clcle in fast loop (only for same encoding).
      z_lgr(Z_R0, str1);
      z_lgr(even_reg, str2);
      z_llgfr(Z_R1, min);
      z_llgfr(odd_reg, min);

      if (ae == StrIntrinsicNode::LL) {
        compare_long_ext(Z_R0, even_reg, 0);
      } else {
        compare_long_uni(Z_R0, even_reg, 0);
      }
      z_bre(Ldone);                    // Equal over min length: diff (already in result) decides.
      // Mismatch: Z_R0/even_reg point at the first differing elements; reload and subtract.
      z_lgr(Z_R1, Z_R0);
      if (ae == StrIntrinsicNode::LL) {
        z_llc(Z_R0, Address(even_reg));
        z_llc(result, Address(Z_R1));
      } else {
        z_llh(Z_R0, Address(even_reg));
        z_llh(result, Address(Z_R1));
      }
      z_sr(result, Z_R0);
    }

    // Otherwise, return the difference between the first mismatched chars.
    bind(Ldone);
  }

  if (ae == StrIntrinsicNode::UL) {
    z_lcr(result, result); // Negate result (see note above).
  }

  BLOCK_COMMENT("} string_compare");

  return offset() - block_start;
}
5848 
// Compare two arrays (or two in-memory strings) for equality, result is 0/1.
// is_array_equ selects full array semantics (null checks, length compare, header
// offset); otherwise 'limit' gives the number of bytes to compare.
unsigned int MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
                                          Register odd_reg, Register even_reg, Register result, bool is_byte) {
  int block_start = offset();

  BLOCK_COMMENT("array_equals {");

  assert_different_registers(ary1, limit, odd_reg, even_reg);
  assert_different_registers(ary2, limit, odd_reg, even_reg);

  Label Ldone, Ldone_true, Ldone_false, Lclcle, CLC_template;
  int base_offset = 0;

  if (ary1 != ary2) {
    if (is_array_equ) {
      base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);

      // Return true if the same array.
      compareU64_and_branch(ary1, ary2, Assembler::bcondEqual, Ldone_true);

      // Return false if one of them is NULL.
      compareU64_and_branch(ary1, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
      compareU64_and_branch(ary2, (intptr_t)0, Assembler::bcondEqual, Ldone_false);

      // Load the lengths of arrays.
      z_llgf(odd_reg, Address(ary1, arrayOopDesc::length_offset_in_bytes()));

      // Return false if the two arrays are not equal length.
      z_c(odd_reg, Address(ary2, arrayOopDesc::length_offset_in_bytes()));
      z_brne(Ldone_false);

      // string len in bytes (right operand)
      // Compare the element count against the CLC limit BEFORE scaling chars
      // to bytes; z_sll preserves the condition code, so the branch still
      // sees the comparison result.
      if (!is_byte) {
        z_chi(odd_reg, 128);
        z_sll(odd_reg, 1); // preserves flags
        z_brh(Lclcle);
      } else {
        compareU32_and_branch(odd_reg, (intptr_t)256, Assembler::bcondHigh, Lclcle);
      }
    } else {
      z_llgfr(odd_reg, limit); // Need to zero-extend prior to using the value.
      compareU32_and_branch(limit, (intptr_t)256, Assembler::bcondHigh, Lclcle);
    }


    // Use clc instruction for up to 256 bytes.
    // CLC carries its length as an immediate, so the variable length is
    // supplied by EXecuting the CLC_template below with odd_reg as the
    // length field (EX semantics: length operand is length-1).
    {
      Register str1_reg = ary1,
          str2_reg = ary2;
      if (is_array_equ) {
        str1_reg = Z_R1;
        str2_reg = even_reg;
        add2reg(str1_reg, base_offset, ary1); // string addr (left operand)
        add2reg(str2_reg, base_offset, ary2); // string addr (right operand)
      }
      z_ahi(odd_reg, -1); // Clc uses decremented limit. Also compare result to 0.
      z_brl(Ldone_true);  // Zero-length compare: trivially equal.
      // Note: We could jump to the template if equal.

      assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
      z_exrl(odd_reg, CLC_template);
      z_bre(Ldone_true);
      // fall through

      bind(Ldone_false);
      clear_reg(result);
      z_bru(Ldone);

      // Never executed directly; only the target of z_exrl above.
      bind(CLC_template);
      z_clc(0, 0, str1_reg, 0, str2_reg);
    }

    // Use clcle instruction.
    // Fallback for lengths > 256 bytes; CLCLE takes even/odd register pairs
    // (address, length) for both operands.
    {
      bind(Lclcle);
      add2reg(even_reg, base_offset, ary2); // string addr (right operand)
      add2reg(Z_R0, base_offset, ary1);     // string addr (left operand)

      z_lgr(Z_R1, odd_reg); // string len in bytes (left operand)
      if (is_byte) {
        compare_long_ext(Z_R0, even_reg, 0);
      } else {
        compare_long_uni(Z_R0, even_reg, 0);
      }
      z_lghi(result, 0); // Preserve flags.
      z_brne(Ldone);
    }
  }
  // fall through

  bind(Ldone_true);
  z_lghi(result, 1); // All characters are equal.
  bind(Ldone);

  BLOCK_COMMENT("} array_equals");

  return offset() - block_start;
}
5946 
// Emits code that searches for the first occurrence of the needle string
// within the haystack string and leaves the match index (or -1) in 'result'.
// 'ae' selects the argument encodings (StrIntrinsicNode::LL/UU/UL; LU unsupported).
// kill: haycnt, needlecnt, odd_reg, even_reg; early clobber: result
unsigned int MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
                                            Register needle, Register needlecnt, int needlecntval,
                                            Register odd_reg, Register even_reg, int ae) {
  int block_start = offset();

  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
  const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2; // Haystack character size in bytes.
  const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1; // Needle character size in bytes.
  Label L_needle1, L_Found, L_NotFound;

  BLOCK_COMMENT("string_indexof {");

  if (needle == haystack) {
    z_lhi(result, 0); // Same register for both strings: trivially found at index 0.
  } else {

  // Load first character of needle (R0 used by search_string instructions).
  if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); }

  // Compute last haystack addr to use if no match gets found.
  if (needlecnt != noreg) { // variable needlecnt
    z_ahi(needlecnt, -1); // Remaining characters after first one.
    z_sr(haycnt, needlecnt); // Compute index succeeding last element to compare.
    if (n_csize == 2) { z_sll(needlecnt, 1); } // In bytes.
  } else { // constant needlecnt
    assert((needlecntval & 0x7fff) == needlecntval, "must be positive simm16 immediate");
    // Compute index succeeding last element to compare.
    if (needlecntval != 1) { z_ahi(haycnt, 1 - needlecntval); }
  }

  z_llgfr(haycnt, haycnt); // Clear high half.
  z_lgr(result, haystack); // Final result will be computed from needle start pointer.
  if (h_csize == 2) { z_sll(haycnt, 1); } // Scale to number of bytes.
  z_agr(haycnt, haystack); // Point to address succeeding last element (haystack+scale*(haycnt-needlecnt+1)).

  if (h_csize != n_csize) {
    assert(ae == StrIntrinsicNode::UL, "Invalid encoding");

    if (needlecnt != noreg || needlecntval != 1) {
      if (needlecnt != noreg) {
        compare32_and_branch(needlecnt, (intptr_t)0, Assembler::bcondEqual, L_needle1);
      }

      // Main Loop: UL version (now we have at least 2 characters).
      Label L_OuterLoop, L_InnerLoop, L_Skip;
      bind(L_OuterLoop); // Search for 1st 2 characters.
      z_lgr(Z_R1, haycnt);
      MacroAssembler::search_string_uni(Z_R1, result);
      z_brc(Assembler::bcondNotFound, L_NotFound);
      z_lgr(result, Z_R1);

      // Compare remaining needle characters one by one (needle is Latin1,
      // haystack is UTF16, so no bulk CLC is possible here).
      z_lghi(Z_R1, n_csize);
      z_lghi(even_reg, h_csize);
      bind(L_InnerLoop);
      z_llgc(odd_reg, Address(needle, Z_R1));
      z_ch(odd_reg, Address(result, even_reg));
      z_brne(L_Skip);
      if (needlecnt != noreg) { z_cr(Z_R1, needlecnt); } else { z_chi(Z_R1, needlecntval - 1); }
      z_brnl(L_Found);
      z_aghi(Z_R1, n_csize);
      z_aghi(even_reg, h_csize);
      z_bru(L_InnerLoop);

      bind(L_Skip);
      z_aghi(result, h_csize); // This is the new address we want to use for comparing.
      z_bru(L_OuterLoop);
    }

  } else {
    const intptr_t needle_bytes = (n_csize == 2) ? ((needlecntval - 1) << 1) : (needlecntval - 1);
    Label L_clcle;

    if (needlecnt != noreg || (needlecntval != 1 && needle_bytes <= 256)) {
      if (needlecnt != noreg) {
        compare32_and_branch(needlecnt, 256, Assembler::bcondHigh, L_clcle);
        z_ahi(needlecnt, -1); // remaining bytes -1 (for CLC)
        z_brl(L_needle1);
      }

      // Main Loop: clc version (now we have at least 2 characters).
      Label L_OuterLoop, CLC_template;
      bind(L_OuterLoop); // Search for 1st 2 characters.
      z_lgr(Z_R1, haycnt);
      if (h_csize == 1) {
        MacroAssembler::search_string(Z_R1, result);
      } else {
        MacroAssembler::search_string_uni(Z_R1, result);
      }
      z_brc(Assembler::bcondNotFound, L_NotFound);
      z_lgr(result, Z_R1);

      if (needlecnt != noreg) {
        // Variable length: execute the CLC template with length from needlecnt.
        assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
        z_exrl(needlecnt, CLC_template);
      } else {
        z_clc(h_csize, needle_bytes -1, Z_R1, n_csize, needle);
      }
      z_bre(L_Found);
      z_aghi(result, h_csize); // This is the new address we want to use for comparing.
      z_bru(L_OuterLoop);

      if (needlecnt != noreg) {
        bind(CLC_template);
        z_clc(h_csize, 0, Z_R1, n_csize, needle);
      }
    }

    if (needlecnt != noreg || needle_bytes > 256) {
      bind(L_clcle);

      // Main Loop: clcle version (now we have at least 256 bytes).
      Label L_OuterLoop, CLC_template;
      bind(L_OuterLoop); // Search for 1st 2 characters.
      z_lgr(Z_R1, haycnt);
      if (h_csize == 1) {
        MacroAssembler::search_string(Z_R1, result);
      } else {
        MacroAssembler::search_string_uni(Z_R1, result);
      }
      z_brc(Assembler::bcondNotFound, L_NotFound);

      add2reg(Z_R0, n_csize, needle);
      add2reg(even_reg, h_csize, Z_R1);
      z_lgr(result, Z_R1);
      if (needlecnt != noreg) {
        z_llgfr(Z_R1, needlecnt); // needle len in bytes (left operand)
        z_llgfr(odd_reg, needlecnt);
      } else {
        load_const_optimized(Z_R1, needle_bytes);
        if (Immediate::is_simm16(needle_bytes)) { z_lghi(odd_reg, needle_bytes); } else { z_lgr(odd_reg, Z_R1); }
      }
      if (h_csize == 1) {
        compare_long_ext(Z_R0, even_reg, 0);
      } else {
        compare_long_uni(Z_R0, even_reg, 0);
      }
      z_bre(L_Found);

      if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); } // Reload.
      z_aghi(result, h_csize); // This is the new address we want to use for comparing.
      z_bru(L_OuterLoop);
    }
  }

  if (needlecnt != noreg || needlecntval == 1) {
    bind(L_needle1);

    // Single needle character version.
    if (h_csize == 1) {
      MacroAssembler::search_string(haycnt, result);
    } else {
      MacroAssembler::search_string_uni(haycnt, result);
    }
    z_lgr(result, haycnt);
    z_brc(Assembler::bcondFound, L_Found);
  }

  bind(L_NotFound);
  add2reg(result, -1, haystack); // Return -1.

  bind(L_Found); // Return index (or -1 in fallthrough case).
  z_sgr(result, haystack);
  if (h_csize == 2) { z_srag(result, result, exact_log2(sizeof(jchar))); }
  }
  BLOCK_COMMENT("} string_indexof");

  return offset() - block_start;
}
6117 
// Emits code that searches haystack for a single character — the immediate
// needleChar, or the char held in 'needle' if needle != noreg — and leaves the
// match index (or -1) in 'result'. Kills odd_reg/even_reg; Z_R0 receives the
// search character (used by the search_string instructions).
// early clobber: result
unsigned int MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
                                                 Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte) {
  int block_start = offset();

  BLOCK_COMMENT("string_indexof_char {");

  if (needle == haystack) {
    z_lhi(result, 0); // Same register for both: trivially found at index 0.
  } else {

  Label Ldone;

  z_llgfr(odd_reg, haycnt);  // Preset loop ctr/searchrange end.
  if (needle == noreg) {
    load_const_optimized(Z_R0, (unsigned long)needleChar); // Constant needle char.
  } else {
    if (is_byte) {
      z_llgcr(Z_R0, needle); // First (and only) needle char.
    } else {
      z_llghr(Z_R0, needle); // First (and only) needle char.
    }
  }

  if (!is_byte) {
    z_agr(odd_reg, odd_reg); // Calc #bytes to be processed with SRSTU.
  }

  z_lgr(even_reg, haystack); // haystack addr
  z_agr(odd_reg, haystack);  // First char after range end.
  z_lghi(result, -1);        // Preset "not found".

  if (is_byte) {
    MacroAssembler::search_string(odd_reg, even_reg);
  } else {
    MacroAssembler::search_string_uni(odd_reg, even_reg);
  }
  z_brc(Assembler::bcondNotFound, Ldone);
  // Found: index = (match address - haystack), scaled back to characters.
  if (is_byte) {
    if (VM_Version::has_DistinctOpnds()) {
      z_sgrk(result, odd_reg, haystack);
    } else {
      z_sgr(odd_reg, haystack);
      z_lgr(result, odd_reg);
    }
  } else {
    z_slgr(odd_reg, haystack);
    z_srlg(result, odd_reg, exact_log2(sizeof(jchar)));
  }

  bind(Ldone);
  }
  BLOCK_COMMENT("} string_indexof_char");

  return offset() - block_start;
}
6174 
6175 
6176 //-------------------------------------------------
6177 //   Constants (scalar and oop) in constant pool
6178 //-------------------------------------------------
6179 
6180 // Add a non-relocated constant to the CP.
6181 int MacroAssembler::store_const_in_toc(AddressLiteral& val) {
6182   long    value  = val.value();
6183   address tocPos = long_constant(value);
6184 
6185   if (tocPos != NULL) {
6186     int tocOffset = (int)(tocPos - code()->consts()->start());
6187     return tocOffset;
6188   }
6189   // Address_constant returned NULL, so no constant entry has been created.
6190   // In that case, we return a "fatal" offset, just in case that subsequently
6191   // generated access code is executed.
6192   return -1;
6193 }
6194 
6195 // Returns the TOC offset where the address is stored.
6196 // Add a relocated constant to the CP.
6197 int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) {
6198   // Use RelocationHolder::none for the constant pool entry.
6199   // Otherwise we will end up with a failing NativeCall::verify(x),
6200   // where x is the address of the constant pool entry.
6201   address tocPos = address_constant((address)oop.value(), RelocationHolder::none);
6202 
6203   if (tocPos != NULL) {
6204     int              tocOffset = (int)(tocPos - code()->consts()->start());
6205     RelocationHolder rsp = oop.rspec();
6206     Relocation      *rel = rsp.reloc();
6207 
6208     // Store toc_offset in relocation, used by call_far_patchable.
6209     if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) {
6210       ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset);
6211     }
6212     // Relocate at the load's pc.
6213     relocate(rsp);
6214 
6215     return tocOffset;
6216   }
6217   // Address_constant returned NULL, so no constant entry has been created
6218   // in that case, we return a "fatal" offset, just in case that subsequently
6219   // generated access code is executed.
6220   return -1;
6221 }
6222 
6223 bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
6224   int     tocOffset = store_const_in_toc(a);
6225   if (tocOffset == -1) return false;
6226   address tocPos    = tocOffset + code()->consts()->start();
6227   assert((address)code()->consts()->start() != NULL, "Please add CP address");
6228 
6229   load_long_pcrelative(dst, tocPos);
6230   return true;
6231 }
6232 
6233 bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
6234   int     tocOffset = store_oop_in_toc(a);
6235   if (tocOffset == -1) return false;
6236   address tocPos    = tocOffset + code()->consts()->start();
6237   assert((address)code()->consts()->start() != NULL, "Please add CP address");
6238 
6239   load_addr_pcrelative(dst, tocPos);
6240   return true;
6241 }
6242 
6243 // If the instruction sequence at the given pc is a load_const_from_toc
6244 // sequence, return the value currently stored at the referenced position
6245 // in the TOC.
6246 intptr_t MacroAssembler::get_const_from_toc(address pc) {
6247 
6248   assert(is_load_const_from_toc(pc), "must be load_const_from_pool");
6249 
6250   long    offset  = get_load_const_from_toc_offset(pc);
6251   address dataLoc = NULL;
6252   if (is_load_const_from_toc_pcrelative(pc)) {
6253     dataLoc = pc + offset;
6254   } else {
6255     CodeBlob* cb = CodeCache::find_blob_unsafe(pc);   // Else we get assertion if nmethod is zombie.
6256     assert(cb && cb->is_nmethod(), "sanity");
6257     nmethod* nm = (nmethod*)cb;
6258     dataLoc = nm->ctable_begin() + offset;
6259   }
6260   return *(intptr_t *)dataLoc;
6261 }
6262 
6263 // If the instruction sequence at the given pc is a load_const_from_toc
6264 // sequence, copy the passed-in new_data value into the referenced
6265 // position in the TOC.
6266 void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) {
6267   assert(is_load_const_from_toc(pc), "must be load_const_from_pool");
6268 
6269   long    offset = MacroAssembler::get_load_const_from_toc_offset(pc);
6270   address dataLoc = NULL;
6271   if (is_load_const_from_toc_pcrelative(pc)) {
6272     dataLoc = pc+offset;
6273   } else {
6274     nmethod* nm = CodeCache::find_nmethod(pc);
6275     assert((cb == NULL) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob");
6276     dataLoc = nm->ctable_begin() + offset;
6277   }
6278   if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary.
6279     *(unsigned long *)dataLoc = new_data;
6280   }
6281 }
6282 
6283 // Dynamic TOC. Getter must only be called if "a" is a load_const_from_toc
6284 // site. Verify by calling is_load_const_from_toc() before!!
6285 // Offset is +/- 2**32 -> use long.
6286 long MacroAssembler::get_load_const_from_toc_offset(address a) {
6287   assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load");
6288   //  expected code sequence:
6289   //    z_lgrl(t, simm32);    len = 6
6290   unsigned long inst;
6291   unsigned int  len = get_instruction(a, &inst);
6292   return get_pcrel_offset(inst);
6293 }
6294 
6295 //**********************************************************************************
6296 //  inspection of generated instruction sequences for a particular pattern
6297 //**********************************************************************************
6298 
// Check whether the instruction at "a" is a pc-relative load from the
// constant pool (currently only LGRL is emitted for that purpose).
// In debug builds, additionally trap a suspicious overlap pattern where
// instr(a) is a pc-relative load AND instr(a+2) decodes as a pc-relative call.
bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) {
#ifdef ASSERT
  unsigned long inst;
  unsigned int  len = get_instruction(a+2, &inst);
  if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) {
    const int range = 128;
    Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl");
    VM_Version::z_SIGSEGV(); // Deliberate crash after dumping diagnostic output.
  }
#endif
  // expected code sequence:
  //   z_lgrl(t, relAddr32);    len = 6
  //TODO: verify accessed data is in CP, if possible.
  return is_load_pcrelative_long(a);  // TODO: might be too general. Currently, only lgrl is used.
}
6314 
6315 bool MacroAssembler::is_load_const_from_toc_call(address a) {
6316   return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size());
6317 }
6318 
6319 bool MacroAssembler::is_load_const_call(address a) {
6320   return is_load_const(a) && is_call_byregister(a + load_const_size());
6321 }
6322 
6323 //-------------------------------------------------
//   Emitters for some really CISC instructions
6325 //-------------------------------------------------
6326 
6327 void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) {
6328   assert(dst->encoding()%2==0, "must be an even/odd register pair");
6329   assert(src->encoding()%2==0, "must be an even/odd register pair");
6330   assert(pad<256, "must be a padding BYTE");
6331 
6332   Label retry;
6333   bind(retry);
6334   Assembler::z_mvcle(dst, src, pad);
6335   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6336 }
6337 
6338 void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) {
6339   assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
6340   assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
6341   assert(pad<256, "must be a padding BYTE");
6342 
6343   Label retry;
6344   bind(retry);
6345   Assembler::z_clcle(left, right, pad, Z_R0);
6346   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6347 }
6348 
6349 void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) {
6350   assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
6351   assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
6352   assert(pad<=0xfff, "must be a padding HALFWORD");
6353   assert(VM_Version::has_ETF2(), "instruction must be available");
6354 
6355   Label retry;
6356   bind(retry);
6357   Assembler::z_clclu(left, right, pad, Z_R0);
6358   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6359 }
6360 
6361 void MacroAssembler::search_string(Register end, Register start) {
6362   assert(end->encoding() != 0, "end address must not be in R0");
6363   assert(start->encoding() != 0, "start address must not be in R0");
6364 
6365   Label retry;
6366   bind(retry);
6367   Assembler::z_srst(end, start);
6368   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6369 }
6370 
6371 void MacroAssembler::search_string_uni(Register end, Register start) {
6372   assert(end->encoding() != 0, "end address must not be in R0");
6373   assert(start->encoding() != 0, "start address must not be in R0");
6374   assert(VM_Version::has_ETF3(), "instruction must be available");
6375 
6376   Label retry;
6377   bind(retry);
6378   Assembler::z_srstu(end, start);
6379   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6380 }
6381 
6382 void MacroAssembler::kmac(Register srcBuff) {
6383   assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
6384   assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");
6385 
6386   Label retry;
6387   bind(retry);
6388   Assembler::z_kmac(Z_R0, srcBuff);
6389   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6390 }
6391 
6392 void MacroAssembler::kimd(Register srcBuff) {
6393   assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
6394   assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");
6395 
6396   Label retry;
6397   bind(retry);
6398   Assembler::z_kimd(Z_R0, srcBuff);
6399   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6400 }
6401 
6402 void MacroAssembler::klmd(Register srcBuff) {
6403   assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
6404   assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");
6405 
6406   Label retry;
6407   bind(retry);
6408   Assembler::z_klmd(Z_R0, srcBuff);
6409   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6410 }
6411 
6412 void MacroAssembler::km(Register dstBuff, Register srcBuff) {
6413   // DstBuff and srcBuff are allowed to be the same register (encryption in-place).
6414   // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block.
6415   assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
6416   assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
6417   assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");
6418 
6419   Label retry;
6420   bind(retry);
6421   Assembler::z_km(dstBuff, srcBuff);
6422   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6423 }
6424 
6425 void MacroAssembler::kmc(Register dstBuff, Register srcBuff) {
6426   // DstBuff and srcBuff are allowed to be the same register (encryption in-place).
6427   // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block.
6428   assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
6429   assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
6430   assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");
6431 
6432   Label retry;
6433   bind(retry);
6434   Assembler::z_kmc(dstBuff, srcBuff);
6435   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6436 }
6437 
6438 void MacroAssembler::cksm(Register crcBuff, Register srcBuff) {
6439   assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");
6440 
6441   Label retry;
6442   bind(retry);
6443   Assembler::z_cksm(crcBuff, srcBuff);
6444   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6445 }
6446 
6447 void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) {
6448   assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
6449   assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
6450 
6451   Label retry;
6452   bind(retry);
6453   Assembler::z_troo(r1, r2, m3);
6454   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6455 }
6456 
6457 void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) {
6458   assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
6459   assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
6460 
6461   Label retry;
6462   bind(retry);
6463   Assembler::z_trot(r1, r2, m3);
6464   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6465 }
6466 
6467 void MacroAssembler::translate_to(Register r1, Register r2, uint m3) {
6468   assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
6469   assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
6470 
6471   Label retry;
6472   bind(retry);
6473   Assembler::z_trto(r1, r2, m3);
6474   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6475 }
6476 
6477 void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) {
6478   assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
6479   assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
6480 
6481   Label retry;
6482   bind(retry);
6483   Assembler::z_trtt(r1, r2, m3);
6484   Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
6485 }
6486 
6487 
// Emits code that updates the receiver-type profile rows in Rdata for the
// observed Rreceiver_klass: bump the counter of the row matching the receiver,
// or claim the first empty row for a new receiver, or (fully polymorphic case)
// bump only the total count. Rwanted_receiver_klass and Rmatching_row are
// clobbered as scratch.
void MacroAssembler::generate_type_profiling(const Register Rdata,
                                             const Register Rreceiver_klass,
                                             const Register Rwanted_receiver_klass,
                                             const Register Rmatching_row,
                                             bool is_virtual_call) {
  const int row_size = in_bytes(ReceiverTypeData::receiver_offset(1)) -
                       in_bytes(ReceiverTypeData::receiver_offset(0));
  const int num_rows = ReceiverTypeData::row_limit();
  NearLabel found_free_row;
  NearLabel do_increment;
  NearLabel found_no_slot;

  BLOCK_COMMENT("type profiling {");

  // search for:
  //    a) The type given in Rwanted_receiver_klass.
  //    b) The *first* empty row.

  // First search for a) only, just running over b) with no regard.
  // This is possible because
  //    wanted_receiver_class == receiver_class  &&  wanted_receiver_class == 0
  // is never true (receiver_class can't be zero).
  for (int row_num = 0; row_num < num_rows; row_num++) {
    // Row_offset should be a well-behaved positive number. The generated code relies
    // on that wrt constant code size. Add2reg can handle all row_offset values, but
    // will have to vary generated code size.
    int row_offset = in_bytes(ReceiverTypeData::receiver_offset(row_num));
    assert(Displacement::is_shortDisp(row_offset), "Limitation of generated code");

    // Is Rwanted_receiver_klass in this row?
    if (VM_Version::has_CompareBranch()) {
      z_lg(Rwanted_receiver_klass, row_offset, Z_R0, Rdata);
      // Rmatching_row = Rdata + row_offset;
      add2reg(Rmatching_row, row_offset, Rdata);
      // if (*row_recv == (intptr_t) receiver_klass) goto fill_existing_slot;
      compare64_and_branch(Rwanted_receiver_klass, Rreceiver_klass, Assembler::bcondEqual, do_increment);
    } else {
      add2reg(Rmatching_row, row_offset, Rdata);
      z_cg(Rreceiver_klass, row_offset, Z_R0, Rdata);
      z_bre(do_increment);
    }
  }

  // Now that we did not find a match, let's search for b).

  // We could save the first calculation of Rmatching_row if we would search for a) in reverse order.
  // We would then end up here with Rmatching_row containing the value for row_num == 0.
  // We would not see much benefit, if any at all, because the CPU can schedule
  // two instructions together with a branch anyway.
  for (int row_num = 0; row_num < num_rows; row_num++) {
    int row_offset = in_bytes(ReceiverTypeData::receiver_offset(row_num));

    // Has this row a zero receiver_klass, i.e. is it empty?
    if (VM_Version::has_CompareBranch()) {
      z_lg(Rwanted_receiver_klass, row_offset, Z_R0, Rdata);
      // Rmatching_row = Rdata + row_offset
      add2reg(Rmatching_row, row_offset, Rdata);
      // if (*row_recv == (intptr_t) 0) goto found_free_row
      compare64_and_branch(Rwanted_receiver_klass, (intptr_t)0, Assembler::bcondEqual, found_free_row);
    } else {
      add2reg(Rmatching_row, row_offset, Rdata);
      load_and_test_long(Rwanted_receiver_klass, Address(Rdata, row_offset));
      z_bre(found_free_row);  // zero -> Found a free row.
    }
  }

  // No match, no empty row found.
  // Increment total counter to indicate polymorphic case.
  if (is_virtual_call) {
    add2mem_64(Address(Rdata, CounterData::count_offset()), 1, Rmatching_row);
  }
  z_bru(found_no_slot);

  // Here we found an empty row, but we have not found Rwanted_receiver_klass.
  // Rmatching_row holds the address to the first empty row.
  bind(found_free_row);
  // Store receiver_klass into empty slot.
  z_stg(Rreceiver_klass, 0, Z_R0, Rmatching_row);

  // Increment the counter of Rmatching_row.
  // Note: the found_free_row path deliberately falls through to here.
  bind(do_increment);
  ByteSize counter_offset = ReceiverTypeData::receiver_count_offset(0) - ReceiverTypeData::receiver_offset(0);
  add2mem_64(Address(Rmatching_row, counter_offset), 1, Rdata);

  bind(found_no_slot);

  BLOCK_COMMENT("} type profiling");
}
6576 
6577 //---------------------------------------
6578 // Helpers for Intrinsic Emitters
6579 //---------------------------------------
6580 
6581 /**
6582  * uint32_t crc;
6583  * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
6584  */
6585 void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) {
6586   assert_different_registers(crc, table, tmp);
6587   assert_different_registers(val, table);
6588   if (crc == val) {      // Must rotate first to use the unmodified value.
6589     rotate_then_insert(tmp, val, 56-2, 63-2, 2, true);  // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
6590     z_srl(crc, 8);       // Unsigned shift, clear leftmost 8 bits.
6591   } else {
6592     z_srl(crc, 8);       // Unsigned shift, clear leftmost 8 bits.
6593     rotate_then_insert(tmp, val, 56-2, 63-2, 2, true);  // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
6594   }
6595   z_x(crc, Address(table, tmp, 0));
6596 }
6597 
6598 /**
6599  * uint32_t crc;
6600  * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
6601  */
6602 void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
6603   fold_byte_crc32(crc, crc, table, tmp);
6604 }
6605 
6606 /**
6607  * Emits code to update CRC-32 with a byte value according to constants in table.
6608  *
6609  * @param [in,out]crc Register containing the crc.
6610  * @param [in]val     Register containing the byte to fold into the CRC.
6611  * @param [in]table   Register containing the table of crc constants.
6612  *
6613  * uint32_t crc;
6614  * val = crc_table[(val ^ crc) & 0xFF];
6615  * crc = val ^ (crc >> 8);
6616  */
6617 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
6618   z_xr(val, crc);
6619   fold_byte_crc32(crc, val, table, val);
6620 }
6621 
6622 
6623 /**
6624  * @param crc   register containing existing CRC (32-bit)
6625  * @param buf   register pointing to input byte buffer (byte*)
6626  * @param len   register containing number of bytes
6627  * @param table register pointing to CRC table
6628  */
6629 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) {
6630   assert_different_registers(crc, buf, len, table, data);
6631 
6632   Label L_mainLoop, L_done;
6633   const int mainLoop_stepping = 1;
6634 
6635   // Process all bytes in a single-byte loop.
6636   z_ltr(len, len);
6637   z_brnh(L_done);
6638 
6639   bind(L_mainLoop);
6640     z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
6641     add2reg(buf, mainLoop_stepping);        // Advance buffer position.
6642     update_byte_crc32(crc, data, table);
6643     z_brct(len, L_mainLoop);                // Iterate.
6644 
6645   bind(L_done);
6646 }
6647 
6648 /**
6649  * Emits code to update CRC-32 with a 4-byte value according to constants in table.
6650  * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c.
6651  *
6652  */
6653 void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
6654                                         Register t0,  Register t1,  Register t2,    Register t3) {
6655   // This is what we implement (the DOBIG4 part):
6656   //
6657   // #define DOBIG4 c ^= *++buf4; \
6658   //         c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
6659   //             crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
6660   // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
6661   // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian.
6662   const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
6663   const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
6664   const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
6665   const int ix3 = 7*(4*CRC32_COLUMN_SIZE);
6666 
6667   // XOR crc with next four bytes of buffer.
6668   lgr_if_needed(t0, crc);
6669   z_x(t0, Address(buf, bufDisp));
6670   if (bufInc != 0) {
6671     add2reg(buf, bufInc);
6672   }
6673 
6674   // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices.
6675   rotate_then_insert(t3, t0, 56-2, 63-2, 2,    true);  // ((c >>  0) & 0xff) << 2
6676   rotate_then_insert(t2, t0, 56-2, 63-2, 2-8,  true);  // ((c >>  8) & 0xff) << 2
6677   rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true);  // ((c >> 16) & 0xff) << 2
6678   rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true);  // ((c >> 24) & 0xff) << 2
6679 
6680   // XOR indexed table values to calculate updated crc.
6681   z_ly(t2, Address(table, t2, (intptr_t)ix1));
6682   z_ly(t0, Address(table, t0, (intptr_t)ix3));
6683   z_xy(t2, Address(table, t3, (intptr_t)ix0));
6684   z_xy(t0, Address(table, t1, (intptr_t)ix2));
6685   z_xr(t0, t2);           // Now t0 contains the updated CRC value.
6686   lgr_if_needed(crc, t0);
6687 }
6688 
6689 /**
6690  * @param crc   register containing existing CRC (32-bit)
6691  * @param buf   register pointing to input byte buffer (byte*)
6692  * @param len   register containing number of bytes
6693  * @param table register pointing to CRC table
6694  *
6695  * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
6696  */
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                                        Register t0,  Register t1,  Register t2,  Register t3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);

  Label L_mainLoop, L_tail;
  Register  data = t0;               // Byte scratch register for the tail loop.
  Register  ctr  = Z_R0;             // Main-loop trip count: len / mainLoop_stepping.
  const int mainLoop_stepping = 8;   // Two 4-byte words per main-loop pass.
  const int tailLoop_stepping = 1;   // NOTE(review): unused below; tail loop steps byte-wise internally.
  const int log_stepping      = exact_log2(mainLoop_stepping);

  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
  // The situation itself is detected and handled correctly by the conditional branches
  // following aghi(len, -stepping) and aghi(len, +stepping).

  // The CRC update works on the one's complement of the externally visible value.
  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }

#if 0
  {
    // Pre-mainLoop alignment did not show any positive effect on performance.
    // We leave the code in for reference. Maybe the vector instructions in z13 depend on alignment.

    z_cghi(len, mainLoop_stepping);    // Alignment is useless for short data streams.
    z_brnh(L_tail);

    // Align buf to word (4-byte) boundary.
    z_lcr(ctr, buf);
    rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc
    z_sgfr(len, ctr);                  // Remaining len after alignment.

    update_byteLoop_crc32(crc, buf, ctr, table, data);
  }
#endif

  // Check for short (<mainLoop_stepping bytes) buffer.
  z_srag(ctr, len, log_stepping);      // ctr = #8-byte chunks; also sets CC for the branch below.
  z_brnh(L_tail);

  z_lrvr(crc, crc);          // Revert byte order because we are dealing with big-endian data.
  rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop

  BIND(L_mainLoop);
    update_1word_crc32(crc, buf, table, 0, 0, crc, t1, t2, t3);
    update_1word_crc32(crc, buf, table, 4, mainLoop_stepping, crc, t1, t2, t3);
    z_brct(ctr, L_mainLoop); // Iterate.

  z_lrvr(crc, crc);          // Revert byte order back to original.

  // Process last few (<8) bytes of buffer.
  BIND(L_tail);
  update_byteLoop_crc32(crc, buf, len, table, data);

  // Undo the initial complement so the caller sees the conventional CRC value.
  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }
}
6757 
6758 /**
6759  * @param crc   register containing existing CRC (32-bit)
6760  * @param buf   register pointing to input byte buffer (byte*)
6761  * @param len   register containing number of bytes
6762  * @param table register pointing to CRC table
6763  *
6764  * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
6765  */
void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
                                        Register t0,  Register t1,  Register t2,  Register t3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);

  Label L_mainLoop, L_tail;
  Register  data = t0;               // Byte scratch register for the tail loop.
  Register  ctr  = Z_R0;             // Main-loop trip count: len / mainLoop_stepping.
  const int mainLoop_stepping = 4;   // One 4-byte word per main-loop pass.
  const int log_stepping      = exact_log2(mainLoop_stepping);

  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
  // The situation itself is detected and handled correctly by the conditional branches
  // following aghi(len, -stepping) and aghi(len, +stepping).

  // The CRC update works on the one's complement of the externally visible value.
  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }

  // Check for short (<4 bytes) buffer.
  z_srag(ctr, len, log_stepping);      // ctr = #4-byte chunks; also sets CC for the branch below.
  z_brnh(L_tail);

  z_lrvr(crc, crc);          // Revert byte order because we are dealing with big-endian data.
  rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop

  BIND(L_mainLoop);
    update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
    z_brct(ctr, L_mainLoop); // Iterate.

  z_lrvr(crc, crc);          // Revert byte order back to original.

  // Process last few (<4) bytes of buffer.
  BIND(L_tail);
  update_byteLoop_crc32(crc, buf, len, table, data);

  // Undo the initial complement so the caller sees the conventional CRC value.
  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }
}
6807 
6808 /**
6809  * @param crc   register containing existing CRC (32-bit)
6810  * @param buf   register pointing to input byte buffer (byte*)
6811  * @param len   register containing number of bytes
6812  * @param table register pointing to CRC table
6813  */
6814 void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
6815                                         Register t0,  Register t1,  Register t2,  Register t3,
6816                                         bool invertCRC) {
6817   assert_different_registers(crc, buf, len, table);
6818   Register data = t0;
6819 
6820   if (invertCRC) {
6821     not_(crc, noreg, false);           // 1s complement of crc
6822   }
6823 
6824   update_byteLoop_crc32(crc, buf, len, table, data);
6825 
6826   if (invertCRC) {
6827     not_(crc, noreg, false);           // 1s complement of crc
6828   }
6829 }
6830 
// Update crc with the single input byte at buf[0].
// NOTE(review): len participates only in the register-distinctness assertion;
// no length bookkeeping happens here — presumably maintained by the caller.
void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
                                             bool invertCRC) {
  assert_different_registers(crc, buf, len, table, tmp);

  // The CRC update works on the one's complement of the externally visible value.
  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }

  z_llgc(tmp, Address(buf, (intptr_t)0));  // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
  update_byte_crc32(crc, tmp, table);

  // Undo the initial complement so the caller sees the conventional CRC value.
  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }
}
6846 
// Update crc with the single input byte already held in register val.
void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table,
                                                bool invertCRC) {
  assert_different_registers(crc, val, table);

  // The CRC update works on the one's complement of the externally visible value.
  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }

  update_byte_crc32(crc, val, table);

  // Undo the initial complement so the caller sees the conventional CRC value.
  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }
}
6861 
6862 //
6863 // Code for BigInteger::multiplyToLen() intrinsic.
6864 //
6865 
6866 // dest_lo += src1 + src2
6867 // dest_hi += carry1 + carry2
6868 // Z_R7 is destroyed !
// dest_lo += src1 + src2
// dest_hi += carry1 + carry2
// Z_R7 is destroyed !
// Emits a 128-bit accumulate: the two 64-bit additions into dest_lo each
// propagate their carry into dest_hi via add-logical-with-carry of zero.
void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo,
                                     Register src1, Register src2) {
  clear_reg(Z_R7);           // Constant zero operand; alcgr below adds only the carry bit.
  z_algr(dest_lo, src1);     // dest_lo += src1, sets carry.
  z_alcgr(dest_hi, Z_R7);    // dest_hi += carry of first addition.
  z_algr(dest_lo, src2);     // dest_lo += src2, sets carry.
  z_alcgr(dest_hi, Z_R7);    // dest_hi += carry of second addition.
}
6877 
6878 // Multiply 64 bit by 64 bit first loop.
// Multiply 64 bit by 64 bit first loop.
void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart,
                                           Register x_xstart,
                                           Register y, Register y_idx,
                                           Register z,
                                           Register carry,
                                           Register product,
                                           Register idx, Register kdx) {
  // jlong carry, x[], y[], z[];
  // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
  //   huge_128 product = y[idx] * x[xstart] + carry;
  //   z[kdx] = (jlong)product;
  //   carry  = (jlong)(product >>> 64);
  // }
  // z[xstart] = carry;

  Label L_first_loop, L_first_loop_exit;
  Label L_one_x, L_one_y, L_multiply;

  z_aghi(xstart, -1);        // Step to the index of x's next (64-bit) digit.
  z_brl(L_one_x);   // Special case: length of x is 1.

  // Load next two integers of x.
  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);  // Byte offset into the int array.
  mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));


  bind(L_first_loop);

  z_aghi(idx, -1);
  z_brl(L_first_loop_exit);  // All of y consumed.
  z_aghi(idx, -1);
  z_brl(L_one_y);            // Only a single 32-bit digit of y left.

  // Load next two integers of y.
  z_sllg(Z_R1_scratch, idx, LogBytesPerInt);
  mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0));


  bind(L_multiply);

  // z_mlgr produces a 128-bit result in an even/odd register pair:
  // high 64 bits in product (even), low 64 bits in its successor (odd).
  Register multiplicand = product->successor();
  Register product_low = multiplicand;

  lgr_if_needed(multiplicand, x_xstart);
  z_mlgr(product, y_idx);     // multiplicand * y_idx -> product::multiplicand
  clear_reg(Z_R7);
  z_algr(product_low, carry); // Add carry to result.
  z_alcgr(product, Z_R7);     // Add carry of the last addition.
  add2reg(kdx, -2);           // Result consumes two int slots in z.

  // Store result.
  z_sllg(Z_R7, kdx, LogBytesPerInt);
  reg2mem_opt(product_low, Address(z, Z_R7, 0));
  lgr_if_needed(carry, product);   // High half becomes the next iteration's carry.
  z_bru(L_first_loop);


  bind(L_one_y); // Load one 32 bit portion of y as (0,value).

  clear_reg(y_idx);
  mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false);  // Zero-extended 32-bit load.
  z_bru(L_multiply);


  bind(L_one_x); // Load one 32 bit portion of x as (0,value).

  clear_reg(x_xstart);
  mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);  // Zero-extended 32-bit load.
  z_bru(L_first_loop);

  bind(L_first_loop_exit);
}
6951 
6952 // Multiply 64 bit by 64 bit and add 128 bit.
// Multiply 64 bit by 64 bit and add 128 bit.
void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y,
                                            Register z,
                                            Register yz_idx, Register idx,
                                            Register carry, Register product,
                                            int offset) {
  // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
  // z[kdx] = (jlong)product;

  // z_mlgr produces a 128-bit result in an even/odd register pair:
  // high 64 bits in product (even), low 64 bits in its successor (odd).
  Register multiplicand = product->successor();
  Register product_low = multiplicand;

  z_sllg(Z_R7, idx, LogBytesPerInt);     // Z_R7 = byte offset of int digit idx.
  mem2reg_opt(yz_idx, Address(y, Z_R7, offset));

  lgr_if_needed(multiplicand, x_xstart);
  z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
  mem2reg_opt(yz_idx, Address(z, Z_R7, offset));

  add2_with_carry(product, product_low, carry, yz_idx);

  // Recompute the offset: add2_with_carry() destroys Z_R7 (see its comment),
  // so this z_sllg is NOT redundant.
  z_sllg(Z_R7, idx, LogBytesPerInt);
  reg2mem_opt(product_low, Address(z, Z_R7, offset));

}
6977 
6978 // Multiply 128 bit by 128 bit. Unrolled inner loop.
// Multiply 128 bit by 128 bit. Unrolled inner loop.
void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
                                             Register y, Register z,
                                             Register yz_idx, Register idx,
                                             Register jdx,
                                             Register carry, Register product,
                                             Register carry2) {
  // jlong carry, x[], y[], z[];
  // int kdx = ystart+1;
  // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
  //   huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
  //   z[kdx+idx+1] = (jlong)product;
  //   jlong carry2 = (jlong)(product >>> 64);
  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
  //   z[kdx+idx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }
  // idx += 2;
  // if (idx > 0) {
  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
  //   z[kdx+idx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }

  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;

  // scale the index
  // jdx = idx / 4: trip count for the unrolled loop (4 int digits per pass).
  lgr_if_needed(jdx, idx);
  and_imm(jdx, 0xfffffffffffffffcL);
  rshift(jdx, 2);


  bind(L_third_loop);

  z_aghi(jdx, -1);
  z_brl(L_third_loop_exit);
  add2reg(idx, -4);

  // Two 64-bit multiply-accumulate steps per pass (offsets 8 and 0);
  // the high half of each product feeds the next step's carry.
  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8);
  lgr_if_needed(carry2, product);

  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0);
  lgr_if_needed(carry, product);
  z_bru(L_third_loop);


  bind(L_third_loop_exit);  // Handle any left-over operand parts.

  and_imm(idx, 0x3);        // Remaining int digits: 0..3.
  z_brz(L_post_third_loop_done);

  Label L_check_1;

  z_aghi(idx, -2);
  z_brl(L_check_1);         // Fewer than two int digits left.

  // One full 64-bit digit remains.
  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0);
  lgr_if_needed(carry, product);


  bind(L_check_1);

  // Check for one final single 32-bit digit.
  add2reg(idx, 0x2);
  and_imm(idx, 0x1);
  z_aghi(idx, -1);
  z_brl(L_post_third_loop_done);

  // z_mlgr produces a 128-bit result in an even/odd register pair:
  // high 64 bits in product (even), low 64 bits in its successor (odd).
  Register   multiplicand = product->successor();
  Register   product_low = multiplicand;

  z_sllg(Z_R7, idx, LogBytesPerInt);
  clear_reg(yz_idx);
  mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false);   // Zero-extended 32-bit load.
  lgr_if_needed(multiplicand, x_xstart);
  z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
  clear_reg(yz_idx);
  mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false);

  add2_with_carry(product, product_low, yz_idx, carry);

  // Recompute the offset: add2_with_carry() destroys Z_R7, so this is NOT redundant.
  z_sllg(Z_R7, idx, LogBytesPerInt);
  reg2mem_opt(product_low, Address(z, Z_R7, 0), false);  // Store low 32 bits only.
  rshift(product_low, 32);

  // Reassemble the new carry from the bits above the stored 32-bit digit:
  // carry = (product << 32) | (product_low >> 32).
  lshift(product, 32);
  z_ogr(product_low, product);
  lgr_if_needed(carry, product_low);

  bind(L_post_third_loop_done);
}
7068 
// Emit the BigInteger.multiplyToLen() intrinsic:
//   z[0 .. xlen+ylen-1] = x[0 .. xlen-1] * y[0 .. ylen-1]
// with all operands stored as arrays of 32-bit int digits.
// The fifth length argument (zlen) is read from its stack slot (see below).
// Z_R7..Z_R13 are used as scratch and saved/restored here; Z_R0 and
// Z_R1_scratch are clobbered.
void MacroAssembler::multiply_to_len(Register x, Register xlen,
                                     Register y, Register ylen,
                                     Register z,
                                     Register tmp1, Register tmp2,
                                     Register tmp3, Register tmp4,
                                     Register tmp5) {
  ShortBranchVerifier sbv(this);

  assert_different_registers(x, xlen, y, ylen, z,
                             tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7);
  assert_different_registers(x, xlen, y, ylen, z,
                             tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8);

  z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);   // Save scratch registers used below.

  // In openJdk, we store the argument as 32-bit value to slot.
  Address zlen(Z_SP, _z_abi(remaining_cargs));  // Int in long on big endian.

  const Register idx = tmp1;        // Index into y (and later jdx).
  const Register kdx = tmp2;        // Index into z.
  const Register xstart = tmp3;     // Index into x.

  const Register y_idx = tmp4;      // Current digit(s) of y.
  const Register carry = tmp5;
  const Register product  = Z_R0_scratch;  // Even half of the mlgr pair.
  const Register x_xstart = Z_R8;          // Current digit(s) of x.

  // First Loop.
  //
  //   final static long LONG_MASK = 0xffffffffL;
  //   int xstart = xlen - 1;
  //   int ystart = ylen - 1;
  //   long carry = 0;
  //   for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) {
  //     long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
  //     z[kdx] = (int)product;
  //     carry = product >>> 32;
  //   }
  //   z[xstart] = (int)carry;
  //

  lgr_if_needed(idx, ylen);  // idx = ylen
  z_llgf(kdx, zlen);         // C2 does not respect int to long conversion for stub calls, thus load zero-extended.
  clear_reg(carry);          // carry = 0

  Label L_done;

  lgr_if_needed(xstart, xlen);
  z_aghi(xstart, -1);
  z_brl(L_done);             // Nothing to do for xlen == 0.

  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);

  NearLabel L_second_loop;
  compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop);

  NearLabel L_carry;
  z_aghi(kdx, -1);
  z_brz(L_carry);            // Only one int slot left: store high half only.

  // Store lower 32 bits of carry.
  z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
  rshift(carry, 32);
  z_aghi(kdx, -1);


  bind(L_carry);

  // Store upper 32 bits of carry.
  z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);

  // Second and third (nested) loops.
  //
  // for (int i = xstart-1; i >= 0; i--) { // Second loop
  //   carry = 0;
  //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
  //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
  //                    (z[k] & LONG_MASK) + carry;
  //     z[k] = (int)product;
  //     carry = product >>> 32;
  //   }
  //   z[i] = (int)carry;
  // }
  //
  // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx

  const Register jdx = tmp1;   // Reuses idx's register for the inner-loop count.

  bind(L_second_loop);

  clear_reg(carry);           // carry = 0;
  lgr_if_needed(jdx, ylen);   // j = ystart+1

  z_aghi(xstart, -1);         // i = xstart-1;
  z_brl(L_done);

  // Use free slots in the current stackframe instead of push/pop.
  Address zsave(Z_SP, _z_abi(carg_1));
  reg2mem_opt(z, zsave);


  Label L_last_x;

  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
  load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j
  z_aghi(xstart, -1);                           // i = xstart-1;
  z_brl(L_last_x);                              // Only a single 32-bit digit of x left.

  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
  mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));


  Label L_third_loop_prologue;

  bind(L_third_loop_prologue);

  // Stack slots for values the inner loop clobbers (it reuses these registers).
  Address xsave(Z_SP, _z_abi(carg_2));
  Address xlensave(Z_SP, _z_abi(carg_3));
  Address ylensave(Z_SP, _z_abi(carg_4));

  reg2mem_opt(x, xsave);
  reg2mem_opt(xstart, xlensave);
  reg2mem_opt(ylen, ylensave);


  multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x);

  mem2reg_opt(z, zsave);
  mem2reg_opt(x, xsave);
  mem2reg_opt(xlen, xlensave);   // This is the decrement of the loop counter!
  mem2reg_opt(ylen, ylensave);

  add2reg(tmp3, 1, xlen);        // tmp3 = i+1; restores xstart's role from the saved counter.
  z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);  // z[i] = (int)carry (low half).
  z_aghi(tmp3, -1);
  z_brl(L_done);

  rshift(carry, 32);
  z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);  // Store high half of carry.
  z_bru(L_second_loop);

  // Next infrequent code is moved outside loops.
  bind(L_last_x);

  clear_reg(x_xstart);
  mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);  // Load x[0] as (0,value).
  z_bru(L_third_loop_prologue);

  bind(L_done);

  z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);    // Restore saved scratch registers.
}
7225 
7226 #ifndef PRODUCT
7227 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false).
7228 void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
7229   Label ok;
7230   if (check_equal) {
7231     z_bre(ok);
7232   } else {
7233     z_brne(ok);
7234   }
7235   stop(msg, id);
7236   bind(ok);
7237 }
7238 
7239 // Assert if CC indicates "low".
void MacroAssembler::asm_assert_low(const char *msg, int id) {
  Label ok;
  z_brnl(ok);      // Branch around the stop if CC is "not low".
  stop(msg, id);   // CC was "low": halt with the given message.
  bind(ok);
}
7246 
7247 // Assert if CC indicates "high".
void MacroAssembler::asm_assert_high(const char *msg, int id) {
  Label ok;
  z_brnh(ok);      // Branch around the stop if CC is "not high".
  stop(msg, id);   // CC was "high": halt with the given message.
  bind(ok);
}
7254 
7255 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false)
7256 // generate non-relocatable code.
7257 void MacroAssembler::asm_assert_static(bool check_equal, const char *msg, int id) {
7258   Label ok;
7259   if (check_equal) { z_bre(ok); }
7260   else             { z_brne(ok); }
7261   stop_static(msg, id);
7262   bind(ok);
7263 }
7264 
// Load the 4- or 8-byte operand at (mem_base, mem_offset), set the condition
// code by testing it against zero, then assert on the CC as requested.
// Clobbers Z_R0.
void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset,
                                          Register mem_base, const char* msg, int id) {
  switch (size) {
    case 4:
      load_and_test_int(Z_R0, Address(mem_base, mem_offset));    // 32-bit operand.
      break;
    case 8:
      load_and_test_long(Z_R0,  Address(mem_base, mem_offset));  // 64-bit operand.
      break;
    default:
      ShouldNotReachHere();   // Only int/long sizes supported.
  }
  // Static variant emits non-relocatable code (see asm_assert_static()).
  if (allow_relocation) { asm_assert(check_equal, msg, id); }
  else                  { asm_assert_static(check_equal, msg, id); }
}
7280 
7281 // Check the condition
7282 //   expected_size == FP - SP
7283 // after transformation:
7284 //   expected_size - FP + SP == 0
7285 // Destroys Register expected_size if no tmp register is passed.
7286 void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) {
7287   if (tmp == noreg) {
7288     tmp = expected_size;
7289   } else {
7290     if (tmp != expected_size) {
7291       z_lgr(tmp, expected_size);
7292     }
7293     z_algr(tmp, Z_SP);
7294     z_slg(tmp, 0, Z_R0, Z_SP);
7295     asm_assert_eq(msg, id);
7296   }
7297 }
7298 #endif // !PRODUCT
7299 
7300 void MacroAssembler::verify_thread() {
7301   if (VerifyThread) {
7302     unimplemented("", 117);
7303   }
7304 }
7305 
7306 // Plausibility check for oops.
// Emit a call to the verify_oop stub which sanity-checks the oop in 'oop'.
// The full caller context (return pc, Z_R1..Z_R5) is preserved around the call.
void MacroAssembler::verify_oop(Register oop, const char* msg) {
  if (!VerifyOops) return;

  BLOCK_COMMENT("verify_oop {");
  Register tmp = Z_R0;                         // NOTE(review): declared but unused below.
  unsigned int nbytes_save = 5*BytesPerWord;   // Frame space for Z_R1..Z_R5.
  address entry = StubRoutines::verify_oop_subroutine_entry_address();

  save_return_pc();
  push_frame_abi160(nbytes_save);
  z_stmg(Z_R1, Z_R5, frame::z_abi_160_size, Z_SP);  // Save volatile regs across the call.

  z_lgr(Z_ARG2, oop);
  load_const(Z_ARG1, (address) msg);
  load_const(Z_R1, entry);
  z_lg(Z_R1, 0, Z_R1);    // Double indirection: entry holds the address of the stub entry pointer.
  call_c(Z_R1);

  z_lmg(Z_R1, Z_R5, frame::z_abi_160_size, Z_SP);
  pop_frame();
  restore_return_pc();

  BLOCK_COMMENT("} verify_oop ");
}
7331 
// Textual tags for the stop types, indexed via 'type % stop_end' in
// stop()/stop_chain(). NOTE(review): the order must match the stop type
// constants declared in the header — confirm when editing either side.
const char* MacroAssembler::stop_types[] = {
  "stop",
  "untested",
  "unimplemented",
  "shouldnotreachhere"
};
7338 
// Runtime (C) target for MacroAssembler::stop() and friends: print the stop
// type tag and message, then terminate the VM via guarantee(false, ...).
static void stop_on_request(const char* tp, const char* msg) {
  tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg);
  guarantee(false, "Z assembly code requires stop: %s", msg);
}
7343 
// Emit code that calls stop_on_request() with the given stop type and message.
// Generated code never continues past this point (illtraps guard the return).
void MacroAssembler::stop(int type, const char* msg, int id) {
  BLOCK_COMMENT(err_msg("stop: %s {", msg));

  // Setup arguments.
  load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
  load_const(Z_ARG2, (void*) msg);
  get_PC(Z_R14);     // Following code pushes a frame without entering a new function. Use current pc as return address.
  save_return_pc();  // Saves return pc Z_R14.
  push_frame_abi160(0);
  call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
  // The plain disassembler does not recognize illtrap. It instead displays
  // a 32-bit value. Issueing two illtraps assures the disassembler finds
  // the proper beginning of the next instruction.
  z_illtrap(); // Illegal instruction.
  z_illtrap(); // Illegal instruction.

  BLOCK_COMMENT(" } stop");
}
7362 
7363 // Special version of stop() for code size reduction.
7364 // Reuses the previously generated call sequence, if any.
7365 // Generates the call sequence on its own, if necessary.
7366 // Note: This code will work only in non-relocatable code!
7367 //       The relative address of the data elements (arg1, arg2) must not change.
7368 //       The reentry point must not move relative to it's users. This prerequisite
7369 //       should be given for "hand-written" code, if all chain calls are in the same code blob.
7370 //       Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) {
  BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==NULL?"init":"cont", allow_relocation?"reloc ":"static", msg));

  // Setup arguments.
  if (allow_relocation) {
    // Relocatable version (for comparison purposes). Remove after some time.
    load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
    load_const(Z_ARG2, (void*) msg);
  } else {
    load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]);
    load_absolute_address(Z_ARG2, (address)msg);
  }
  // Reuse a previously generated call sequence if it is within reach of a
  // 16-bit relative branch; otherwise emit a fresh sequence and publish its
  // address as the new reentry point.
  if ((reentry != NULL) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) {
    BLOCK_COMMENT("branch to reentry point:");
    z_brc(bcondAlways, reentry);
  } else {
    BLOCK_COMMENT("reentry point:");
    reentry = pc();      // Re-entry point for subsequent stop calls.
    save_return_pc();    // Saves return pc Z_R14.
    push_frame_abi160(0);
    if (allow_relocation) {
      reentry = NULL;    // Prevent reentry if code relocation is allowed.
      call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
    } else {
      call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
    }
    z_illtrap(); // Illegal instruction as emergency stop, should the above call return.
  }
  BLOCK_COMMENT(" } stop_chain");

  // Returned address is NULL if relocation is allowed (sequence not reusable).
  return reentry;
}
7403 
7404 // Special version of stop() for code size reduction.
7405 // Assumes constant relative addresses for data and runtime call.
void MacroAssembler::stop_static(int type, const char* msg, int id) {
  // reentry == NULL forces generation of a fresh, non-relocatable call sequence.
  stop_chain(NULL, type, msg, id, false);
}
7409 
void MacroAssembler::stop_subroutine() {
  // Not implemented on this platform; guard against accidental use.
  unimplemented("stop_subroutine", 710);
}
7413 
7414 // Prints msg to stdout from within generated code..
void MacroAssembler::warn(const char* msg) {
  // Preserve the complete register state around the C call to warning().
  RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14);
  load_absolute_address(Z_R1, (address) warning);
  load_absolute_address(Z_ARG1, (address) msg);
  (void) call(Z_R1);   // Return value (if any) intentionally ignored.
  RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers);
}
7422 
7423 #ifndef PRODUCT
7424 
7425 // Write pattern 0x0101010101010101 in region [low-before, high+after].
// Write pattern 0x0101010101010101 in region [low-before, high+after].
// 'val' and 'addr' are scratch registers; 'high' is modified temporarily
// in the loop variant but restored before returning.
void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) {
  if (!ZapEmptyStackFields) return;
  BLOCK_COMMENT("zap memory region {");
  load_const_optimized(val, 0x0101010101010101);
  int size = before + after;
  if (low == high && size < 5 && size > 0) {
    // Small fixed-size region relative to a single register:
    // emit straight-line stores, no loop needed.
    int offset = -before*BytesPerWord;
    for (int i = 0; i < size; ++i) {
      z_stg(val, Address(low, offset));
      offset +=(1*BytesPerWord);
    }
  } else {
    // General case: loop from low-before up to high+after.
    add2reg(addr, -before*BytesPerWord, low);
    if (after) {
#ifdef ASSERT
      jlong check = after * BytesPerWord;
      assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !");
#endif
      add2reg(high, after * BytesPerWord);   // Temporarily extend the upper bound.
    }
    NearLabel loop;
    bind(loop);
    z_stg(val, Address(addr));
    add2reg(addr, 8);
    compare64_and_branch(addr, high, bcondNotHigh, loop);
    if (after) {
      add2reg(high, -after * BytesPerWord);  // Restore the caller's 'high'.
    }
  }
  BLOCK_COMMENT("} zap memory region");
}
7457 #endif // !PRODUCT
7458 
7459 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) {
7460   _masm = masm;
7461   _masm->load_absolute_address(_rscratch, (address)flag_addr);
7462   _masm->load_and_test_int(_rscratch, Address(_rscratch));
7463   if (value) {
7464     _masm->z_brne(_label); // Skip if true, i.e. != 0.
7465   } else {
7466     _masm->z_bre(_label);  // Skip if false, i.e. == 0.
7467   }
7468 }
7469 
SkipIfEqual::~SkipIfEqual() {
  // Bind the branch target emitted by the constructor: end of the skipped region.
  _masm->bind(_label);
}