1 /*
   2  * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "registerSaver_s390.hpp"
  29 #include "gc/shared/cardTable.hpp"
  30 #include "gc/shared/cardTableModRefBS.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "interpreter/interp_masm.hpp"
  33 #include "nativeInst_s390.hpp"
  34 #include "oops/instanceOop.hpp"
  35 #include "oops/objArrayKlass.hpp"
  36 #include "oops/oop.inline.hpp"
  37 #include "prims/methodHandles.hpp"
  38 #include "runtime/frame.inline.hpp"
  39 #include "runtime/handles.inline.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubCodeGenerator.hpp"
  42 #include "runtime/stubRoutines.hpp"
  43 #include "runtime/thread.inline.hpp"
  44 
  45 // Declaration and definition of StubGenerator (no .hpp file).
  46 // For a more detailed description of the stub routine structure
  47 // see the comment in stubRoutines.hpp.
  48 
  49 #ifdef PRODUCT
  50 #define __ _masm->
  51 #else
  52 #define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
  53 #endif
  54 
  55 #define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
  56 #define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
  57 
  58 // -----------------------------------------------------------------------
  59 // Stub Code definitions
  60 
  61 class StubGenerator: public StubCodeGenerator {
  62  private:
  63 
  64   //----------------------------------------------------------------------
  65   // Call stubs are used to call Java from C.
  66 
  67   //
  68   // Arguments:
  69   //
  70   //   R2        - call wrapper address     : address
  71   //   R3        - result                   : intptr_t*
  72   //   R4        - result type              : BasicType
  73   //   R5        - method                   : method
  74   //   R6        - frame mgr entry point    : address
  75   //   [SP+160]  - parameter block          : intptr_t*
  76   //   [SP+172]  - parameter count in words : int
  77   //   [SP+176]  - thread                   : Thread*
  78   //
  79   address generate_call_stub(address& return_address) {
  80     // Set up a new C frame, copy Java arguments, call frame manager
  81     // or native_entry, and process result.
  82 
  83     StubCodeMark mark(this, "StubRoutines", "call_stub");
  84     address start = __ pc();
  85 
  86     Register r_arg_call_wrapper_addr   = Z_ARG1;
  87     Register r_arg_result_addr         = Z_ARG2;
  88     Register r_arg_result_type         = Z_ARG3;
  89     Register r_arg_method              = Z_ARG4;
  90     Register r_arg_entry               = Z_ARG5;
  91 
   92     // Offsets of the stack-passed C arguments, relative to r_entryframe_fp (the caller's SP).
  93     #define d_arg_thread 176
  94     #define d_arg_argument_addr 160
   95     #define d_arg_argument_count (168+4) // The int count is right-aligned in its 8-byte stack slot, hence +4.
  96 
  97     Register r_entryframe_fp           = Z_tmp_1;
  98     Register r_top_of_arguments_addr   = Z_ARG4;
  99     Register r_new_arg_entry = Z_R14;
 100 
 101     // macros for frame offsets
 102     #define call_wrapper_address_offset \
 103                _z_entry_frame_locals_neg(call_wrapper_address)
 104     #define result_address_offset \
 105               _z_entry_frame_locals_neg(result_address)
 106     #define result_type_offset \
 107               _z_entry_frame_locals_neg(result_type)
 108     #define arguments_tos_address_offset \
 109               _z_entry_frame_locals_neg(arguments_tos_address)
 110 
 111     {
 112       //
 113       // STACK on entry to call_stub:
 114       //
 115       //     F1      [C_FRAME]
 116       //            ...
 117       //
 118 
 119       Register r_argument_addr              = Z_tmp_3;
 120       Register r_argumentcopy_addr          = Z_tmp_4;
 121       Register r_argument_size_in_bytes     = Z_ARG5;
 122       Register r_frame_size                 = Z_R1;
 123 
 124       Label arguments_copied;
 125 
 126       // Save non-volatile registers to ABI of caller frame.
 127       BLOCK_COMMENT("save registers, push frame {");
 128       __ z_stmg(Z_R6, Z_R14, 16, Z_SP);
 129       __ z_std(Z_F8, 96, Z_SP);
 130       __ z_std(Z_F9, 104, Z_SP);
 131       __ z_std(Z_F10, 112, Z_SP);
 132       __ z_std(Z_F11, 120, Z_SP);
 133       __ z_std(Z_F12, 128, Z_SP);
 134       __ z_std(Z_F13, 136, Z_SP);
 135       __ z_std(Z_F14, 144, Z_SP);
 136       __ z_std(Z_F15, 152, Z_SP);
 137 
 138       //
 139       // Push ENTRY_FRAME including arguments:
 140       //
 141       //     F0      [TOP_IJAVA_FRAME_ABI]
 142       //             [outgoing Java arguments]
 143       //             [ENTRY_FRAME_LOCALS]
 144       //     F1      [C_FRAME]
 145       //             ...
 146       //
 147 
 148       // Calculate new frame size and push frame.
 149       #define abi_plus_locals_size \
 150                 (frame::z_top_ijava_frame_abi_size + frame::z_entry_frame_locals_size)
 151       if (abi_plus_locals_size % BytesPerWord == 0) {
 152         // Preload constant part of frame size.
 153         __ load_const_optimized(r_frame_size, -abi_plus_locals_size/BytesPerWord);
 154         // Keep copy of our frame pointer (caller's SP).
 155         __ z_lgr(r_entryframe_fp, Z_SP);
 156         // Add space required by arguments to frame size.
 157         __ z_slgf(r_frame_size, d_arg_argument_count, Z_R0, Z_SP);
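              // Note: r_frame_size is kept negative here; subtracting the 32-bit argument count
              // (SLGF) from the negative constant grows the frame by one word per stack argument.
              // push_frame() below is told that the sign is inverted.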
 158         // Move Z_ARG5 early, it will be used as a local.
 159         __ z_lgr(r_new_arg_entry, r_arg_entry);
 160         // Convert frame size from words to bytes.
 161         __ z_sllg(r_frame_size, r_frame_size, LogBytesPerWord);
 162         __ push_frame(r_frame_size, r_entryframe_fp,
 163                       false/*don't copy SP*/, true /*frame size sign inverted*/);
 164       } else {
 165         guarantee(false, "frame sizes should be multiples of word size (BytesPerWord)");
 166       }
 167       BLOCK_COMMENT("} save, push");
 168 
 169       // Load argument registers for call.
 170       BLOCK_COMMENT("prepare/copy arguments {");
 171       __ z_lgr(Z_method, r_arg_method);
 172       __ z_lg(Z_thread, d_arg_thread, r_entryframe_fp);
 173 
 174       // Calculate top_of_arguments_addr which will be tos (not prepushed) later.
  175       // Simply use SP + frame::z_top_ijava_frame_abi_size - BytesPerWord.
 176       __ add2reg(r_top_of_arguments_addr,
 177                  frame::z_top_ijava_frame_abi_size - BytesPerWord, Z_SP);
 178 
 179       // Initialize call_stub locals (step 1).
 180       if ((call_wrapper_address_offset + BytesPerWord == result_address_offset) &&
 181           (result_address_offset + BytesPerWord == result_type_offset)          &&
 182           (result_type_offset + BytesPerWord == arguments_tos_address_offset)) {
 183 
 184         __ z_stmg(r_arg_call_wrapper_addr, r_top_of_arguments_addr,
 185                   call_wrapper_address_offset, r_entryframe_fp);
 186       } else {
 187         __ z_stg(r_arg_call_wrapper_addr,
 188                  call_wrapper_address_offset, r_entryframe_fp);
 189         __ z_stg(r_arg_result_addr,
 190                  result_address_offset, r_entryframe_fp);
 191         __ z_stg(r_arg_result_type,
 192                  result_type_offset, r_entryframe_fp);
 193         __ z_stg(r_top_of_arguments_addr,
 194                  arguments_tos_address_offset, r_entryframe_fp);
 195       }
 196 
 197       // Copy Java arguments.
 198 
 199       // Any arguments to copy?
 200       __ load_and_test_int2long(Z_R1, Address(r_entryframe_fp, d_arg_argument_count));
 201       __ z_bre(arguments_copied);
 202 
 203       // Prepare loop and copy arguments in reverse order.
 204       {
 205         // Calculate argument size in bytes.
 206         __ z_sllg(r_argument_size_in_bytes, Z_R1, LogBytesPerWord);
 207 
 208         // Get addr of first incoming Java argument.
 209         __ z_lg(r_argument_addr, d_arg_argument_addr, r_entryframe_fp);
 210 
 211         // Let r_argumentcopy_addr point to last outgoing Java argument.
 212         __ add2reg(r_argumentcopy_addr, BytesPerWord, r_top_of_arguments_addr); // = Z_SP+160 effectively.
 213 
 214         // Let r_argument_addr point to last incoming Java argument.
 215         __ add2reg_with_index(r_argument_addr, -BytesPerWord,
 216                               r_argument_size_in_bytes, r_argument_addr);
 217 
 218         // Now loop while Z_R1 > 0 and copy arguments.
 219         {
 220           Label next_argument;
 221           __ bind(next_argument);
 222           // Mem-mem move.
 223           __ z_mvc(0, BytesPerWord-1, r_argumentcopy_addr, 0, r_argument_addr);
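                // (The MVC length operand is encoded as length-1, so BytesPerWord-1 copies exactly
                //  BytesPerWord bytes per argument.)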
 224           __ add2reg(r_argument_addr,    -BytesPerWord);
 225           __ add2reg(r_argumentcopy_addr, BytesPerWord);
 226           __ z_brct(Z_R1, next_argument);
 227         }
 228       }  // End of argument copy loop.
 229 
 230       __ bind(arguments_copied);
 231     }
 232     BLOCK_COMMENT("} arguments");
 233 
 234     BLOCK_COMMENT("call {");
 235     {
 236       // Call frame manager or native entry.
 237 
 238       //
 239       // Register state on entry to frame manager / native entry:
 240       //
 241       //   Z_ARG1 = r_top_of_arguments_addr  - intptr_t *sender tos (prepushed)
 242       //                                       Lesp = (SP) + copied_arguments_offset - 8
 243       //   Z_method                          - method
 244       //   Z_thread                          - JavaThread*
 245       //
 246 
 247       // Here, the usual SP is the initial_caller_sp.
 248       __ z_lgr(Z_R10, Z_SP);
 249 
 250       // Z_esp points to the slot below the last argument.
 251       __ z_lgr(Z_esp, r_top_of_arguments_addr);
 252 
 253       //
 254       // Stack on entry to frame manager / native entry:
 255       //
 256       //     F0      [TOP_IJAVA_FRAME_ABI]
 257       //             [outgoing Java arguments]
 258       //             [ENTRY_FRAME_LOCALS]
 259       //     F1      [C_FRAME]
 260       //             ...
 261       //
 262 
 263       // Do a light-weight C-call here, r_new_arg_entry holds the address
 264       // of the interpreter entry point (frame manager or native entry)
 265       // and save runtime-value of return_pc in return_address
 266       // (call by reference argument).
 267       return_address = __ call_stub(r_new_arg_entry);
 268     }
 269     BLOCK_COMMENT("} call");
 270 
 271     {
 272       BLOCK_COMMENT("restore registers {");
 273       // Returned from frame manager or native entry.
 274       // Now pop frame, process result, and return to caller.
 275 
 276       //
 277       // Stack on exit from frame manager / native entry:
 278       //
 279       //     F0      [ABI]
 280       //             ...
 281       //             [ENTRY_FRAME_LOCALS]
 282       //     F1      [C_FRAME]
 283       //             ...
 284       //
 285       // Just pop the topmost frame ...
 286       //
 287 
 288       Label ret_is_object;
 289       Label ret_is_long;
 290       Label ret_is_float;
 291       Label ret_is_double;
 292 
 293       // Restore frame pointer.
 294       __ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP);
 295       // Pop frame. Done here to minimize stalls.
 296       __ pop_frame();
 297 
 298       // Reload some volatile registers which we've spilled before the call
 299       // to frame manager / native entry.
 300       // Access all locals via frame pointer, because we know nothing about
 301       // the topmost frame's size.
 302       __ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp);
 303       __ z_lg(r_arg_result_type, result_type_offset, r_entryframe_fp);
 304 
 305       // Restore non-volatiles.
 306       __ z_lmg(Z_R6, Z_R14, 16, Z_SP);
 307       __ z_ld(Z_F8, 96, Z_SP);
 308       __ z_ld(Z_F9, 104, Z_SP);
 309       __ z_ld(Z_F10, 112, Z_SP);
 310       __ z_ld(Z_F11, 120, Z_SP);
 311       __ z_ld(Z_F12, 128, Z_SP);
 312       __ z_ld(Z_F13, 136, Z_SP);
 313       __ z_ld(Z_F14, 144, Z_SP);
 314       __ z_ld(Z_F15, 152, Z_SP);
 315       BLOCK_COMMENT("} restore");
 316 
 317       //
 318       // Stack on exit from call_stub:
 319       //
 320       //     0       [C_FRAME]
 321       //             ...
 322       //
 323       // No call_stub frames left.
 324       //
 325 
 326       // All non-volatiles have been restored at this point!!
 327 
 328       //------------------------------------------------------------------------
 329       // The following code makes some assumptions on the T_<type> enum values.
 330       // The enum is defined in globalDefinitions.hpp.
 331       // The validity of the assumptions is tested as far as possible.
 332       //   The assigned values should not be shuffled
 333       //   T_BOOLEAN==4    - lowest used enum value
 334       //   T_NARROWOOP==16 - largest used enum value
 335       //------------------------------------------------------------------------
 336       BLOCK_COMMENT("process result {");
 337       Label firstHandler;
  338       int   handlerLen = 8;
 339 #ifdef ASSERT
 340       char  assertMsg[] = "check BasicType definition in globalDefinitions.hpp";
 341       __ z_chi(r_arg_result_type, T_BOOLEAN);
 342       __ asm_assert_low(assertMsg, 0x0234);
 343       __ z_chi(r_arg_result_type, T_NARROWOOP);
 344       __ asm_assert_high(assertMsg, 0x0235);
 345 #endif
 346       __ add2reg(r_arg_result_type, -T_BOOLEAN);          // Remove offset.
 347       __ z_larl(Z_R1, firstHandler);                      // location of first handler
 348       __ z_sllg(r_arg_result_type, r_arg_result_type, 3); // Each handler is 8 bytes long.
 349       __ z_bc(MacroAssembler::bcondAlways, 0, r_arg_result_type, Z_R1);
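            // Each result-type handler below occupies exactly handlerLen (8) bytes: a store of the
            // appropriate width into *r_arg_result_addr, followed by a branch back to the caller.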
 350 
 351       __ align(handlerLen);
 352       __ bind(firstHandler);
 353       // T_BOOLEAN:
 354         guarantee(T_BOOLEAN == 4, "check BasicType definition in globalDefinitions.hpp");
 355         __ z_st(Z_RET, 0, r_arg_result_addr);
 356         __ z_br(Z_R14); // Return to caller.
 357         __ align(handlerLen);
 358       // T_CHAR:
 359         guarantee(T_CHAR == T_BOOLEAN+1, "check BasicType definition in globalDefinitions.hpp");
 360         __ z_st(Z_RET, 0, r_arg_result_addr);
 361         __ z_br(Z_R14); // Return to caller.
 362         __ align(handlerLen);
 363       // T_FLOAT:
 364         guarantee(T_FLOAT == T_CHAR+1, "check BasicType definition in globalDefinitions.hpp");
 365         __ z_ste(Z_FRET, 0, r_arg_result_addr);
 366         __ z_br(Z_R14); // Return to caller.
 367         __ align(handlerLen);
 368       // T_DOUBLE:
 369         guarantee(T_DOUBLE == T_FLOAT+1, "check BasicType definition in globalDefinitions.hpp");
 370         __ z_std(Z_FRET, 0, r_arg_result_addr);
 371         __ z_br(Z_R14); // Return to caller.
 372         __ align(handlerLen);
 373       // T_BYTE:
 374         guarantee(T_BYTE == T_DOUBLE+1, "check BasicType definition in globalDefinitions.hpp");
 375         __ z_st(Z_RET, 0, r_arg_result_addr);
 376         __ z_br(Z_R14); // Return to caller.
 377         __ align(handlerLen);
 378       // T_SHORT:
 379         guarantee(T_SHORT == T_BYTE+1, "check BasicType definition in globalDefinitions.hpp");
 380         __ z_st(Z_RET, 0, r_arg_result_addr);
 381         __ z_br(Z_R14); // Return to caller.
 382         __ align(handlerLen);
 383       // T_INT:
 384         guarantee(T_INT == T_SHORT+1, "check BasicType definition in globalDefinitions.hpp");
 385         __ z_st(Z_RET, 0, r_arg_result_addr);
 386         __ z_br(Z_R14); // Return to caller.
 387         __ align(handlerLen);
 388       // T_LONG:
 389         guarantee(T_LONG == T_INT+1, "check BasicType definition in globalDefinitions.hpp");
 390         __ z_stg(Z_RET, 0, r_arg_result_addr);
 391         __ z_br(Z_R14); // Return to caller.
 392         __ align(handlerLen);
 393       // T_OBJECT:
 394         guarantee(T_OBJECT == T_LONG+1, "check BasicType definition in globalDefinitions.hpp");
 395         __ z_stg(Z_RET, 0, r_arg_result_addr);
 396         __ z_br(Z_R14); // Return to caller.
 397         __ align(handlerLen);
 398       // T_ARRAY:
 399         guarantee(T_ARRAY == T_OBJECT+1, "check BasicType definition in globalDefinitions.hpp");
 400         __ z_stg(Z_RET, 0, r_arg_result_addr);
 401         __ z_br(Z_R14); // Return to caller.
 402         __ align(handlerLen);
 403       // T_VOID:
 404         guarantee(T_VOID == T_ARRAY+1, "check BasicType definition in globalDefinitions.hpp");
 405         __ z_stg(Z_RET, 0, r_arg_result_addr);
 406         __ z_br(Z_R14); // Return to caller.
 407         __ align(handlerLen);
 408       // T_ADDRESS:
 409         guarantee(T_ADDRESS == T_VOID+1, "check BasicType definition in globalDefinitions.hpp");
 410         __ z_stg(Z_RET, 0, r_arg_result_addr);
 411         __ z_br(Z_R14); // Return to caller.
 412         __ align(handlerLen);
 413       // T_NARROWOOP:
 414         guarantee(T_NARROWOOP == T_ADDRESS+1, "check BasicType definition in globalDefinitions.hpp");
 415         __ z_st(Z_RET, 0, r_arg_result_addr);
 416         __ z_br(Z_R14); // Return to caller.
 417         __ align(handlerLen);
 418       BLOCK_COMMENT("} process result");
 419     }
 420     return start;
 421   }
 422 
 423   // Return point for a Java call if there's an exception thrown in
 424   // Java code. The exception is caught and transformed into a
 425   // pending exception stored in JavaThread that can be tested from
 426   // within the VM.
 427   address generate_catch_exception() {
 428     StubCodeMark mark(this, "StubRoutines", "catch_exception");
 429 
 430     address start = __ pc();
 431 
 432     //
 433     // Registers alive
 434     //
 435     //   Z_thread
 436     //   Z_ARG1 - address of pending exception
 437     //   Z_ARG2 - return address in call stub
 438     //
 439 
 440     const Register exception_file = Z_R0;
 441     const Register exception_line = Z_R1;
 442 
 443     __ load_const_optimized(exception_file, (void*)__FILE__);
 444     __ load_const_optimized(exception_line, (void*)__LINE__);
 445 
 446     __ z_stg(Z_ARG1, thread_(pending_exception));
 447     // Store into `char *'.
 448     __ z_stg(exception_file, thread_(exception_file));
 449     // Store into `int'.
 450     __ z_st(exception_line, thread_(exception_line));
 451 
 452     // Complete return to VM.
 453     assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");
 454 
 455     // Continue in call stub.
 456     __ z_br(Z_ARG2);
 457 
 458     return start;
 459   }
 460 
 461   // Continuation point for runtime calls returning with a pending
 462   // exception. The pending exception check happened in the runtime
 463   // or native call stub. The pending exception in Thread is
 464   // converted into a Java-level exception.
 465   //
 466   // Read:
 467   //   Z_R14: pc the runtime library callee wants to return to.
 468   //   Since the exception occurred in the callee, the return pc
 469   //   from the point of view of Java is the exception pc.
 470   //
 471   // Invalidate:
 472   //   Volatile registers (except below).
 473   //
 474   // Update:
 475   //   Z_ARG1: exception
 476   //   (Z_R14 is unchanged and is live out).
 477   //
 478   address generate_forward_exception() {
 479     StubCodeMark mark(this, "StubRoutines", "forward_exception");
 480     address start = __ pc();
 481 
 482     #define pending_exception_offset in_bytes(Thread::pending_exception_offset())
 483 #ifdef ASSERT
 484     // Get pending exception oop.
 485     __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);
 486 
 487     // Make sure that this code is only executed if there is a pending exception.
 488     {
 489       Label L;
 490       __ z_ltgr(Z_ARG1, Z_ARG1);
 491       __ z_brne(L);
 492       __ stop("StubRoutines::forward exception: no pending exception (1)");
 493       __ bind(L);
 494     }
 495 
 496     __ verify_oop(Z_ARG1, "StubRoutines::forward exception: not an oop");
 497 #endif
 498 
 499     __ z_lgr(Z_ARG2, Z_R14); // Copy exception pc into Z_ARG2.
 500     __ save_return_pc();
 501     __ push_frame_abi160(0);
 502     // Find exception handler.
 503     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
 504                     Z_thread,
 505                     Z_ARG2);
 506     // Copy handler's address.
 507     __ z_lgr(Z_R1, Z_RET);
 508     __ pop_frame();
 509     __ restore_return_pc();
 510 
 511     // Set up the arguments for the exception handler:
 512     // - Z_ARG1: exception oop
 513     // - Z_ARG2: exception pc
 514 
 515     // Load pending exception oop.
 516     __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);
 517 
  518     // The exception pc is the return address in the caller;
  519     // it must be loaded into Z_ARG2.
 520     __ z_lgr(Z_ARG2, Z_R14);
 521 
 522 #ifdef ASSERT
 523     // Make sure exception is set.
 524     { Label L;
 525       __ z_ltgr(Z_ARG1, Z_ARG1);
 526       __ z_brne(L);
 527       __ stop("StubRoutines::forward exception: no pending exception (2)");
 528       __ bind(L);
 529     }
 530 #endif
 531     // Clear the pending exception.
 532     __ clear_mem(Address(Z_thread, pending_exception_offset), sizeof(void *));
 533     // Jump to exception handler
 534     __ z_br(Z_R1 /*handler address*/);
 535 
 536     return start;
 537 
 538     #undef pending_exception_offset
 539   }
 540 
 541   // Continuation point for throwing of implicit exceptions that are
 542   // not handled in the current activation. Fabricates an exception
 543   // oop and initiates normal exception dispatching in this
 544   // frame. Only callee-saved registers are preserved (through the
 545   // normal RegisterMap handling). If the compiler
 546   // needs all registers to be preserved between the fault point and
 547   // the exception handler then it must assume responsibility for that
 548   // in AbstractCompiler::continuation_for_implicit_null_exception or
 549   // continuation_for_implicit_division_by_zero_exception. All other
 550   // implicit exceptions (e.g., NullPointerException or
 551   // AbstractMethodError on entry) are either at call sites or
 552   // otherwise assume that stack unwinding will be initiated, so
 553   // caller saved registers were assumed volatile in the compiler.
 554 
 555   // Note that we generate only this stub into a RuntimeStub, because
 556   // it needs to be properly traversed and ignored during GC, so we
 557   // change the meaning of the "__" macro within this method.
 558 
 559   // Note: the routine set_pc_not_at_call_for_caller in
 560   // SharedRuntime.cpp requires that this code be generated into a
 561   // RuntimeStub.
 562 #undef __
 563 #define __ masm->
 564 
 565   address generate_throw_exception(const char* name, address runtime_entry,
 566                                    bool restore_saved_exception_pc,
 567                                    Register arg1 = noreg, Register arg2 = noreg) {
 568     assert_different_registers(arg1, Z_R0_scratch);  // would be destroyed by push_frame()
 569     assert_different_registers(arg2, Z_R0_scratch);  // would be destroyed by push_frame()
 570 
 571     int insts_size = 256;
 572     int locs_size  = 0;
 573     CodeBuffer      code(name, insts_size, locs_size);
 574     MacroAssembler* masm = new MacroAssembler(&code);
 575     int framesize_in_bytes;
 576     address start = __ pc();
 577 
 578     __ save_return_pc();
 579     framesize_in_bytes = __ push_frame_abi160(0);
 580 
 581     address frame_complete_pc = __ pc();
 582     if (restore_saved_exception_pc) {
 583       __ unimplemented("StubGenerator::throw_exception", 74);
 584     }
 585 
 586     // Note that we always have a runtime stub frame on the top of stack at this point.
 587     __ get_PC(Z_R1);
 588     __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);
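          // The stub frame and its pc are now visible to the VM's stack walking code.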
 589 
 590     // Do the call.
 591     BLOCK_COMMENT("call runtime_entry");
 592     __ call_VM_leaf(runtime_entry, Z_thread, arg1, arg2);
 593 
 594     __ reset_last_Java_frame();
 595 
 596 #ifdef ASSERT
 597     // Make sure that this code is only executed if there is a pending exception.
 598     { Label L;
 599       __ z_lg(Z_R0,
 600                 in_bytes(Thread::pending_exception_offset()),
 601                 Z_thread);
 602       __ z_ltgr(Z_R0, Z_R0);
 603       __ z_brne(L);
 604       __ stop("StubRoutines::throw_exception: no pending exception");
 605       __ bind(L);
 606     }
 607 #endif
 608 
 609     __ pop_frame();
 610     __ restore_return_pc();
 611 
 612     __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
 613     __ z_br(Z_R1);
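          // The pending exception is picked up and dispatched by the forward_exception stub.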
 614 
 615     RuntimeStub* stub =
 616       RuntimeStub::new_runtime_stub(name, &code,
 617                                     frame_complete_pc - start,
 618                                     framesize_in_bytes/wordSize,
 619                                     NULL /*oop_maps*/, false);
 620 
 621     return stub->entry_point();
 622   }
 623 
 624 #undef __
 625 #ifdef PRODUCT
 626 #define __ _masm->
 627 #else
 628 #define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
 629 #endif
 630 
 631   // Support for uint StubRoutine::zarch::partial_subtype_check(Klass
 632   // sub, Klass super);
 633   //
 634   // Arguments:
 635   //   ret  : Z_RET, returned
 636   //   sub  : Z_ARG2, argument, not changed
 637   //   super: Z_ARG3, argument, not changed
 638   //
 639   //   raddr: Z_R14, blown by call
 640   //
 641   address generate_partial_subtype_check() {
 642     StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
 643     Label miss;
 644 
 645     address start = __ pc();
 646 
 647     const Register Rsubklass   = Z_ARG2; // subklass
 648     const Register Rsuperklass = Z_ARG3; // superklass
 649 
 650     // No args, but tmp registers that are killed.
 651     const Register Rlength     = Z_ARG4; // cache array length
 652     const Register Rarray_ptr  = Z_ARG5; // Current value from cache array.
 653 
 654     if (UseCompressedOops) {
 655       assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub");
 656     }
 657 
 658     // Always take the slow path (see SPARC).
 659     __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass,
 660                                      Rarray_ptr, Rlength, NULL, &miss);
 661 
 662     // Match falls through here.
 663     __ clear_reg(Z_RET);               // Zero indicates a match. Set EQ flag in CC.
 664     __ z_br(Z_R14);
 665 
 666     __ BIND(miss);
 667     __ load_const_optimized(Z_RET, 1); // One indicates a miss.
  668     __ z_ltgr(Z_RET, Z_RET);           // Set NE flag in CC.
 669     __ z_br(Z_R14);
 670 
 671     return start;
 672   }
 673 
 674   // Return address of code to be called from code generated by
 675   // MacroAssembler::verify_oop.
 676   //
 677   // Don't generate, rather use C++ code.
 678   address generate_verify_oop_subroutine() {
 679     // Don't generate a StubCodeMark, because no code is generated!
 680     // Generating the mark triggers notifying the oprofile jvmti agent
 681     // about the dynamic code generation, but the stub without
 682     // code (code_size == 0) confuses opjitconv
 683     // StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
 684 
 685     address start = 0;
 686     return start;
 687   }
 688 
 689   // Generate pre-write barrier for array.
 690   //
 691   // Input:
 692   //    addr  - register containing starting address
 693   //    count - register containing element count
 694   //
 695   // The input registers are overwritten.
 696   void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
 697 
 698     BarrierSet* const bs = Universe::heap()->barrier_set();
 699     switch (bs->kind()) {
 700       case BarrierSet::G1BarrierSet:
 701         // With G1, don't generate the call if we statically know that the target is uninitialized.
 702         if (!dest_uninitialized) {
 703           // Is marking active?
 704           Label filtered;
 705           assert_different_registers(addr,  Z_R0_scratch);  // would be destroyed by push_frame()
 706           assert_different_registers(count, Z_R0_scratch);  // would be destroyed by push_frame()
 707           Register Rtmp1 = Z_R0_scratch;
 708           const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() +
 709                                              SATBMarkQueue::byte_offset_of_active());
 710           if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
 711             __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
 712           } else {
 713             guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
 714             __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
 715           }
 716           __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
 717 
 718           // __ push_frame_abi160(0);  // implicitly done in save_live_registers()
 719           (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
 720           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), addr, count);
 721           (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
 722           // __ pop_frame();  // implicitly done in restore_live_registers()
 723 
 724           __ bind(filtered);
 725         }
 726         break;
 727       case BarrierSet::CardTableModRef:
 728       case BarrierSet::ModRef:
 729         break;
 730       default:
 731         ShouldNotReachHere();
 732     }
 733   }
 734 
 735   // Generate post-write barrier for array.
 736   //
 737   // Input:
 738   //    addr  - register containing starting address
 739   //    count - register containing element count
 740   //
 741   // The input registers are overwritten.
 742   void gen_write_ref_array_post_barrier(Register addr, Register count, bool branchToEnd) {
 743     BarrierSet* const bs = Universe::heap()->barrier_set();
 744     switch (bs->kind()) {
 745       case BarrierSet::G1BarrierSet:
 746         {
 747           if (branchToEnd) {
 748             assert_different_registers(addr,  Z_R0_scratch);  // would be destroyed by push_frame()
 749             assert_different_registers(count, Z_R0_scratch);  // would be destroyed by push_frame()
 750             // __ push_frame_abi160(0);  // implicitly done in save_live_registers()
 751             (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
 752             __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
 753             (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
 754             // __ pop_frame();   // implicitly done in restore_live_registers()
 755           } else {
 756             // Tail call: call c and return to stub caller.
 757             address entry_point = CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
 758             __ lgr_if_needed(Z_ARG1, addr);
 759             __ lgr_if_needed(Z_ARG2, count);
 760             __ load_const(Z_R1, entry_point);
 761             __ z_br(Z_R1); // Branch without linking, callee will return to stub caller.
 762           }
 763         }
 764         break;
 765       case BarrierSet::CardTableModRef:
  766         // This code was formerly known as
  767         //   void array_store_check(Register addr, Register count, bool branchToEnd).
 768         {
 769           NearLabel doXC, done;
 770           CardTableModRefBS* ctbs = barrier_set_cast<CardTableModRefBS>(bs);
 771           CardTable* ct = ctbs->card_table();
 772           assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
 773           assert_different_registers(Z_R0, Z_R1, addr, count);
 774 
 775           // Nothing to do if count <= 0.
 776           if (branchToEnd) {
 777             __ compare64_and_branch(count, (intptr_t) 0, Assembler::bcondNotHigh, done);
 778           } else {
 779             __ z_ltgr(count, count);
 780             __ z_bcr(Assembler::bcondNotPositive, Z_R14);
 781           }
 782 
 783           // Note: We can't combine the shifts. We could lose a carry
 784           // from calculating the array end address.
 785           // count = (count-1)*BytesPerHeapOop + addr
  786           // Count then holds the address of the last oop in the array.
 787           __ z_sllg(count, count, LogBytesPerHeapOop);
 788           __ add2reg_with_index(count, -BytesPerHeapOop, count, addr);
 789 
 790           // Get base address of card table.
 791           __ load_const_optimized(Z_R1, (address)ct->byte_map_base());
 792 
 793           // count = (count>>shift) - (addr>>shift)
 794           __ z_srlg(addr,  addr,  CardTable::card_shift);
 795           __ z_srlg(count, count, CardTable::card_shift);
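                // addr now holds the index of the first card, count the index of the last card
                // touched by the copied range.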
 796 
 797           // Prefetch first elements of card table for update.
 798           if (VM_Version::has_Prefetch()) {
 799             __ z_pfd(0x02, 0, addr, Z_R1);
 800           }
 801 
 802           // Special case: clear just one byte.
 803           __ clear_reg(Z_R0, true, false);  // Used for doOneByte.
 804           __ z_sgr(count, addr);            // Count = n-1 now, CC used for brc below.
 805           __ z_stc(Z_R0, 0, addr, Z_R1);    // Must preserve CC from z_sgr.
 806           if (branchToEnd) {
 807             __ z_brz(done);
 808           } else {
 809             __ z_bcr(Assembler::bcondZero, Z_R14);
 810           }
 811 
 812           __ z_cghi(count, 255);
 813           __ z_brnh(doXC);
 814 
 815           // MVCLE: clear a long area.
 816           // Start addr of card table range = base + addr.
 817           // # bytes in    card table range = (count + 1)
 818           __ add2reg_with_index(Z_R0, 0, Z_R1, addr);
 819           __ add2reg(Z_R1, 1, count);
 820 
 821           // dirty hack:
 822           // There are just two callers. Both pass
 823           // count in Z_ARG3 = Z_R4
 824           // addr  in Z_ARG2 = Z_R3
 825           // ==> use Z_ARG2 as src len reg = 0
 826           //         Z_ARG1 as src addr (ignored)
 827           assert(count == Z_ARG3, "count: unexpected register number");
 828           assert(addr  == Z_ARG2, "addr:  unexpected register number");
 829           __ clear_reg(Z_ARG2, true, false);
 830 
 831           __ MacroAssembler::move_long_ext(Z_R0, Z_ARG1, 0);
 832 
 833           if (branchToEnd) {
 834             __ z_bru(done);
 835           } else {
 836             __ z_bcr(Assembler::bcondAlways, Z_R14);
 837           }
 838 
 839           // XC: clear a short area.
 840           Label XC_template; // Instr template, never exec directly!
 841           __ bind(XC_template);
 842           __ z_xc(0, 0, addr, 0, addr);
 843 
 844           __ bind(doXC);
 845           // start addr of card table range = base + addr
 846           // end   addr of card table range = base + addr + count
 847           __ add2reg_with_index(addr, 0, Z_R1, addr);
 848 
 849           if (VM_Version::has_ExecuteExtensions()) {
 850             __ z_exrl(count, XC_template);   // Execute XC with var. len.
 851           } else {
 852             __ z_larl(Z_R1, XC_template);
 853             __ z_ex(count, 0, Z_R0, Z_R1);   // Execute XC with var. len.
 854           }
 855           if (!branchToEnd) {
 856             __ z_br(Z_R14);
 857           }
 858 
 859           __ bind(done);
 860         }
 861         break;
 862       case BarrierSet::ModRef:
 863         if (!branchToEnd) { __ z_br(Z_R14); }
 864         break;
 865       default:
 866         ShouldNotReachHere();
 867     }
 868   }
 869 
 870 
 871   // This is to test that the count register contains a positive int value.
 872   // Required because C2 does not respect int to long conversion for stub calls.
 873   void assert_positive_int(Register count) {
 874 #ifdef ASSERT
 875     __ z_srag(Z_R0, count, 31);  // Just leave the sign (must be zero) in Z_R0.
 876     __ asm_assert_eq("missing zero extend", 0xAFFE);
 877 #endif
 878   }
 879 
 880   //  Generate overlap test for array copy stubs.
 881   //  If no actual overlap is detected, control is transferred to the
 882   //  "normal" copy stub (entry address passed in disjoint_copy_target).
 883   //  Otherwise, execution continues with the code generated by the
 884   //  caller of array_overlap_test.
 885   //
 886   //  Input:
 887   //    Z_ARG1    - from
 888   //    Z_ARG2    - to
 889   //    Z_ARG3    - element count
 890   void array_overlap_test(address disjoint_copy_target, int log2_elem_size) {
 891     __ MacroAssembler::compare_and_branch_optimized(Z_ARG2, Z_ARG1, Assembler::bcondNotHigh,
 892                                                     disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);
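          // (The branch above transfers to the disjoint stub when to <= from: a forward copy
          //  cannot overwrite source bytes that have not been read yet.)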
 893 
 894     Register index = Z_ARG3;
 895     if (log2_elem_size > 0) {
 896       __ z_sllg(Z_R1, Z_ARG3, log2_elem_size);  // byte count
 897       index = Z_R1;
 898     }
 899     __ add2reg_with_index(Z_R1, 0, index, Z_ARG1);  // First byte after "from" range.
 900 
 901     __ MacroAssembler::compare_and_branch_optimized(Z_R1, Z_ARG2, Assembler::bcondNotHigh,
 902                                                     disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);
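          // (No overlap at all if the source range ends at or before the target start.)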
 903 
 904     // Destructive overlap: let caller generate code for that.
 905   }
 906 
 907   //  Generate stub for disjoint array copy. If "aligned" is true, the
 908   //  "from" and "to" addresses are assumed to be heapword aligned.
 909   //
 910   //  Arguments for generated stub:
 911   //      from:  Z_ARG1
 912   //      to:    Z_ARG2
 913   //      count: Z_ARG3 treated as signed
 914   void generate_disjoint_copy(bool aligned, int element_size,
 915                               bool branchToEnd,
 916                               bool restoreArgs) {
 917     // This is the zarch specific stub generator for general array copy tasks.
 918     // It has the following prereqs and features:
 919     //
 920     // - No destructive overlap allowed (else unpredictable results).
 921     // - Destructive overlap does not exist if the leftmost byte of the target
 922     //   does not coincide with any of the source bytes (except the leftmost).
 923     //
 924     //   Register usage upon entry:
 925     //      Z_ARG1 == Z_R2 :   address of source array
 926     //      Z_ARG2 == Z_R3 :   address of target array
 927     //      Z_ARG3 == Z_R4 :   length of operands (# of elements on entry)
 928     //
 929     // Register usage within the generator:
 930     // - Z_R0 and Z_R1 are KILLed by the stub routine (target addr/len).
 931     //                 Used as pair register operand in complex moves, scratch registers anyway.
 932     // - Z_R5 is KILLed by the stub routine (source register pair addr/len) (even/odd reg).
 933     //                  Same as R0/R1, but no scratch register.
 934     // - Z_ARG1, Z_ARG2, Z_ARG3 are USEd but preserved by the stub routine,
 935     //                          but they might get temporarily overwritten.
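          //
          // Copy strategy, selected at run time by total byte count:
          //   <= 256 bytes : one EXECUTEd MVC (doMVC), or an unrolled MVC block (doMVCUnrolled)
          //                  for DW-sized elements,
          //   <= 4096 bytes: loop of 256-byte MVCs (doMVCLOOP), remainder handled by doMVCgeneral,
          //   larger       : MVCLE (fall-through case).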
 936 
 937     Register  save_reg    = Z_ARG4;   // (= Z_R5), holds original target operand address for restore.
 938 
 939     {
 940       Register   llen_reg = Z_R1;     // Holds left operand len (odd reg).
 941       Register  laddr_reg = Z_R0;     // Holds left operand addr (even reg), overlaps with data_reg.
 942       Register   rlen_reg = Z_R5;     // Holds right operand len (odd reg), overlaps with save_reg.
 943       Register  raddr_reg = Z_R4;     // Holds right operand addr (even reg), overlaps with len_reg.
 944 
 945       Register   data_reg = Z_R0;     // Holds copied data chunk in alignment process and copy loop.
 946       Register    len_reg = Z_ARG3;   // Holds operand len (#elements at entry, #bytes shortly after).
 947       Register    dst_reg = Z_ARG2;   // Holds left (target)  operand addr.
 948       Register    src_reg = Z_ARG1;   // Holds right (source) operand addr.
 949 
 950       Label     doMVCLOOP, doMVCLOOPcount, doMVCLOOPiterate;
 951       Label     doMVCUnrolled;
 952       NearLabel doMVC,  doMVCgeneral, done;
 953       Label     MVC_template;
 954       address   pcMVCblock_b, pcMVCblock_e;
 955 
 956       bool      usedMVCLE       = true;
 957       bool      usedMVCLOOP     = true;
 958       bool      usedMVCUnrolled = false;
 959       bool      usedMVC         = false;
 960       bool      usedMVCgeneral  = false;
 961 
 962       int       stride;
 963       Register  stride_reg;
 964       Register  ix_reg;
 965 
 966       assert((element_size<=256) && (256%element_size == 0), "element size must be <= 256, power of 2");
 967       unsigned int log2_size = exact_log2(element_size);
 968 
 969       switch (element_size) {
 970         case 1:  BLOCK_COMMENT("ARRAYCOPY DISJOINT byte  {"); break;
 971         case 2:  BLOCK_COMMENT("ARRAYCOPY DISJOINT short {"); break;
 972         case 4:  BLOCK_COMMENT("ARRAYCOPY DISJOINT int   {"); break;
 973         case 8:  BLOCK_COMMENT("ARRAYCOPY DISJOINT long  {"); break;
 974         default: BLOCK_COMMENT("ARRAYCOPY DISJOINT       {"); break;
 975       }
 976 
 977       assert_positive_int(len_reg);
 978 
 979       BLOCK_COMMENT("preparation {");
 980 
 981       // No copying if len <= 0.
 982       if (branchToEnd) {
 983         __ compare64_and_branch(len_reg, (intptr_t) 0, Assembler::bcondNotHigh, done);
 984       } else {
 985         if (VM_Version::has_CompareBranch()) {
 986           __ z_cgib(len_reg, 0, Assembler::bcondNotHigh, 0, Z_R14);
 987         } else {
 988           __ z_ltgr(len_reg, len_reg);
 989           __ z_bcr(Assembler::bcondNotPositive, Z_R14);
 990         }
 991       }
 992 
 993       // Prefetch just one cache line. Speculative opt for short arrays.
  994       // Do not use Z_R1 in the prefetch; its contents are undefined here.
 995       if (VM_Version::has_Prefetch()) {
 996         __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
 997         __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
 998       }
 999 
1000       BLOCK_COMMENT("} preparation");
1001 
1002       // Save args only if really needed.
 1003       // Keep the len test local to the branch; it is generated only once.
1004 
1005       BLOCK_COMMENT("mode selection {");
1006 
1007       // Special handling for arrays with only a few elements.
1008       // Nothing fancy: just an executed MVC.
1009       if (log2_size > 0) {
1010         __ z_sllg(Z_R1, len_reg, log2_size); // Remember #bytes in Z_R1.
1011       }
1012       if (element_size != 8) {
1013         __ z_cghi(len_reg, 256/element_size);
1014         __ z_brnh(doMVC);
1015         usedMVC = true;
1016       }
1017       if (element_size == 8) { // Long and oop arrays are always aligned.
1018         __ z_cghi(len_reg, 256/element_size);
1019         __ z_brnh(doMVCUnrolled);
1020         usedMVCUnrolled = true;
1021       }
1022 
1023       // Prefetch another cache line. We, for sure, have more than one line to copy.
1024       if (VM_Version::has_Prefetch()) {
1025         __ z_pfd(0x01, 256, Z_R0, src_reg); // Fetch access.
1026         __ z_pfd(0x02, 256, Z_R0, dst_reg); // Store access.
1027       }
1028 
1029       if (restoreArgs) {
1030         // Remember entry value of ARG2 to restore all arguments later from that knowledge.
1031         __ z_lgr(save_reg, dst_reg);
1032       }
1033 
1034       __ z_cghi(len_reg, 4096/element_size);
1035       if (log2_size == 0) {
1036         __ z_lgr(Z_R1, len_reg); // Init Z_R1 with #bytes
1037       }
1038       __ z_brnh(doMVCLOOP);
1039 
1040       // Fall through to MVCLE case.
1041 
1042       BLOCK_COMMENT("} mode selection");
1043 
1044       // MVCLE: for long arrays
1045       //   DW aligned: Best performance for sizes > 4kBytes.
1046       //   unaligned:  Least complex for sizes > 256 bytes.
1047       if (usedMVCLE) {
1048         BLOCK_COMMENT("mode MVCLE {");
1049 
1050         // Setup registers for mvcle.
1051         //__ z_lgr(llen_reg, len_reg);// r1 <- r4  #bytes already in Z_R1, aka llen_reg.
1052         __ z_lgr(laddr_reg, dst_reg); // r0 <- r3
1053         __ z_lgr(raddr_reg, src_reg); // r4 <- r2
1054         __ z_lgr(rlen_reg, llen_reg); // r5 <- r1
1055 
1056         __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb0);    // special: bypass cache
1057         // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb8); // special: Hold data in cache.
1058         // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0);
1059 
1060         if (restoreArgs) {
1061           // MVCLE updates the source (Z_R4,Z_R5) and target (Z_R0,Z_R1) register pairs.
1062           // Dst_reg (Z_ARG2) and src_reg (Z_ARG1) are left untouched. No restore required.
1063           // Len_reg (Z_ARG3) is destroyed and must be restored.
1064           __ z_slgr(laddr_reg, dst_reg);    // copied #bytes
1065           if (log2_size > 0) {
1066             __ z_srag(Z_ARG3, laddr_reg, log2_size); // Convert back to #elements.
1067           } else {
1068             __ z_lgr(Z_ARG3, laddr_reg);
1069           }
1070         }
1071         if (branchToEnd) {
1072           __ z_bru(done);
1073         } else {
1074           __ z_br(Z_R14);
1075         }
1076         BLOCK_COMMENT("} mode MVCLE");
1077       }
1078       // No fallthru possible here.
1079 
1080       //  MVCUnrolled: for short, aligned arrays.
1081 
1082       if (usedMVCUnrolled) {
1083         BLOCK_COMMENT("mode MVC unrolled {");
1084         stride = 8;
1085 
1086         // Generate unrolled MVC instructions.
1087         for (int ii = 32; ii > 1; ii--) {
1088           __ z_mvc(0, ii * stride-1, dst_reg, 0, src_reg); // ii*8 byte copy
1089           if (branchToEnd) {
1090             __ z_bru(done);
1091           } else {
1092             __ z_br(Z_R14);
1093           }
1094         }
1095 
1096         pcMVCblock_b = __ pc();
1097         __ z_mvc(0, 1 * stride-1, dst_reg, 0, src_reg); // 8 byte copy
1098         if (branchToEnd) {
1099           __ z_bru(done);
1100         } else {
1101           __ z_br(Z_R14);
1102         }
1103 
1104         pcMVCblock_e = __ pc();
1105         Label MVC_ListEnd;
1106         __ bind(MVC_ListEnd);
1107 
1108         // This is an absolute fast path:
1109         // - Array len in bytes must be not greater than 256.
 1110         // - Array len in bytes must be an integer multiple of DW
1111         //   to save expensive handling of trailing bytes.
1112         // - Argument restore is not done,
1113         //   i.e. previous code must not alter arguments (this code doesn't either).
1114 
1115         __ bind(doMVCUnrolled);
1116 
1117         // Avoid mul, prefer shift where possible.
1118         // Combine shift right (for #DW) with shift left (for block size).
1119         // Set CC for zero test below (asm_assert).
1120         // Note: #bytes comes in Z_R1, #DW in len_reg.
1121         unsigned int MVCblocksize    = pcMVCblock_e - pcMVCblock_b;
1122         unsigned int logMVCblocksize = 0xffffffffU; // Pacify compiler ("used uninitialized" warning).
1123 
1124         if (log2_size > 0) { // Len was scaled into Z_R1.
1125           switch (MVCblocksize) {
1126 
1127             case  8: logMVCblocksize = 3;
1128                      __ z_ltgr(Z_R0, Z_R1); // #bytes is index
1129                      break;                 // reasonable size, use shift
1130 
1131             case 16: logMVCblocksize = 4;
1132                      __ z_slag(Z_R0, Z_R1, logMVCblocksize-log2_size);
1133                      break;                 // reasonable size, use shift
1134 
1135             default: logMVCblocksize = 0;
1136                      __ z_ltgr(Z_R0, len_reg); // #DW for mul
1137                      break;                 // all other sizes: use mul
1138           }
1139         } else {
1140           guarantee(log2_size, "doMVCUnrolled: only for DW entities");
1141         }
1142 
1143         // This test (and branch) is redundant. Previous code makes sure that
1144         //  - element count > 0
1145         //  - element size == 8.
1146         // Thus, len reg should never be zero here. We insert an asm_assert() here,
1147         // just to double-check and to be on the safe side.
1148         __ asm_assert(false, "zero len cannot occur", 99);
1149 
1150         __ z_larl(Z_R1, MVC_ListEnd);        // Get addr of last instr block.
1151         // Avoid mul, prefer shift where possible.
1152         if (logMVCblocksize == 0) {
1153           __ z_mghi(Z_R0, MVCblocksize);
1154         }
1155         __ z_slgr(Z_R1, Z_R0);
1156         __ z_br(Z_R1);
1157         BLOCK_COMMENT("} mode MVC unrolled");
1158       }
1159       // No fallthru possible here.
1160 
1161       // MVC execute template
1162       // Must always generate. Usage may be switched on below.
1163       // There is no suitable place after here to put the template.
1164       __ bind(MVC_template);
1165       __ z_mvc(0,0,dst_reg,0,src_reg);      // Instr template, never exec directly!
1166 
1167 
1168       // MVC Loop: for medium-sized arrays
1169 
1170       // Only for DW aligned arrays (src and dst).
1171       // #bytes to copy must be at least 256!!!
1172       // Non-aligned cases handled separately.
1173       stride     = 256;
1174       stride_reg = Z_R1;   // Holds #bytes when control arrives here.
1175       ix_reg     = Z_ARG3; // Alias for len_reg.
1176 
1177 
1178       if (usedMVCLOOP) {
1179         BLOCK_COMMENT("mode MVC loop {");
1180         __ bind(doMVCLOOP);
1181 
1182         __ z_lcgr(ix_reg, Z_R1);         // Ix runs from -(n-2)*stride to 1*stride (inclusive).
1183         __ z_llill(stride_reg, stride);
1184         __ add2reg(ix_reg, 2*stride);    // Thus: increment ix by 2*stride.
1185 
1186         __ bind(doMVCLOOPiterate);
1187           __ z_mvc(0, stride-1, dst_reg, 0, src_reg);
1188           __ add2reg(dst_reg, stride);
1189           __ add2reg(src_reg, stride);
1190           __ bind(doMVCLOOPcount);
1191           __ z_brxlg(ix_reg, stride_reg, doMVCLOOPiterate);
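                // BRXLG adds stride_reg to ix_reg and branches while the sum is <= the compare
                // value; since Z_R1 is an odd register, it serves as both increment and compare.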
1192 
 1193         // Don't use add2reg() here, since we must set the condition code!
1194         __ z_aghi(ix_reg, -2*stride);       // Compensate incr from above: zero diff means "all copied".
1195 
1196         if (restoreArgs) {
1197           __ z_lcgr(Z_R1, ix_reg);          // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
1198           __ z_brnz(doMVCgeneral);          // We're not done yet, ix_reg is not zero.
1199 
1200           // ARG1, ARG2, and ARG3 were altered by the code above, so restore them building on save_reg.
1201           __ z_slgr(dst_reg, save_reg);     // copied #bytes
1202           __ z_slgr(src_reg, dst_reg);      // = ARG1 (now restored)
1203           if (log2_size) {
1204             __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3.
1205           } else {
1206             __ z_lgr(Z_ARG3, dst_reg);
1207           }
1208           __ z_lgr(Z_ARG2, save_reg);       // ARG2 now restored.
1209 
1210           if (branchToEnd) {
1211             __ z_bru(done);
1212           } else {
1213             __ z_br(Z_R14);
1214           }
1215 
1216         } else {
 1217           if (branchToEnd) {
 1218             __ z_brz(done);                        // CC set by aghi instr.
 1219           } else {
 1220             __ z_bcr(Assembler::bcondZero, Z_R14); // We're all done if zero.
 1221           }
1222 
1223           __ z_lcgr(Z_R1, ix_reg);    // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
1224           // __ z_bru(doMVCgeneral);  // fallthru
1225         }
1226         usedMVCgeneral = true;
1227         BLOCK_COMMENT("} mode MVC loop");
1228       }
1229       // Fallthru to doMVCgeneral
1230 
1231       // MVCgeneral: for short, unaligned arrays, after other copy operations
1232 
1233       // Somewhat expensive due to use of EX instruction, but simple.
1234       if (usedMVCgeneral) {
1235         BLOCK_COMMENT("mode MVC general {");
1236         __ bind(doMVCgeneral);
1237 
1238         __ add2reg(len_reg, -1, Z_R1);             // Get #bytes-1 for EXECUTE.
1239         if (VM_Version::has_ExecuteExtensions()) {
1240           __ z_exrl(len_reg, MVC_template);        // Execute MVC with variable length.
1241         } else {
1242           __ z_larl(Z_R1, MVC_template);           // Get addr of instr template.
1243           __ z_ex(len_reg, 0, Z_R0, Z_R1);         // Execute MVC with variable length.
1244         }                                          // penalty: 9 ticks
1245 
1246         if (restoreArgs) {
1247           // ARG1, ARG2, and ARG3 were altered by code executed before, so restore them building on save_reg
1248           __ z_slgr(dst_reg, save_reg);            // Copied #bytes without the "doMVCgeneral" chunk
1249           __ z_slgr(src_reg, dst_reg);             // = ARG1 (now restored), was not advanced for "doMVCgeneral" chunk
1250           __ add2reg_with_index(dst_reg, 1, len_reg, dst_reg); // Len of executed MVC was not accounted for, yet.
1251           if (log2_size) {
1252             __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3
1253           } else {
1254              __ z_lgr(Z_ARG3, dst_reg);
1255           }
1256           __ z_lgr(Z_ARG2, save_reg);              // ARG2 now restored.
1257         }
1258 
1259         if (usedMVC) {
1260           if (branchToEnd) {
1261             __ z_bru(done);
1262           } else {
1263             __ z_br(Z_R14);
 1264           }
1265         } else {
1266           if (!branchToEnd) __ z_br(Z_R14);
1267         }
1268         BLOCK_COMMENT("} mode MVC general");
1269       }
1270       // Fallthru possible if following block not generated.
1271 
1272       // MVC: for short, unaligned arrays
1273 
1274       // Somewhat expensive due to use of EX instruction, but simple. penalty: 9 ticks.
1275       // Differs from doMVCgeneral in reconstruction of ARG2, ARG3, and ARG4.
1276       if (usedMVC) {
1277         BLOCK_COMMENT("mode MVC {");
1278         __ bind(doMVC);
1279 
1280         // get #bytes-1 for EXECUTE
1281         if (log2_size) {
1282           __ add2reg(Z_R1, -1);                // Length was scaled into Z_R1.
1283         } else {
1284           __ add2reg(Z_R1, -1, len_reg);       // Length was not scaled.
1285         }
1286 
1287         if (VM_Version::has_ExecuteExtensions()) {
1288           __ z_exrl(Z_R1, MVC_template);       // Execute MVC with variable length.
1289         } else {
1290           __ z_lgr(Z_R0, Z_R5);                // Save ARG4, may be unnecessary.
1291           __ z_larl(Z_R5, MVC_template);       // Get addr of instr template.
1292           __ z_ex(Z_R1, 0, Z_R0, Z_R5);        // Execute MVC with variable length.
1293           __ z_lgr(Z_R5, Z_R0);                // Restore ARG4, may be unnecessary.
1294         }
1295 
1296         if (!branchToEnd) {
1297           __ z_br(Z_R14);
1298         }
1299         BLOCK_COMMENT("} mode MVC");
1300       }
1301 
1302       __ bind(done);
1303 
1304       switch (element_size) {
1305         case 1:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT byte "); break;
1306         case 2:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT short"); break;
1307         case 4:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT int  "); break;
1308         case 8:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT long "); break;
1309         default: BLOCK_COMMENT("} ARRAYCOPY DISJOINT      "); break;
1310       }
1311     }
1312   }
1313 
1314   // Generate stub for conjoint array copy. If "aligned" is true, the
1315   // "from" and "to" addresses are assumed to be heapword aligned.
1316   //
1317   // Arguments for generated stub:
1318   //   from:  Z_ARG1
1319   //   to:    Z_ARG2
1320   //   count: Z_ARG3 treated as signed
1321   void generate_conjoint_copy(bool aligned, int element_size, bool branchToEnd) {
1322 
1323     // This is the zarch specific stub generator for general array copy tasks.
1324     // It has the following prereqs and features:
1325     //
    // - Destructive overlap is possible and is handled by copying in reverse order.
    // - Destructive overlap exists if the leftmost byte of the target
    //   coincides with any of the source bytes (except the leftmost).
    // - Z_R0 and Z_R1 are KILLed by the stub routine (data and stride).
    // - Z_ARG1 and Z_ARG2 are USEd but preserved by the stub routine.
    // - Z_ARG3 is USEd but preserved by the stub routine.
    // - Z_ARG4 is used as index register and is thus KILLed.
1333     //
1334     {
1335       Register stride_reg = Z_R1;     // Stride & compare value in loop (negative element_size).
1336       Register   data_reg = Z_R0;     // Holds value of currently processed element.
1337       Register     ix_reg = Z_ARG4;   // Holds byte index of currently processed element.
1338       Register    len_reg = Z_ARG3;   // Holds length (in #elements) of arrays.
1339       Register    dst_reg = Z_ARG2;   // Holds left  operand addr.
1340       Register    src_reg = Z_ARG1;   // Holds right operand addr.
1341 
1342       assert(256%element_size == 0, "Element size must be power of 2.");
1343       assert(element_size     <= 8, "Can't handle more than DW units.");
1344 
1345       switch (element_size) {
1346         case 1:  BLOCK_COMMENT("ARRAYCOPY CONJOINT byte  {"); break;
1347         case 2:  BLOCK_COMMENT("ARRAYCOPY CONJOINT short {"); break;
1348         case 4:  BLOCK_COMMENT("ARRAYCOPY CONJOINT int   {"); break;
1349         case 8:  BLOCK_COMMENT("ARRAYCOPY CONJOINT long  {"); break;
1350         default: BLOCK_COMMENT("ARRAYCOPY CONJOINT       {"); break;
1351       }
1352 
1353       assert_positive_int(len_reg);
1354 
1355       if (VM_Version::has_Prefetch()) {
1356         __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
1357         __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
1358       }
1359 
1360       unsigned int log2_size = exact_log2(element_size);
1361       if (log2_size) {
1362         __ z_sllg(ix_reg, len_reg, log2_size);
1363       } else {
1364         __ z_lgr(ix_reg, len_reg);
1365       }
1366 
1367       // Optimize reverse copy loop.
1368       // Main loop copies DW units which may be unaligned. Unaligned access adds some penalty ticks.
1369       // Unaligned DW access (neither fetch nor store) is DW-atomic, but should be alignment-atomic.
1370       // Preceding the main loop, some bytes are copied to obtain a DW-multiple remaining length.
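      // Illustrative example (byte copy, 13 elements): ix_reg starts at 13; the byte step
      // copies 1 byte (ix = 12), the HW step is skipped (bit 1 clear), the word step copies
      // 4 bytes (ix = 8), and the main loop below copies the remaining 8 bytes as one DW.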
1371 
1372       Label countLoop1;
1373       Label copyLoop1;
1374       Label skipBY;
1375       Label skipHW;
1376       int   stride = -8;
1377 
1378       __ load_const_optimized(stride_reg, stride); // Prepare for DW copy loop.
1379 
1380       if (element_size == 8)    // Nothing to do here.
1381         __ z_bru(countLoop1);
1382       else {                    // Do not generate dead code.
1383         __ z_tmll(ix_reg, 7);   // Check the "odd" bits.
1384         __ z_bre(countLoop1);   // There are none, very good!
1385       }
1386 
1387       if (log2_size == 0) {     // Handle leftover Byte.
1388         __ z_tmll(ix_reg, 1);
1389         __ z_bre(skipBY);
1390         __ z_lb(data_reg,   -1, ix_reg, src_reg);
1391         __ z_stcy(data_reg, -1, ix_reg, dst_reg);
1392         __ add2reg(ix_reg, -1); // Decrement delayed to avoid AGI.
1393         __ bind(skipBY);
1394         // fallthru
1395       }
1396       if (log2_size <= 1) {     // Handle leftover HW.
1397         __ z_tmll(ix_reg, 2);
1398         __ z_bre(skipHW);
1399         __ z_lhy(data_reg,  -2, ix_reg, src_reg);
1400         __ z_sthy(data_reg, -2, ix_reg, dst_reg);
1401         __ add2reg(ix_reg, -2); // Decrement delayed to avoid AGI.
1402         __ bind(skipHW);
1403         __ z_tmll(ix_reg, 4);
1404         __ z_bre(countLoop1);
1405         // fallthru
1406       }
1407       if (log2_size <= 2) {     // There are just 4 bytes (left) that need to be copied.
1408         __ z_ly(data_reg,  -4, ix_reg, src_reg);
1409         __ z_sty(data_reg, -4, ix_reg, dst_reg);
1410         __ add2reg(ix_reg, -4); // Decrement delayed to avoid AGI.
1411         __ z_bru(countLoop1);
1412       }
1413 
1414       // Control can never get to here. Never! Never ever!
1415       __ z_illtrap(0x99);
1416       __ bind(copyLoop1);
1417       __ z_lg(data_reg,  0, ix_reg, src_reg);
1418       __ z_stg(data_reg, 0, ix_reg, dst_reg);
1419       __ bind(countLoop1);
1420       __ z_brxhg(ix_reg, stride_reg, copyLoop1);
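      // BRXHG adds the negative stride to ix_reg and branches while the result is still greater
      // than the stride, i.e. while the decremented index is >= 0 and a full DW remains to be
      // copied at that offset.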
1421 
1422       if (!branchToEnd)
1423         __ z_br(Z_R14);
1424 
1425       switch (element_size) {
1426         case 1:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT byte "); break;
1427         case 2:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT short"); break;
1428         case 4:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT int  "); break;
1429         case 8:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT long "); break;
1430         default: BLOCK_COMMENT("} ARRAYCOPY CONJOINT      "); break;
1431       }
1432     }
1433   }
1434 
1435   // Generate stub for disjoint byte copy. If "aligned" is true, the
1436   // "from" and "to" addresses are assumed to be heapword aligned.
1437   address generate_disjoint_byte_copy(bool aligned, const char * name) {
1438     StubCodeMark mark(this, "StubRoutines", name);
1439 
1440     // This is the zarch specific stub generator for byte array copy.
1441     // Refer to generate_disjoint_copy for a list of prereqs and features:
1442     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1443     generate_disjoint_copy(aligned, 1, false, false);
1444     return __ addr_at(start_off);
1445   }
1446 
1447 
1448   address generate_disjoint_short_copy(bool aligned, const char * name) {
1449     StubCodeMark mark(this, "StubRoutines", name);
1450     // This is the zarch specific stub generator for short array copy.
1451     // Refer to generate_disjoint_copy for a list of prereqs and features:
1452     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1453     generate_disjoint_copy(aligned, 2, false, false);
1454     return __ addr_at(start_off);
1455   }
1456 
1457 
1458   address generate_disjoint_int_copy(bool aligned, const char * name) {
1459     StubCodeMark mark(this, "StubRoutines", name);
1460     // This is the zarch specific stub generator for int array copy.
1461     // Refer to generate_disjoint_copy for a list of prereqs and features:
1462     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1463     generate_disjoint_copy(aligned, 4, false, false);
1464     return __ addr_at(start_off);
1465   }
1466 
1467 
1468   address generate_disjoint_long_copy(bool aligned, const char * name) {
1469     StubCodeMark mark(this, "StubRoutines", name);
1470     // This is the zarch specific stub generator for long array copy.
1471     // Refer to generate_disjoint_copy for a list of prereqs and features:
1472     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1473     generate_disjoint_copy(aligned, 8, false, false);
1474     return __ addr_at(start_off);
1475   }
1476 
1477 
1478   address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
1479     StubCodeMark mark(this, "StubRoutines", name);
1480     // This is the zarch specific stub generator for oop array copy.
1481     // Refer to generate_disjoint_copy for a list of prereqs and features.
1482     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1483     unsigned int size      = UseCompressedOops ? 4 : 8;
1484 
1485     gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);
1486 
1487     generate_disjoint_copy(aligned, size, true, true);
1488 
1489     gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);
1490 
1491     return __ addr_at(start_off);
1492   }
1493 
1494 
1495   address generate_conjoint_byte_copy(bool aligned, const char * name) {
1496     StubCodeMark mark(this, "StubRoutines", name);
1497     // This is the zarch specific stub generator for overlapping byte array copy.
1498     // Refer to generate_conjoint_copy for a list of prereqs and features:
1499     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
1500     address nooverlap_target = aligned ? StubRoutines::arrayof_jbyte_disjoint_arraycopy()
1501                                        : StubRoutines::jbyte_disjoint_arraycopy();
1502 
1503     array_overlap_test(nooverlap_target, 0); // Branch away to nooverlap_target if disjoint.
1504     generate_conjoint_copy(aligned, 1, false);
1505 
1506     return __ addr_at(start_off);
1507   }
1508 
1509 
1510   address generate_conjoint_short_copy(bool aligned, const char * name) {
1511     StubCodeMark mark(this, "StubRoutines", name);
1512     // This is the zarch specific stub generator for overlapping short array copy.
1513     // Refer to generate_conjoint_copy for a list of prereqs and features:
1514     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
1515     address nooverlap_target = aligned ? StubRoutines::arrayof_jshort_disjoint_arraycopy()
1516                                        : StubRoutines::jshort_disjoint_arraycopy();
1517 
1518     array_overlap_test(nooverlap_target, 1); // Branch away to nooverlap_target if disjoint.
1519     generate_conjoint_copy(aligned, 2, false);
1520 
1521     return __ addr_at(start_off);
1522   }
1523 
1524   address generate_conjoint_int_copy(bool aligned, const char * name) {
1525     StubCodeMark mark(this, "StubRoutines", name);
1526     // This is the zarch specific stub generator for overlapping int array copy.
1527     // Refer to generate_conjoint_copy for a list of prereqs and features:
1528 
1529     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
1530     address nooverlap_target = aligned ? StubRoutines::arrayof_jint_disjoint_arraycopy()
1531                                        : StubRoutines::jint_disjoint_arraycopy();
1532 
1533     array_overlap_test(nooverlap_target, 2); // Branch away to nooverlap_target if disjoint.
1534     generate_conjoint_copy(aligned, 4, false);
1535 
1536     return __ addr_at(start_off);
1537   }
1538 
1539   address generate_conjoint_long_copy(bool aligned, const char * name) {
1540     StubCodeMark mark(this, "StubRoutines", name);
1541     // This is the zarch specific stub generator for overlapping long array copy.
1542     // Refer to generate_conjoint_copy for a list of prereqs and features:
1543 
1544     unsigned int start_off   = __ offset();  // Remember stub start address (is rtn value).
1545     address nooverlap_target = aligned ? StubRoutines::arrayof_jlong_disjoint_arraycopy()
1546                                        : StubRoutines::jlong_disjoint_arraycopy();
1547 
1548     array_overlap_test(nooverlap_target, 3); // Branch away to nooverlap_target if disjoint.
1549     generate_conjoint_copy(aligned, 8, false);
1550 
1551     return __ addr_at(start_off);
1552   }
1553 
1554   address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
1555     StubCodeMark mark(this, "StubRoutines", name);
1556     // This is the zarch specific stub generator for overlapping oop array copy.
1557     // Refer to generate_conjoint_copy for a list of prereqs and features.
1558     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1559     unsigned int size      = UseCompressedOops ? 4 : 8;
1560     unsigned int shift     = UseCompressedOops ? 2 : 3;
1561 
1562     address nooverlap_target = aligned ? StubRoutines::arrayof_oop_disjoint_arraycopy(dest_uninitialized)
1563                                        : StubRoutines::oop_disjoint_arraycopy(dest_uninitialized);
1564 
1565     // Branch to disjoint_copy (if applicable) before pre_barrier to avoid double pre_barrier.
1566     array_overlap_test(nooverlap_target, shift);  // Branch away to nooverlap_target if disjoint.
1567 
1568     gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);
1569 
1570     generate_conjoint_copy(aligned, size, true);  // Must preserve ARG2, ARG3.
1571 
1572     gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);
1573 
1574     return __ addr_at(start_off);
1575   }
1576 
1577 
1578   void generate_arraycopy_stubs() {
1579 
1580     // Note: the disjoint stubs must be generated first, some of
1581     // the conjoint stubs use them.
1582     StubRoutines::_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy (false, "jbyte_disjoint_arraycopy");
1583     StubRoutines::_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
1584     StubRoutines::_jint_disjoint_arraycopy       = generate_disjoint_int_copy  (false, "jint_disjoint_arraycopy");
1585     StubRoutines::_jlong_disjoint_arraycopy      = generate_disjoint_long_copy (false, "jlong_disjoint_arraycopy");
1586     StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_oop_copy  (false, "oop_disjoint_arraycopy", false);
1587     StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy  (false, "oop_disjoint_arraycopy_uninit", true);
1588 
1589     StubRoutines::_arrayof_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy (true, "arrayof_jbyte_disjoint_arraycopy");
1590     StubRoutines::_arrayof_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
1591     StubRoutines::_arrayof_jint_disjoint_arraycopy       = generate_disjoint_int_copy  (true, "arrayof_jint_disjoint_arraycopy");
1592     StubRoutines::_arrayof_jlong_disjoint_arraycopy      = generate_disjoint_long_copy (true, "arrayof_jlong_disjoint_arraycopy");
1593     StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy  (true, "arrayof_oop_disjoint_arraycopy", false);
1594     StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy  (true, "arrayof_oop_disjoint_arraycopy_uninit", true);
1595 
1596     StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy (false, "jbyte_arraycopy");
1597     StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, "jshort_arraycopy");
1598     StubRoutines::_jint_arraycopy            = generate_conjoint_int_copy  (false, "jint_arraycopy");
1599     StubRoutines::_jlong_arraycopy           = generate_conjoint_long_copy (false, "jlong_arraycopy");
1600     StubRoutines::_oop_arraycopy             = generate_conjoint_oop_copy  (false, "oop_arraycopy", false);
1601     StubRoutines::_oop_arraycopy_uninit      = generate_conjoint_oop_copy  (false, "oop_arraycopy_uninit", true);
1602 
1603     StubRoutines::_arrayof_jbyte_arraycopy      = generate_conjoint_byte_copy (true, "arrayof_jbyte_arraycopy");
1604     StubRoutines::_arrayof_jshort_arraycopy     = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
1605     StubRoutines::_arrayof_jint_arraycopy       = generate_conjoint_int_copy  (true, "arrayof_jint_arraycopy");
1606     StubRoutines::_arrayof_jlong_arraycopy      = generate_conjoint_long_copy (true, "arrayof_jlong_arraycopy");
1607     StubRoutines::_arrayof_oop_arraycopy        = generate_conjoint_oop_copy  (true, "arrayof_oop_arraycopy", false);
1608     StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy  (true, "arrayof_oop_arraycopy_uninit", true);
1609   }
1610 
1611   void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {
1612 
1613     // safefetch signatures:
1614     //   int      SafeFetch32(int*      adr, int      errValue);
1615     //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
1616     //
1617     // arguments:
1618     //   Z_ARG1 = adr
1619     //   Z_ARG2 = errValue
1620     //
1621     // result:
1622     //   Z_RET  = *adr or errValue
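    //
    // If the load at *entry faults, the signal handler resumes execution at *continuation_pc,
    // leaving the errValue preloaded into Z_ARG2 untouched, so it is returned instead.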
1623 
1624     StubCodeMark mark(this, "StubRoutines", name);
1625 
1626     // entry point
1627     // Load *adr into Z_ARG2, may fault.
1628     *entry = *fault_pc = __ pc();
1629     switch (size) {
1630       case 4:
1631         // Sign extended int32_t.
1632         __ z_lgf(Z_ARG2, 0, Z_ARG1);
1633         break;
1634       case 8:
1635         // int64_t
1636         __ z_lg(Z_ARG2, 0, Z_ARG1);
1637         break;
1638       default:
1639         ShouldNotReachHere();
1640     }
1641 
1642     // Return errValue or *adr.
1643     *continuation_pc = __ pc();
1644     __ z_lgr(Z_RET, Z_ARG2);
1645     __ z_br(Z_R14);
1646 
1647   }
1648 
1649   // Call interface for AES_encryptBlock, AES_decryptBlock stubs.
1650   //
1651   //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
1652   //   Z_ARG2 - destination data block. Ptr to leftmost byte to be stored.
1653   //            For in-place encryption/decryption, ARG1 and ARG2 can point
1654   //            to the same piece of storage.
1655   //   Z_ARG3 - Crypto key address (expanded key). The first n bits of
1656   //            the expanded key constitute the original AES-<n> key (see below).
1657   //
1658   //   Z_RET  - return value. First unprocessed byte offset in src buffer.
1659   //
1660   // Some remarks:
1661   //   The crypto key, as passed from the caller to these encryption stubs,
1662   //   is a so-called expanded key. It is derived from the original key
1663   //   by the Rijndael key schedule, see http://en.wikipedia.org/wiki/Rijndael_key_schedule
1664   //   With the expanded key, the cipher/decipher task is decomposed in
1665   //   multiple, less complex steps, called rounds. Sun SPARC and Intel
1666   //   processors obviously implement support for those less complex steps.
1667   //   z/Architecture provides instructions for full cipher/decipher complexity.
1668   //   Therefore, we need the original, not the expanded key here.
1669   //   Luckily, the first n bits of an AES-<n> expanded key are formed
1670   //   by the original key itself. That takes us out of trouble. :-)
1671   //   The key length (in bytes) relation is as follows:
1672   //     original    expanded   rounds  key bit     keylen
1673   //    key bytes   key bytes            length   in words
1674   //           16         176       11      128         44
1675   //           24         208       13      192         52
1676   //           32         240       15      256         60
1677   //
1678   // The crypto instructions used in the AES* stubs have some specific register requirements.
1679   //   Z_R0   holds the crypto function code. Please refer to the KM/KMC instruction
1680   //          description in the "z/Architecture Principles of Operation" manual for details.
1681   //   Z_R1   holds the parameter block address. The parameter block contains the cryptographic key
1682   //          (KM instruction) and the chaining value (KMC instruction).
1683   //   dst    must designate an even-numbered register, holding the address of the output message.
1684   //   src    must designate an even/odd register pair, holding the address/length of the original message
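  //
  //   In the stubs below, this maps to src = Z_ARG1 (Z_R2, even), srclen = Z_ARG2 (Z_R3, odd,
  //   pairing with src), and dst = Z_ARG3 (Z_R4, even), which satisfies these constraints.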
1685 
  // Helper function which generates code to
  //  - load the function code into register fCode (== Z_R0),
  //  - load the data block length (depends on the cipher function) into register srclen,
  //  - select the cipher or decipher function code depending on is_decipher.
1691   void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) {
1692 
1693     BLOCK_COMMENT("Set fCode {"); {
1694       Label fCode_set;
1695       int   mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
1696       bool  identical_dataBlk_len =  (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk)
1697                                   && (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
1698       // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256.
1699       __ z_cghi(keylen, 52); // Check only once at the beginning. keylen and fCode may share the same register.
1700 
1701       __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
1702       if (!identical_dataBlk_len) {
1703         __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
1704       }
1705       __ z_brl(fCode_set);  // keyLen <  52: AES128
1706 
1707       __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode);
1708       if (!identical_dataBlk_len) {
1709         __ z_lghi(srclen, VM_Version::Cipher::_AES192_dataBlk);
1710       }
1711       __ z_bre(fCode_set);  // keyLen == 52: AES192
1712 
1713       __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
1714       if (!identical_dataBlk_len) {
1715         __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk);
1716       }
      // __ z_brh(fCode_set);  // keyLen >  52: AES256           // fallthru
1718 
1719       __ bind(fCode_set);
1720       if (identical_dataBlk_len) {
1721         __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
1722       }
1723     }
1724     BLOCK_COMMENT("} Set fCode");
1725   }
1726 
1727   // Push a parameter block for the cipher/decipher instruction on the stack.
1728   // Layout of the additional stack space allocated for AES_cipherBlockChaining:
1729   //
1730   //   |        |
1731   //   +--------+ <-- SP before expansion
1732   //   |        |
1733   //   :        :  alignment loss, 0..(AES_parmBlk_align-8) bytes
1734   //   |        |
1735   //   +--------+
1736   //   |        |
1737   //   :        :  space for parameter block, size VM_Version::Cipher::_AES*_parmBlk_C
1738   //   |        |
1739   //   +--------+ <-- parmBlk, octoword-aligned, start of parameter block
1740   //   |        |
1741   //   :        :  additional stack space for spills etc., size AES_parmBlk_addspace, DW @ Z_SP not usable!!!
1742   //   |        |
1743   //   +--------+ <-- Z_SP after expansion
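  //
  // Illustrative sizing (assuming AES-128, i.e. a 16-byte chaining value plus a 16-byte key,
  // giving a 32-byte parameter block): resize_len = 16 + 16 + AES_parmBlk_align +
  // AES_parmBlk_addspace = 32 + 32 + 24 = 88 bytes, of which up to AES_parmBlk_align-8
  // bytes are lost to the alignment step sketched above.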
1744 
1745   void generate_push_Block(int dataBlk_len, int parmBlk_len, int crypto_fCode,
1746                            Register parmBlk, Register keylen, Register fCode, Register cv, Register key) {
1747     const int AES_parmBlk_align    = 32;  // octoword alignment.
1748     const int AES_parmBlk_addspace = 24;  // Must be sufficiently large to hold all spilled registers
1749                                           // (currently 2) PLUS 1 DW for the frame pointer.
1750 
1751     const int cv_len     = dataBlk_len;
1752     const int key_len    = parmBlk_len - cv_len;
1753     // This len must be known at JIT compile time. Only then are we able to recalc the SP before resize.
1754     // We buy this knowledge by wasting some (up to AES_parmBlk_align) bytes of stack space.
1755     const int resize_len = cv_len + key_len + AES_parmBlk_align + AES_parmBlk_addspace;
1756 
1757     // Use parmBlk as temp reg here to hold the frame pointer.
1758     __ resize_frame(-resize_len, parmBlk, true);
1759 
1760     // calculate parmBlk address from updated (resized) SP.
1761     __ add2reg(parmBlk, resize_len - (cv_len + key_len), Z_SP);
1762     __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // Align parameter block.
1763 
1764     // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace+8, parmBlk).
1765     __ z_stg(keylen,  -8, parmBlk);                        // Spill keylen for later use.
1766 
1767     // calculate (SP before resize) from updated SP.
1768     __ add2reg(keylen, resize_len, Z_SP);                  // keylen holds prev SP for now.
1769     __ z_stg(keylen, -16, parmBlk);                        // Spill prev SP for easy revert.
1770 
1771     __ z_mvc(0,      cv_len-1,  parmBlk, 0, cv);     // Copy cv.
1772     __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);    // Copy key.
1773     __ z_lghi(fCode, crypto_fCode);
1774   }
1775 
1776   // NOTE:
1777   //   Before returning, the stub has to copy the chaining value from
1778   //   the parmBlk, where it was updated by the crypto instruction, back
1779   //   to the chaining value array the address of which was passed in the cv argument.
1780   //   As all the available registers are used and modified by KMC, we need to save
1781   //   the key length across the KMC instruction. We do so by spilling it to the stack,
1782   //   just preceding the parmBlk (at (parmBlk - 8)).
1783   void generate_push_parmBlk(Register keylen, Register fCode, Register parmBlk, Register key, Register cv, bool is_decipher) {
1784     int       mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
1785     Label     parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
1786 
1787     BLOCK_COMMENT("push parmBlk {");
1788     if (VM_Version::has_Crypto_AES()   ) { __ z_cghi(keylen, 52); }
1789     if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); }  // keyLen <  52: AES128
1790     if (VM_Version::has_Crypto_AES192()) { __ z_bre(parmBlk_192); }  // keyLen == 52: AES192
1791     if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); }  // keyLen >  52: AES256
1792 
1793     // Security net: requested AES function not available on this CPU.
1794     // NOTE:
1795     //   As of now (March 2015), this safety net is not required. JCE policy files limit the
1796     //   cryptographic strength of the keys used to 128 bit. If we have AES hardware support
1797     //   at all, we have at least AES-128.
1798     __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAES as remedy.", 0);
1799 
1800     if (VM_Version::has_Crypto_AES256()) {
1801       __ bind(parmBlk_256);
1802       generate_push_Block(VM_Version::Cipher::_AES256_dataBlk,
1803                           VM_Version::Cipher::_AES256_parmBlk_C,
1804                           VM_Version::Cipher::_AES256 + mode,
1805                           parmBlk, keylen, fCode, cv, key);
1806       if (VM_Version::has_Crypto_AES128() || VM_Version::has_Crypto_AES192()) {
1807         __ z_bru(parmBlk_set);  // Fallthru otherwise.
1808       }
1809     }
1810 
1811     if (VM_Version::has_Crypto_AES192()) {
1812       __ bind(parmBlk_192);
1813       generate_push_Block(VM_Version::Cipher::_AES192_dataBlk,
1814                           VM_Version::Cipher::_AES192_parmBlk_C,
1815                           VM_Version::Cipher::_AES192 + mode,
1816                           parmBlk, keylen, fCode, cv, key);
1817       if (VM_Version::has_Crypto_AES128()) {
1818         __ z_bru(parmBlk_set);  // Fallthru otherwise.
1819       }
1820     }
1821 
1822     if (VM_Version::has_Crypto_AES128()) {
1823       __ bind(parmBlk_128);
1824       generate_push_Block(VM_Version::Cipher::_AES128_dataBlk,
1825                           VM_Version::Cipher::_AES128_parmBlk_C,
1826                           VM_Version::Cipher::_AES128 + mode,
1827                           parmBlk, keylen, fCode, cv, key);
1828       // Fallthru
1829     }
1830 
1831     __ bind(parmBlk_set);
1832     BLOCK_COMMENT("} push parmBlk");
1833   }
1834 
1835   // Pop a parameter block from the stack. The chaining value portion of the parameter block
1836   // is copied back to the cv array as it is needed for subsequent cipher steps.
1837   // The keylen value as well as the original SP (before resizing) was pushed to the stack
1838   // when pushing the parameter block.
1839   void generate_pop_parmBlk(Register keylen, Register parmBlk, Register key, Register cv) {
1840 
1841     BLOCK_COMMENT("pop parmBlk {");
1842     bool identical_dataBlk_len =  (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) &&
1843                                   (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
1844     if (identical_dataBlk_len) {
1845       int cv_len = VM_Version::Cipher::_AES128_dataBlk;
1846       __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
1847     } else {
1848       int cv_len;
1849       Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
1850       __ z_lg(keylen, -8, parmBlk);  // restore keylen
1851       __ z_cghi(keylen, 52);
1852       if (VM_Version::has_Crypto_AES256()) __ z_brh(parmBlk_256);  // keyLen >  52: AES256
1853       if (VM_Version::has_Crypto_AES192()) __ z_bre(parmBlk_192);  // keyLen == 52: AES192
1854       // if (VM_Version::has_Crypto_AES128()) __ z_brl(parmBlk_128);  // keyLen <  52: AES128  // fallthru
1855 
1856       // Security net: there is no one here. If we would need it, we should have
1857       // fallen into it already when pushing the parameter block.
1858       if (VM_Version::has_Crypto_AES128()) {
1859         __ bind(parmBlk_128);
1860         cv_len = VM_Version::Cipher::_AES128_dataBlk;
1861         __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
1862         if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
1863           __ z_bru(parmBlk_set);
1864         }
1865       }
1866 
1867       if (VM_Version::has_Crypto_AES192()) {
1868         __ bind(parmBlk_192);
1869         cv_len = VM_Version::Cipher::_AES192_dataBlk;
1870         __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
1871         if (VM_Version::has_Crypto_AES256()) {
1872           __ z_bru(parmBlk_set);
1873         }
1874       }
1875 
1876       if (VM_Version::has_Crypto_AES256()) {
1877         __ bind(parmBlk_256);
1878         cv_len = VM_Version::Cipher::_AES256_dataBlk;
1879         __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
1880         // __ z_bru(parmBlk_set);  // fallthru
1881       }
1882       __ bind(parmBlk_set);
1883     }
    __ z_lg(Z_SP, -16, parmBlk); // Restore the original Z_SP (saved by push_parmBlk); reverts the frame resize.
1885     BLOCK_COMMENT("} pop parmBlk");
1886   }
1887 
1888   // Compute AES encrypt/decrypt function.
1889   void generate_AES_cipherBlock(bool is_decipher) {
1890     // Incoming arguments.
1891     Register       from    = Z_ARG1; // source byte array
1892     Register       to      = Z_ARG2; // destination byte array
1893     Register       key     = Z_ARG3; // expanded key array
1894 
1895     const Register keylen  = Z_R0;   // Temporarily (until fCode is set) holds the expanded key array length.
1896 
1897     // Register definitions as required by KM instruction.
1898     const Register fCode   = Z_R0;   // crypto function code
1899     const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
1900     const Register src     = Z_ARG1; // Must be even reg (KM requirement).
1901     const Register srclen  = Z_ARG2; // Must be odd reg and pair with src. Overwrites destination address.
1902     const Register dst     = Z_ARG3; // Must be even reg (KM requirement). Overwrites expanded key address.
1903 
1904     // Read key len of expanded key (in 4-byte words).
1905     __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
1906 
1907     // Copy arguments to registers as required by crypto instruction.
1908     __ z_lgr(parmBlk, key);          // crypto key (in T_INT array).
1909     __ lgr_if_needed(src, from);     // Copy src address. Will not emit, src/from are identical.
1910     __ z_lgr(dst, to);               // Copy dst address, even register required.
1911 
1912     // Construct function code into fCode(Z_R0), data block length into srclen(Z_ARG2).
1913     generate_load_AES_fCode(keylen, fCode, srclen, is_decipher);
1914 
1915     __ km(dst, src);                 // Cipher the message.
1916 
1917     __ z_br(Z_R14);
1918   }
1919 
1920   // Compute AES encrypt function.
1921   address generate_AES_encryptBlock(const char* name) {
1922     __ align(CodeEntryAlignment);
1923     StubCodeMark mark(this, "StubRoutines", name);
1924     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1925 
1926     generate_AES_cipherBlock(false);
1927 
1928     return __ addr_at(start_off);
1929   }
1930 
1931   // Compute AES decrypt function.
1932   address generate_AES_decryptBlock(const char* name) {
1933     __ align(CodeEntryAlignment);
1934     StubCodeMark mark(this, "StubRoutines", name);
1935     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1936 
1937     generate_AES_cipherBlock(true);
1938 
1939     return __ addr_at(start_off);
1940   }
1941 
1942   // These stubs receive the addresses of the cryptographic key and of the chaining value as two separate
1943   // arguments (registers "key" and "cv", respectively). The KMC instruction, on the other hand, requires
1944   // chaining value and key to be, in this sequence, adjacent in storage. Thus, we need to allocate some
1945   // thread-local working storage. Using heap memory incurs all the hassles of allocating/freeing.
1946   // Stack space, on the contrary, is deallocated automatically when we return from the stub to the caller.
1947   // *** WARNING ***
1948   // Please note that we do not formally allocate stack space, nor do we
1949   // update the stack pointer. Therefore, no function calls are allowed
1950   // and nobody else must use the stack range where the parameter block
1951   // is located.
1952   // We align the parameter block to the next available octoword.
1953   //
1954   // Compute chained AES encrypt function.
1955   void generate_AES_cipherBlockChaining(bool is_decipher) {
1956 
1957     Register       from    = Z_ARG1; // source byte array (clear text)
1958     Register       to      = Z_ARG2; // destination byte array (ciphered)
1959     Register       key     = Z_ARG3; // expanded key array.
1960     Register       cv      = Z_ARG4; // chaining value
1961     const Register msglen  = Z_ARG5; // Total length of the msg to be encrypted. Value must be returned
1962                                      // in Z_RET upon completion of this stub. Is 32-bit integer.
1963 
1964     const Register keylen  = Z_R0;   // Expanded key length, as read from key array. Temp only.
1965     const Register fCode   = Z_R0;   // crypto function code
1966     const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
1967     const Register src     = Z_ARG1; // is Z_R2
1968     const Register srclen  = Z_ARG2; // Overwrites destination address.
1969     const Register dst     = Z_ARG3; // Overwrites key address.
1970 
1971     // Read key len of expanded key (in 4-byte words).
1972     __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
1973 
1974     // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
1975     // Construct function code in fCode (Z_R0).
1976     generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, is_decipher);
1977 
1978     // Prepare other registers for instruction.
1979     __ lgr_if_needed(src, from);     // Copy src address. Will not emit, src/from are identical.
1980     __ z_lgr(dst, to);
    __ z_llgfr(srclen, msglen);      // msglen is passed as a 32-bit int; zero-extend to the 64-bit length required here.
1982 
1983     __ kmc(dst, src);                // Cipher the message.
1984 
1985     generate_pop_parmBlk(keylen, parmBlk, key, cv);
1986 
    __ z_llgfr(Z_RET, msglen);       // Return msglen, zero-extended to 64 bits.
1988     __ z_br(Z_R14);
1989   }
1990 
1991   // Compute chained AES encrypt function.
1992   address generate_cipherBlockChaining_AES_encrypt(const char* name) {
1993     __ align(CodeEntryAlignment);
1994     StubCodeMark mark(this, "StubRoutines", name);
1995     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
1996 
1997     generate_AES_cipherBlockChaining(false);
1998 
1999     return __ addr_at(start_off);
2000   }
2001 
2002   // Compute chained AES encrypt function.
2003   address generate_cipherBlockChaining_AES_decrypt(const char* name) {
2004     __ align(CodeEntryAlignment);
2005     StubCodeMark mark(this, "StubRoutines", name);
2006     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
2007 
2008     generate_AES_cipherBlockChaining(true);
2009 
2010     return __ addr_at(start_off);
2011   }
2012 
2013 
2014   // Call interface for all SHA* stubs.
2015   //
2016   //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
2017   //   Z_ARG2 - current SHA state. Ptr to state area. This area serves as
2018   //            parameter block as required by the crypto instruction.
2019   //   Z_ARG3 - current byte offset in source data block.
2020   //   Z_ARG4 - last byte offset in source data block.
2021   //            (Z_ARG4 - Z_ARG3) gives the #bytes remaining to be processed.
2022   //
2023   //   Z_RET  - return value. First unprocessed byte offset in src buffer.
2024   //
2025   //   A few notes on the call interface:
2026   //    - All stubs, whether they are single-block or multi-block, are assumed to
2027   //      digest an integer multiple of the data block length of data. All data
2028   //      blocks are digested using the intermediate message digest (KIMD) instruction.
2029   //      Special end processing, as done by the KLMD instruction, seems to be
2030   //      emulated by the calling code.
2031   //
2032   //    - Z_ARG1 addresses the first byte of source data. The offset (Z_ARG3) is
2033   //      already accounted for.
2034   //
2035   //    - The current SHA state (the intermediate message digest value) is contained
2036   //      in an area addressed by Z_ARG2. The area size depends on the SHA variant
2037   //      and is accessible via the enum VM_Version::MsgDigest::_SHA<n>_parmBlk_I
2038   //
2039   //    - The single-block stub is expected to digest exactly one data block, starting
2040   //      at the address passed in Z_ARG1.
2041   //
2042   //    - The multi-block stub is expected to digest all data blocks which start in
2043   //      the offset interval [srcOff(Z_ARG3), srcLimit(Z_ARG4)). The exact difference
2044   //      (srcLimit-srcOff), rounded up to the next multiple of the data block length,
2045   //      gives the number of blocks to digest. It must be assumed that the calling code
2046   //      provides for a large enough source data buffer.
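  //
  //    - Worked example (illustrative): with srcOff = 0, srcLimit = 100, and the 64-byte
  //      SHA-1 data block length, the multi-block stub rounds 100 up to 128, digests two
  //      full blocks, and returns 128 as the offset of the first unprocessed byte.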
2047   //
2048   // Compute SHA-1 function.
2049   address generate_SHA1_stub(bool multiBlock, const char* name) {
2050     __ align(CodeEntryAlignment);
2051     StubCodeMark mark(this, "StubRoutines", name);
2052     unsigned int start_off = __ offset();   // Remember stub start address (is rtn value).
2053 
2054     const Register srcBuff        = Z_ARG1; // Points to first block to process (offset already added).
2055     const Register SHAState       = Z_ARG2; // Only on entry. Reused soon thereafter for kimd register pairs.
2056     const Register srcOff         = Z_ARG3; // int
2057     const Register srcLimit       = Z_ARG4; // Only passed in multiBlock case. int
2058 
2059     const Register SHAState_local = Z_R1;
2060     const Register SHAState_save  = Z_ARG3;
2061     const Register srcBufLen      = Z_ARG2; // Destroys state address, must be copied before.
2062     Label useKLMD, rtn;
2063 
2064     __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA1);   // function code
2065     __ z_lgr(SHAState_local, SHAState);                                 // SHAState == parameter block
2066 
2067     if (multiBlock) {  // Process everything from offset to limit.
2068 
      // The following description is valid if we get a raw source data buffer
      // spanning the range [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description
      // in place to inform the reader what must happen in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_dataBlk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, which saves
      // us from copying the SHA state back and forth. Otherwise, we copy the SHA state
      // to the stack, execute a KLMD instruction on it, and copy the result back to the
      // caller's SHA state location.
2080 
2081       // Total #srcBuff blocks to process.
2082       if (VM_Version::has_DistinctOpnds()) {
2083         __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference
2084         __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);   // round up
2085         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
2086         __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value.
2087         __ z_llgfr(srcBufLen, srcBufLen);      // Cast to 64-bit.
2088       } else {
2089         __ z_lgfr(srcBufLen, srcLimit);        // Exact difference. srcLimit passed as int.
2090         __ z_sgfr(srcBufLen, srcOff);          // SrcOff passed as int, now properly casted to long.
2091         __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);   // round up
2092         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
2093         __ z_lgr(srcLimit, srcOff);            // SrcLimit temporarily holds return value.
2094         __ z_agr(srcLimit, srcBufLen);
2095       }
2096 
2097       // Integral #blocks to digest?
2098       // As a result of the calculations above, srcBufLen MUST be an integer
2099       // multiple of _SHA1_dataBlk, or else we are in big trouble.
2100       // We insert an asm_assert into the KLMD case to guard against that.
2101       __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);
2102       __ z_brc(Assembler::bcondNotAllZero, useKLMD);
2103 
2104       // Process all full blocks.
2105       __ kimd(srcBuff);
2106 
2107       __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
2108     } else {  // Process one data block only.
2109       __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA1_dataBlk);   // #srcBuff bytes to process
2110       __ kimd(srcBuff);
2111       __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA1_dataBlk, srcOff);            // Offset of first unprocessed byte in buffer. No 32 to 64 bit extension needed.
2112     }
2113 
2114     __ bind(rtn);
2115     __ z_br(Z_R14);
2116 
2117     if (multiBlock) {
2118       __ bind(useKLMD);
2119 
2120 #if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE: The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA1 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
2124 #endif
2125     }
2126 
2127     return __ addr_at(start_off);
2128   }
2129 
2130   // Compute SHA-256 function.
2131   address generate_SHA256_stub(bool multiBlock, const char* name) {
2132     __ align(CodeEntryAlignment);
2133     StubCodeMark mark(this, "StubRoutines", name);
2134     unsigned int start_off = __ offset();   // Remember stub start address (is rtn value).
2135 
2136     const Register srcBuff        = Z_ARG1;
2137     const Register SHAState       = Z_ARG2; // Only on entry. Reused soon thereafter.
2138     const Register SHAState_local = Z_R1;
2139     const Register SHAState_save  = Z_ARG3;
2140     const Register srcOff         = Z_ARG3;
2141     const Register srcLimit       = Z_ARG4;
2142     const Register srcBufLen      = Z_ARG2; // Destroys state address, must be copied before.
2143     Label useKLMD, rtn;
2144 
2145     __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA256); // function code
2146     __ z_lgr(SHAState_local, SHAState);                                 // SHAState == parameter block
2147 
2148     if (multiBlock) {  // Process everything from offset to limit.
      // The following description is valid if we get a raw source data buffer
      // spanning the range [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description
      // in place to inform the reader what must happen in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_dataBlk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, which saves
      // us from copying the SHA state back and forth. Otherwise, we copy the SHA state
      // to the stack, execute a KLMD instruction on it, and copy the result back to the
      // caller's SHA state location.
2160 
2161       // total #srcBuff blocks to process
2162       if (VM_Version::has_DistinctOpnds()) {
2163         __ z_srk(srcBufLen, srcLimit, srcOff);   // exact difference
2164         __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up
2165         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
2166         __ z_ark(srcLimit, srcOff, srcBufLen);   // Srclimit temporarily holds return value.
2167         __ z_llgfr(srcBufLen, srcBufLen);        // Cast to 64-bit.
2168       } else {
2169         __ z_lgfr(srcBufLen, srcLimit);          // exact difference
2170         __ z_sgfr(srcBufLen, srcOff);
2171         __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up
2172         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
2173         __ z_lgr(srcLimit, srcOff);              // Srclimit temporarily holds return value.
2174         __ z_agr(srcLimit, srcBufLen);
2175       }
2176 
2177       // Integral #blocks to digest?
2178       // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA256_dataBlk, or else we are in big trouble.
2180       // We insert an asm_assert into the KLMD case to guard against that.
2181       __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);
2182       __ z_brc(Assembler::bcondNotAllZero, useKLMD);
2183 
2184       // Process all full blocks.
2185       __ kimd(srcBuff);
2186 
2187       __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
2188     } else {  // Process one data block only.
2189       __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA256_dataBlk); // #srcBuff bytes to process
2190       __ kimd(srcBuff);
2191       __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA256_dataBlk, srcOff);          // Offset of first unprocessed byte in buffer.
2192     }
2193 
2194     __ bind(rtn);
2195     __ z_br(Z_R14);
2196 
2197     if (multiBlock) {
2198       __ bind(useKLMD);
2199 #if 1
2200       // Security net: this stub is believed to be called for full-sized data blocks only.
2201       // NOTE:
      //   The following code is believed to be correct, but it is not tested.
2203       __ stop_static("SHA256 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
2204 #endif
2205     }
2206 
2207     return __ addr_at(start_off);
2208   }
2209 
2210   // Compute SHA-512 function.
2211   address generate_SHA512_stub(bool multiBlock, const char* name) {
2212     __ align(CodeEntryAlignment);
2213     StubCodeMark mark(this, "StubRoutines", name);
2214     unsigned int start_off = __ offset();   // Remember stub start address (is rtn value).
2215 
2216     const Register srcBuff        = Z_ARG1;
2217     const Register SHAState       = Z_ARG2; // Only on entry. Reused soon thereafter.
2218     const Register SHAState_local = Z_R1;
2219     const Register SHAState_save  = Z_ARG3;
2220     const Register srcOff         = Z_ARG3;
2221     const Register srcLimit       = Z_ARG4;
2222     const Register srcBufLen      = Z_ARG2; // Destroys state address, must be copied before.
2223     Label useKLMD, rtn;
2224 
2225     __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA512); // function code
2226     __ z_lgr(SHAState_local, SHAState);                                 // SHAState == parameter block
2227 
2228     if (multiBlock) {  // Process everything from offset to limit.
      // The following description is valid if we get a raw source data buffer
      // spanning the range [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description
      // in place to inform the reader what must happen in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_dataBlk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, which saves
      // us from copying the SHA state back and forth. Otherwise, we copy the SHA state
      // to the stack, execute a KLMD instruction on it, and copy the result back to the
      // caller's SHA state location.
2240 
2241       // total #srcBuff blocks to process
2242       if (VM_Version::has_DistinctOpnds()) {
2243         __ z_srk(srcBufLen, srcLimit, srcOff);   // exact difference
2244         __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up
2245         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
2246         __ z_ark(srcLimit, srcOff, srcBufLen);   // Srclimit temporarily holds return value.
2247         __ z_llgfr(srcBufLen, srcBufLen);        // Cast to 64-bit.
2248       } else {
2249         __ z_lgfr(srcBufLen, srcLimit);          // exact difference
2250         __ z_sgfr(srcBufLen, srcOff);
2251         __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up
2252         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
2253         __ z_lgr(srcLimit, srcOff);              // Srclimit temporarily holds return value.
2254         __ z_agr(srcLimit, srcBufLen);
2255       }
2256 
2257       // integral #blocks to digest?
2258       // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA512_dataBlk, or else we are in big trouble.
2260       // We insert an asm_assert into the KLMD case to guard against that.
2261       __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);
2262       __ z_brc(Assembler::bcondNotAllZero, useKLMD);
2263 
2264       // Process all full blocks.
2265       __ kimd(srcBuff);
2266 
2267       __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
2268     } else {  // Process one data block only.
2269       __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA512_dataBlk); // #srcBuff bytes to process
2270       __ kimd(srcBuff);
2271       __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA512_dataBlk, srcOff);          // Offset of first unprocessed byte in buffer.
2272     }
2273 
2274     __ bind(rtn);
2275     __ z_br(Z_R14);
2276 
2277     if (multiBlock) {
2278       __ bind(useKLMD);
2279 #if 1
2280       // Security net: this stub is believed to be called for full-sized data blocks only
2281       // NOTE:
      //   The following code is believed to be correct, but it is not tested.
2283       __ stop_static("SHA512 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
2284 #endif
2285     }
2286 
2287     return __ addr_at(start_off);
2288   }
2289 
2290 
2291   /**
2292    *  Arguments:
2293    *
2294    * Inputs:
2295    *   Z_ARG1    - int   crc
2296    *   Z_ARG2    - byte* buf
2297    *   Z_ARG3    - int   length (of buffer)
2298    *
2299    * Result:
2300    *   Z_RET     - int   crc result
2301    **/
2302   // Compute CRC function (generic, for all polynomials).
2303   void generate_CRC_updateBytes(const char* name, Register table, bool invertCRC) {
2304 
2305     // arguments to kernel_crc32:
2306     Register       crc     = Z_ARG1;  // Current checksum, preset by caller or result from previous call, int.
2307     Register       data    = Z_ARG2;  // source byte array
2308     Register       dataLen = Z_ARG3;  // #bytes to process, int
2309 //    Register       table   = Z_ARG4;  // crc table address. Preloaded and passed in by caller.
2310     const Register t0      = Z_R10;   // work reg for kernel* emitters
2311     const Register t1      = Z_R11;   // work reg for kernel* emitters
2312     const Register t2      = Z_R12;   // work reg for kernel* emitters
2313     const Register t3      = Z_R13;   // work reg for kernel* emitters
2314 
2315     assert_different_registers(crc, data, dataLen, table);
2316 
    // The values are passed in as 32-bit ints, while the C calling convention requires longs.
    // Zero-extend dataLen; crc is used as an int.
    __ z_llgfr(dataLen, dataLen);
2320 
    __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide additional space for register spills.
    __ z_stmg(Z_R10, Z_R13, 1*8, Z_SP);  // Spill regs 10..13 to make them available as work registers.
    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, invertCRC);
    __ z_lmg(Z_R10, Z_R13, 1*8, Z_SP);   // Restore regs 10..13 from the stack.
    __ resize_frame(+(6*8), Z_R0, true); // Revert the frame resize.
2326 
2327     __ z_llgfr(Z_RET, crc);  // Updated crc is function result. No copying required, just zero upper 32 bits.
2328     __ z_br(Z_R14);          // Result already in Z_RET == Z_ARG1.
2329   }
2330 
2331 
2332   // Compute CRC32 function.
2333   address generate_CRC32_updateBytes(const char* name) {
2334     __ align(CodeEntryAlignment);
2335     StubCodeMark mark(this, "StubRoutines", name);
2336     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
2337 
2338     assert(UseCRC32Intrinsics, "should not generate this stub (%s) with CRC32 intrinsics disabled", name);
2339 
2340     BLOCK_COMMENT("CRC32_updateBytes {");
2341     Register       table   = Z_ARG4;  // crc32 table address.
2342     StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
2343 
2344     generate_CRC_updateBytes(name, table, true);
2345     BLOCK_COMMENT("} CRC32_updateBytes");
2346 
2347     return __ addr_at(start_off);
2348   }
2349 
2350 
2351   // Compute CRC32C function.
2352   address generate_CRC32C_updateBytes(const char* name) {
2353     __ align(CodeEntryAlignment);
2354     StubCodeMark mark(this, "StubRoutines", name);
2355     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
2356 
2357     assert(UseCRC32CIntrinsics, "should not generate this stub (%s) with CRC32C intrinsics disabled", name);
2358 
2359     BLOCK_COMMENT("CRC32C_updateBytes {");
2360     Register       table   = Z_ARG4;  // crc32c table address.
2361     StubRoutines::zarch::generate_load_crc32c_table_addr(_masm, table);
2362 
2363     generate_CRC_updateBytes(name, table, false);
2364     BLOCK_COMMENT("} CRC32C_updateBytes");
2365 
2366     return __ addr_at(start_off);
2367   }
2368 
2369 
2370   // Arguments:
2371   //   Z_ARG1    - x address
2372   //   Z_ARG2    - x length
2373   //   Z_ARG3    - y address
2374   //   Z_ARG4    - y length
2375   //   Z_ARG5    - z address
2376   //   160[Z_SP] - z length
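  //   (The z/Architecture C ABI places stack arguments at offset 160 from Z_SP, hence 160[Z_SP].)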
2377   address generate_multiplyToLen() {
2378     __ align(CodeEntryAlignment);
2379     StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
2380 
2381     address start = __ pc();
2382 
2383     const Register x    = Z_ARG1;
2384     const Register xlen = Z_ARG2;
2385     const Register y    = Z_ARG3;
2386     const Register ylen = Z_ARG4;
2387     const Register z    = Z_ARG5;
2388     // zlen is passed on the stack:
2389     // Address zlen(Z_SP, _z_abi(remaining_cargs));
2390 
2391     // Next registers will be saved on stack in multiply_to_len().
2392     const Register tmp1 = Z_tmp_1;
2393     const Register tmp2 = Z_tmp_2;
2394     const Register tmp3 = Z_tmp_3;
2395     const Register tmp4 = Z_tmp_4;
2396     const Register tmp5 = Z_R9;
2397 
2398     BLOCK_COMMENT("Entry:");
2399 
2400     __ z_llgfr(xlen, xlen);
2401     __ z_llgfr(ylen, ylen);
2402 
2403     __ multiply_to_len(x, xlen, y, ylen, z, tmp1, tmp2, tmp3, tmp4, tmp5);
2404 
2405     __ z_br(Z_R14);  // Return to caller.
2406 
2407     return start;
2408   }
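
  // For reference, a plain C++ sketch of the operation the generated stub performs: schoolbook
  // multiplication of two magnitudes stored as 32-bit limbs with the most significant limb at
  // index 0, as used by the BigInteger multiply-to-len intrinsic (UseMultiplyToLenIntrinsic).
  // This is an editorial illustration only (assuming <cstdint> types); it is not compiled into the VM.
  //
  //   static void multiply_to_len_reference(const uint32_t* x, int xlen,
  //                                         const uint32_t* y, int ylen,
  //                                         uint32_t* z /* xlen + ylen limbs */) {
  //     for (int i = 0; i < xlen + ylen; i++) { z[i] = 0; }
  //     for (int i = xlen - 1; i >= 0; i--) {     // x limbs, least significant first.
  //       uint64_t carry = 0;
  //       for (int j = ylen - 1; j >= 0; j--) {   // y limbs, least significant first.
  //         uint64_t prod = (uint64_t)x[i] * y[j] + z[i + j + 1] + carry;
  //         z[i + j + 1] = (uint32_t)prod;        // Low 32 bits stay in this column.
  //         carry = prod >> 32;                   // High 32 bits carry into the next column.
  //       }
  //       z[i] = (uint32_t)carry;                 // Top limb of this partial product.
  //     }
  //   }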
2409 
2410   void generate_initial() {
2411     // Generates the initial set of stubs and initializes their entry points.
2412 
2413     // Entry points that exist in all platforms.
2414     // Note: This is code that could be shared among different
2415     // platforms - however the benefit seems to be smaller than the
2416     // disadvantage of having a much more complicated generator
2417     // structure. See also comment in stubRoutines.hpp.
2418     StubRoutines::_forward_exception_entry                 = generate_forward_exception();
2419 
2420     StubRoutines::_call_stub_entry                         = generate_call_stub(StubRoutines::_call_stub_return_address);
2421     StubRoutines::_catch_exception_entry                   = generate_catch_exception();
2422 
2423     // Build this early so it's available for the interpreter.
2424     StubRoutines::_throw_StackOverflowError_entry          =
2425       generate_throw_exception("StackOverflowError throw_exception",
2426                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
2427     StubRoutines::_throw_delayed_StackOverflowError_entry  =
2428       generate_throw_exception("delayed StackOverflowError throw_exception",
2429                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), false);
2430 
2431     //----------------------------------------------------------------------
2432     // Entry points that are platform specific.
2433 
2434     if (UseCRC32Intrinsics) {
2435       StubRoutines::_crc_table_adr     = (address)StubRoutines::zarch::_crc_table;
2436       StubRoutines::_updateBytesCRC32  = generate_CRC32_updateBytes("CRC32_updateBytes");
2437     }
2438 
2439     if (UseCRC32CIntrinsics) {
2440       StubRoutines::_crc32c_table_addr = (address)StubRoutines::zarch::_crc32c_table;
2441       StubRoutines::_updateBytesCRC32C = generate_CRC32C_updateBytes("CRC32C_updateBytes");
2442     }
2443 
2444     // Compact string intrinsics: Translate table for the string inflate intrinsic. Used by the trot instruction.
2445     StubRoutines::zarch::_trot_table_addr = (address)StubRoutines::zarch::_trot_table;
2446   }
2447 
2448 
2449   void generate_all() {
2450     // Generates the remaining stubs and initializes their entry points.
2451 
2452     StubRoutines::zarch::_partial_subtype_check            = generate_partial_subtype_check();
2453 
2454     // These entry points require SharedInfo::stack0 to be set up in non-core builds.
2455     StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
2456     StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
2457     StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
2458 
2459     // Support for verify_oop (must happen after universe_init).
2460     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop_subroutine();
2461 
2462     // Arraycopy stubs used by compilers.
2463     generate_arraycopy_stubs();
2464 
2465     // safefetch stubs
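    // SafeFetch32/SafeFetchN read a 32-bit/word-sized value from an address that may be invalid.
    // The recorded fault and continuation PCs let the signal handler resume the stub and return
    // the caller-supplied default value if the access faults.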
2466     generate_safefetch("SafeFetch32", sizeof(int),      &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, &StubRoutines::_safefetch32_continuation_pc);
2467     generate_safefetch("SafeFetchN",  sizeof(intptr_t), &StubRoutines::_safefetchN_entry,  &StubRoutines::_safefetchN_fault_pc,  &StubRoutines::_safefetchN_continuation_pc);
2468 
2469     // Generate AES intrinsics code.
2470     if (UseAESIntrinsics) {
2471       StubRoutines::_aescrypt_encryptBlock = generate_AES_encryptBlock("AES_encryptBlock");
2472       StubRoutines::_aescrypt_decryptBlock = generate_AES_decryptBlock("AES_decryptBlock");
2473       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_AES_encrypt("AES_encryptBlock_chaining");
2474       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_AES_decrypt("AES_decryptBlock_chaining");
2475     }
2476 
2477     // Generate SHA1/SHA256/SHA512 intrinsics code.
2478     if (UseSHA1Intrinsics) {
2479       StubRoutines::_sha1_implCompress     = generate_SHA1_stub(false,   "SHA1_singleBlock");
2480       StubRoutines::_sha1_implCompressMB   = generate_SHA1_stub(true,    "SHA1_multiBlock");
2481     }
2482     if (UseSHA256Intrinsics) {
2483       StubRoutines::_sha256_implCompress   = generate_SHA256_stub(false, "SHA256_singleBlock");
2484       StubRoutines::_sha256_implCompressMB = generate_SHA256_stub(true,  "SHA256_multiBlock");
2485     }
2486     if (UseSHA512Intrinsics) {
2487       StubRoutines::_sha512_implCompress   = generate_SHA512_stub(false, "SHA512_singleBlock");
2488       StubRoutines::_sha512_implCompressMB = generate_SHA512_stub(true,  "SHA512_multiBlock");
2489     }
2490 
2491 #ifdef COMPILER2
2492     if (UseMultiplyToLenIntrinsic) {
2493       StubRoutines::_multiplyToLen = generate_multiplyToLen();
2494     }
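    // Note: unlike the stubs above, Montgomery multiply/square are not generated here; their
    // entry points refer directly to the C implementations in SharedRuntime.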
2495     if (UseMontgomeryMultiplyIntrinsic) {
2496       StubRoutines::_montgomeryMultiply
2497         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
2498     }
2499     if (UseMontgomerySquareIntrinsic) {
2500       StubRoutines::_montgomerySquare
2501         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
2502     }
2503 #endif
2504   }
2505 
2506  public:
2507   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
2508     // Replace the standard masm with a special one:
2509     _masm = new MacroAssembler(code);
2510 
2511     _stub_count = !all ? 0x100 : 0x200;
2512     if (all) {
2513       generate_all();
2514     } else {
2515       generate_initial();
2516     }
2517   }
2518 
2519  private:
2520   int _stub_count;
2521   void stub_prolog(StubCodeDesc* cdesc) {
2522 #ifdef ASSERT
2523     // Put extra information in the stub code, to make it more readable.
2524     // Write the high part of the address.
2525     // [RGV] Check if there is a dependency on the size of this prolog.
2526     __ emit_32((intptr_t)cdesc >> 32);
2527     __ emit_32((intptr_t)cdesc);
2528     __ emit_32(++_stub_count);
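    // In total, three 32-bit words (12 bytes) of marker data precede each stub in ASSERT builds.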
2529 #endif
2530     align(true);
2531   }
2532 
2533   void align(bool at_header = false) {
2534     // z/Architecture cache line size is 256 bytes.
2535     // There is no obvious benefit in aligning stub
2536     // code to cache lines. Use CodeEntryAlignment instead.
2537     const unsigned int icache_line_size      = CodeEntryAlignment;
2538     const unsigned int icache_half_line_size = MIN2<unsigned int>(32, CodeEntryAlignment);
2539 
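    // Padding strategy: at a stub header, zero halfwords are emitted as pure data filler
    // (presumably never executed, since callers branch to the aligned entry); elsewhere,
    // nops are used so that code can safely fall through the padding.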
2540     if (at_header) {
2541       while ((intptr_t)(__ pc()) % icache_line_size != 0) {
2542         __ emit_16(0);
2543       }
2544     } else {
2545       while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
2546         __ z_nop();
2547       }
2548     }
2549   }
2550 
2551 };
2552 
2553 void StubGenerator_generate(CodeBuffer* code, bool all) {
2554   StubGenerator g(code, all);
2555 }