/*
 * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "registerSaver_s390.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "nativeInst_s390.hpp"
#include "oops/instanceOop.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#ifdef PRODUCT
#define __ _masm->
#else
#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
#endif

#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")

// -----------------------------------------------------------------------
// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

  //----------------------------------------------------------------------
  // Call stubs are used to call Java from C.

  //
  // Arguments:
  //
  //   R2        - call wrapper address     : address
  //   R3        - result                   : intptr_t*
  //   R4        - result type              : BasicType
  //   R5        - method                   : method
  //   R6        - frame mgr entry point    : address
  //   [SP+160]  - parameter block          : intptr_t*
  //   [SP+172]  - parameter count in words : int
  //   [SP+176]  - thread                   : Thread*
  //
  address generate_call_stub(address& return_address) {
    // Set up a new C frame, copy Java arguments, call frame manager
    // or native_entry, and process result.

    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    Register r_arg_call_wrapper_addr   = Z_ARG1;
    Register r_arg_result_addr         = Z_ARG2;
    Register r_arg_result_type         = Z_ARG3;
    Register r_arg_method              = Z_ARG4;
    Register r_arg_entry               = Z_ARG5;

    // offsets to fp
    #define d_arg_thread 176
    #define d_arg_argument_addr 160
    #define d_arg_argument_count 168+4
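    // These offsets are relative to r_entryframe_fp (the caller's SP on entry) and
    // correspond to the [SP+160], [SP+172], and [SP+176] slots described in the
    // header comment above.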

    Register r_entryframe_fp           = Z_tmp_1;
    Register r_top_of_arguments_addr   = Z_ARG4;
    Register r_new_arg_entry = Z_R14;

    // macros for frame offsets
    #define call_wrapper_address_offset \
               _z_entry_frame_locals_neg(call_wrapper_address)
    #define result_address_offset \
              _z_entry_frame_locals_neg(result_address)
    #define result_type_offset \
              _z_entry_frame_locals_neg(result_type)
    #define arguments_tos_address_offset \
              _z_entry_frame_locals_neg(arguments_tos_address)

    {
      //
      // STACK on entry to call_stub:
      //
      //     F1      [C_FRAME]
      //            ...
      //

      Register r_argument_addr              = Z_tmp_3;
      Register r_argumentcopy_addr          = Z_tmp_4;
      Register r_argument_size_in_bytes     = Z_ARG5;
      Register r_frame_size                 = Z_R1;

      Label arguments_copied;

      // Save non-volatile registers to ABI of caller frame.
      BLOCK_COMMENT("save registers, push frame {");
      __ z_stmg(Z_R6, Z_R14, 16, Z_SP);
      __ z_std(Z_F8, 96, Z_SP);
      __ z_std(Z_F9, 104, Z_SP);
      __ z_std(Z_F10, 112, Z_SP);
      __ z_std(Z_F11, 120, Z_SP);
      __ z_std(Z_F12, 128, Z_SP);
      __ z_std(Z_F13, 136, Z_SP);
      __ z_std(Z_F14, 144, Z_SP);
      __ z_std(Z_F15, 152, Z_SP);

      //
      // Push ENTRY_FRAME including arguments:
      //
      //     F0      [TOP_IJAVA_FRAME_ABI]
      //             [outgoing Java arguments]
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //

      // Calculate new frame size and push frame.
      #define abi_plus_locals_size \
                (frame::z_top_ijava_frame_abi_size + frame::z_entry_frame_locals_size)
      if (abi_plus_locals_size % BytesPerWord == 0) {
        // Preload constant part of frame size.
        __ load_const_optimized(r_frame_size, -abi_plus_locals_size/BytesPerWord);
        // Keep copy of our frame pointer (caller's SP).
        __ z_lgr(r_entryframe_fp, Z_SP);
        // Add space required by arguments to frame size.
        __ z_slgf(r_frame_size, d_arg_argument_count, Z_R0, Z_SP);
        // Move Z_ARG5 early, it will be used as a local.
        __ z_lgr(r_new_arg_entry, r_arg_entry);
        // Convert frame size from words to bytes.
        __ z_sllg(r_frame_size, r_frame_size, LogBytesPerWord);
        __ push_frame(r_frame_size, r_entryframe_fp,
                      false/*don't copy SP*/, true /*frame size sign inverted*/);
      } else {
        guarantee(false, "frame sizes should be multiples of word size (BytesPerWord)");
      }
      BLOCK_COMMENT("} save, push");

      // Load argument registers for call.
      BLOCK_COMMENT("prepare/copy arguments {");
      __ z_lgr(Z_method, r_arg_method);
      __ z_lg(Z_thread, d_arg_thread, r_entryframe_fp);

      // Calculate top_of_arguments_addr which will be tos (not prepushed) later.
      // Simply use SP + frame::z_top_ijava_frame_abi_size.
      __ add2reg(r_top_of_arguments_addr,
                 frame::z_top_ijava_frame_abi_size - BytesPerWord, Z_SP);

      // Initialize call_stub locals (step 1).
      if ((call_wrapper_address_offset + BytesPerWord == result_address_offset) &&
          (result_address_offset + BytesPerWord == result_type_offset)          &&
          (result_type_offset + BytesPerWord == arguments_tos_address_offset)) {

        __ z_stmg(r_arg_call_wrapper_addr, r_top_of_arguments_addr,
                  call_wrapper_address_offset, r_entryframe_fp);
      } else {
        __ z_stg(r_arg_call_wrapper_addr,
                 call_wrapper_address_offset, r_entryframe_fp);
        __ z_stg(r_arg_result_addr,
                 result_address_offset, r_entryframe_fp);
        __ z_stg(r_arg_result_type,
                 result_type_offset, r_entryframe_fp);
        __ z_stg(r_top_of_arguments_addr,
                 arguments_tos_address_offset, r_entryframe_fp);
      }

      // Copy Java arguments.

      // Any arguments to copy?
      __ load_and_test_int2long(Z_R1, Address(r_entryframe_fp, d_arg_argument_count));
      __ z_bre(arguments_copied);

      // Prepare loop and copy arguments in reverse order.
      {
        // Calculate argument size in bytes.
        __ z_sllg(r_argument_size_in_bytes, Z_R1, LogBytesPerWord);

        // Get addr of first incoming Java argument.
        __ z_lg(r_argument_addr, d_arg_argument_addr, r_entryframe_fp);

        // Let r_argumentcopy_addr point to last outgoing Java argument.
        __ add2reg(r_argumentcopy_addr, BytesPerWord, r_top_of_arguments_addr); // = Z_SP+160 effectively.

        // Let r_argument_addr point to last incoming Java argument.
        __ add2reg_with_index(r_argument_addr, -BytesPerWord,
                              r_argument_size_in_bytes, r_argument_addr);

        // Now loop while Z_R1 > 0 and copy arguments.
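        // (z_brct decrements Z_R1 by one and branches back while the result is non-zero.)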
        {
          Label next_argument;
          __ bind(next_argument);
          // Mem-mem move.
          __ z_mvc(0, BytesPerWord-1, r_argumentcopy_addr, 0, r_argument_addr);
          __ add2reg(r_argument_addr,    -BytesPerWord);
          __ add2reg(r_argumentcopy_addr, BytesPerWord);
          __ z_brct(Z_R1, next_argument);
        }
      }  // End of argument copy loop.

      __ bind(arguments_copied);
    }
    BLOCK_COMMENT("} arguments");

    BLOCK_COMMENT("call {");
    {
      // Call frame manager or native entry.

      //
      // Register state on entry to frame manager / native entry:
      //
      //   Z_ARG1 = r_top_of_arguments_addr  - intptr_t *sender tos (prepushed)
      //                                       Lesp = (SP) + copied_arguments_offset - 8
      //   Z_method                          - method
      //   Z_thread                          - JavaThread*
      //

      // Here, the usual SP is the initial_caller_sp.
      __ z_lgr(Z_R10, Z_SP);

      // Z_esp points to the slot below the last argument.
      __ z_lgr(Z_esp, r_top_of_arguments_addr);

      //
      // Stack on entry to frame manager / native entry:
      //
      //     F0      [TOP_IJAVA_FRAME_ABI]
      //             [outgoing Java arguments]
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //

      // Do a light-weight C-call here, r_new_arg_entry holds the address
      // of the interpreter entry point (frame manager or native entry)
      // and save runtime-value of return_pc in return_address
      // (call by reference argument).
      return_address = __ call_stub(r_new_arg_entry);
    }
    BLOCK_COMMENT("} call");

    {
      BLOCK_COMMENT("restore registers {");
      // Returned from frame manager or native entry.
      // Now pop frame, process result, and return to caller.

      //
      // Stack on exit from frame manager / native entry:
      //
      //     F0      [ABI]
      //             ...
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //
      // Just pop the topmost frame ...
      //

      Label ret_is_object;
      Label ret_is_long;
      Label ret_is_float;
      Label ret_is_double;

      // Restore frame pointer.
      __ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP);
      // Pop frame. Done here to minimize stalls.
      __ pop_frame();

      // Reload some volatile registers which we've spilled before the call
      // to frame manager / native entry.
      // Access all locals via frame pointer, because we know nothing about
      // the topmost frame's size.
      __ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp);
      __ z_lg(r_arg_result_type, result_type_offset, r_entryframe_fp);

      // Restore non-volatiles.
      __ z_lmg(Z_R6, Z_R14, 16, Z_SP);
      __ z_ld(Z_F8, 96, Z_SP);
      __ z_ld(Z_F9, 104, Z_SP);
      __ z_ld(Z_F10, 112, Z_SP);
      __ z_ld(Z_F11, 120, Z_SP);
      __ z_ld(Z_F12, 128, Z_SP);
      __ z_ld(Z_F13, 136, Z_SP);
      __ z_ld(Z_F14, 144, Z_SP);
      __ z_ld(Z_F15, 152, Z_SP);
      BLOCK_COMMENT("} restore");

      //
      // Stack on exit from call_stub:
      //
      //     0       [C_FRAME]
      //             ...
      //
      // No call_stub frames left.
      //

      // All non-volatiles have been restored at this point!!

      //------------------------------------------------------------------------
      // The following code makes some assumptions on the T_<type> enum values.
      // The enum is defined in globalDefinitions.hpp.
      // The validity of the assumptions is tested as far as possible.
      //   The assigned values should not be shuffled
      //   T_BOOLEAN==4    - lowest used enum value
      //   T_NARROWOOP==16 - largest used enum value
      //------------------------------------------------------------------------
      BLOCK_COMMENT("process result {");
      Label firstHandler;
      int   handlerLen= 8;
#ifdef ASSERT
      char  assertMsg[] = "check BasicType definition in globalDefinitions.hpp";
      __ z_chi(r_arg_result_type, T_BOOLEAN);
      __ asm_assert_low(assertMsg, 0x0234);
      __ z_chi(r_arg_result_type, T_NARROWOOP);
      __ asm_assert_high(assertMsg, 0x0235);
#endif
      __ add2reg(r_arg_result_type, -T_BOOLEAN);          // Remove offset.
      __ z_larl(Z_R1, firstHandler);                      // location of first handler
      __ z_sllg(r_arg_result_type, r_arg_result_type, 3); // Each handler is 8 bytes long.
      __ z_bc(MacroAssembler::bcondAlways, 0, r_arg_result_type, Z_R1);
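      // The branch above dispatches into the table of result handlers that follows;
      // each handler occupies exactly handlerLen (8) bytes, so the scaled result type
      // can be used directly as an index.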

      __ align(handlerLen);
      __ bind(firstHandler);
      // T_BOOLEAN:
        guarantee(T_BOOLEAN == 4, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_CHAR:
        guarantee(T_CHAR == T_BOOLEAN+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_FLOAT:
        guarantee(T_FLOAT == T_CHAR+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_ste(Z_FRET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_DOUBLE:
        guarantee(T_DOUBLE == T_FLOAT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_std(Z_FRET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_BYTE:
        guarantee(T_BYTE == T_DOUBLE+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_SHORT:
        guarantee(T_SHORT == T_BYTE+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_INT:
        guarantee(T_INT == T_SHORT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_LONG:
        guarantee(T_LONG == T_INT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_OBJECT:
        guarantee(T_OBJECT == T_LONG+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_ARRAY:
        guarantee(T_ARRAY == T_OBJECT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_VOID:
        guarantee(T_VOID == T_ARRAY+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_ADDRESS:
        guarantee(T_ADDRESS == T_VOID+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_NARROWOOP:
        guarantee(T_NARROWOOP == T_ADDRESS+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      BLOCK_COMMENT("} process result");
    }
    return start;
  }

  // Return point for a Java call if there's an exception thrown in
  // Java code. The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");

    address start = __ pc();

    //
    // Registers alive
    //
    //   Z_thread
    //   Z_ARG1 - address of pending exception
    //   Z_ARG2 - return address in call stub
    //

    const Register exception_file = Z_R0;
    const Register exception_line = Z_R1;

    __ load_const_optimized(exception_file, (void*)__FILE__);
    __ load_const_optimized(exception_line, (void*)__LINE__);

    __ z_stg(Z_ARG1, thread_(pending_exception));
    // Store into `char *'.
    __ z_stg(exception_file, thread_(exception_file));
    // Store into `int'.
    __ z_st(exception_line, thread_(exception_line));

    // Complete return to VM.
    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");

    // Continue in call stub.
    __ z_br(Z_ARG2);

    return start;
  }

  // Continuation point for runtime calls returning with a pending
  // exception. The pending exception check happened in the runtime
  // or native call stub. The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Read:
  //   Z_R14: pc the runtime library callee wants to return to.
  //   Since the exception occurred in the callee, the return pc
  //   from the point of view of Java is the exception pc.
  //
  // Invalidate:
  //   Volatile registers (except below).
  //
  // Update:
  //   Z_ARG1: exception
  //   (Z_R14 is unchanged and is live out).
  //
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    #define pending_exception_offset in_bytes(Thread::pending_exception_offset())
#ifdef ASSERT
    // Get pending exception oop.
    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

    // Make sure that this code is only executed if there is a pending exception.
    {
      Label L;
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_brne(L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }

    __ verify_oop(Z_ARG1, "StubRoutines::forward exception: not an oop");
#endif

    __ z_lgr(Z_ARG2, Z_R14); // Copy exception pc into Z_ARG2.
    __ save_return_pc();
    __ push_frame_abi160(0);
    // Find exception handler.
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
                    Z_thread,
                    Z_ARG2);
    // Copy handler's address.
    __ z_lgr(Z_R1, Z_RET);
    __ pop_frame();
    __ restore_return_pc();

    // Set up the arguments for the exception handler:
    // - Z_ARG1: exception oop
    // - Z_ARG2: exception pc

    // Load pending exception oop.
    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

    // The exception pc is the return address in the caller,
    // must load it into Z_ARG2
    __ z_lgr(Z_ARG2, Z_R14);

#ifdef ASSERT
    // Make sure exception is set.
    { Label L;
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_brne(L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // Clear the pending exception.
    __ clear_mem(Address(Z_thread, pending_exception_offset), sizeof(void *));
    // Jump to exception handler
    __ z_br(Z_R1 /*handler address*/);

    return start;

    #undef pending_exception_offset
  }

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Only callee-saved registers are preserved (through the
  // normal RegisterMap handling). If the compiler
  // needs all registers to be preserved between the fault point and
  // the exception handler then it must assume responsibility for that
  // in AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller saved registers were assumed volatile in the compiler.

  // Note that we generate only this stub into a RuntimeStub, because
  // it needs to be properly traversed and ignored during GC, so we
  // change the meaning of the "__" macro within this method.

  // Note: the routine set_pc_not_at_call_for_caller in
  // SharedRuntime.cpp requires that this code be generated into a
  // RuntimeStub.
#undef __
#define __ masm->

  address generate_throw_exception(const char* name, address runtime_entry,
                                   bool restore_saved_exception_pc,
                                   Register arg1 = noreg, Register arg2 = noreg) {
    assert_different_registers(arg1, Z_R0_scratch);  // would be destroyed by push_frame()
    assert_different_registers(arg2, Z_R0_scratch);  // would be destroyed by push_frame()

    int insts_size = 256;
    int locs_size  = 0;
    CodeBuffer      code(name, insts_size, locs_size);
    MacroAssembler* masm = new MacroAssembler(&code);
    int framesize_in_bytes;
    address start = __ pc();

    __ save_return_pc();
    framesize_in_bytes = __ push_frame_abi160(0);

    address frame_complete_pc = __ pc();
    if (restore_saved_exception_pc) {
      __ unimplemented("StubGenerator::throw_exception", 74);
    }

    // Note that we always have a runtime stub frame on the top of stack at this point.
    __ get_PC(Z_R1);
    __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);

    // Do the call.
    BLOCK_COMMENT("call runtime_entry");
    __ call_VM_leaf(runtime_entry, Z_thread, arg1, arg2);

    __ reset_last_Java_frame();

#ifdef ASSERT
    // Make sure that this code is only executed if there is a pending exception.
    { Label L;
      __ z_lg(Z_R0,
                in_bytes(Thread::pending_exception_offset()),
                Z_thread);
      __ z_ltgr(Z_R0, Z_R0);
      __ z_brne(L);
      __ stop("StubRoutines::throw_exception: no pending exception");
      __ bind(L);
    }
#endif

    __ pop_frame();
    __ restore_return_pc();

    __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
    __ z_br(Z_R1);

    RuntimeStub* stub =
      RuntimeStub::new_runtime_stub(name, &code,
                                    frame_complete_pc - start,
                                    framesize_in_bytes/wordSize,
                                    NULL /*oop_maps*/, false);

    return stub->entry_point();
  }

#undef __
#ifdef PRODUCT
#define __ _masm->
#else
#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
#endif

  // Support for uint StubRoutine::zarch::partial_subtype_check(Klass
  // sub, Klass super);
  //
  // Arguments:
  //   ret  : Z_RET, returned
  //   sub  : Z_ARG2, argument, not changed
  //   super: Z_ARG3, argument, not changed
  //
  //   raddr: Z_R14, blown by call
  //
  address generate_partial_subtype_check() {
    StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    Label miss;

    address start = __ pc();

    const Register Rsubklass   = Z_ARG2; // subklass
    const Register Rsuperklass = Z_ARG3; // superklass

    // No args, but tmp registers that are killed.
    const Register Rlength     = Z_ARG4; // cache array length
    const Register Rarray_ptr  = Z_ARG5; // Current value from cache array.

    if (UseCompressedOops) {
      assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub");
    }

    // Always take the slow path (see SPARC).
    __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass,
                                     Rarray_ptr, Rlength, NULL, &miss);

    // Match falls through here.
    __ clear_reg(Z_RET);               // Zero indicates a match. Set EQ flag in CC.
    __ z_br(Z_R14);

    __ BIND(miss);
    __ load_const_optimized(Z_RET, 1); // One indicates a miss.
    __ z_ltgr(Z_RET, Z_RET);           // Set NE flag in CC.
    __ z_br(Z_R14);

    return start;
  }

  // Return address of code to be called from code generated by
  // MacroAssembler::verify_oop.
  //
  // Don't generate, rather use C++ code.
  address generate_verify_oop_subroutine() {
    // Don't generate a StubCodeMark, because no code is generated!
    // Generating the mark triggers notifying the oprofile jvmti agent
    // about the dynamic code generation, but the stub without
    // code (code_size == 0) confuses opjitconv
    // StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");

    address start = 0;
    return start;
  }

  // Generate pre-write barrier for array.
  //
  // Input:
  //    addr  - register containing starting address
  //    count - register containing element count
  //
  // The input registers are overwritten.
  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {

    BarrierSet* const bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCTLogging:
        // With G1, don't generate the call if we statically know that the target is uninitialized.
        if (!dest_uninitialized) {
          // Is marking active?
          Label filtered;
          assert_different_registers(addr,  Z_R0_scratch);  // would be destroyed by push_frame()
          assert_different_registers(count, Z_R0_scratch);  // would be destroyed by push_frame()
          Register Rtmp1 = Z_R0_scratch;
          const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() +
                                             SATBMarkQueue::byte_offset_of_active());
          if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
            __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
          } else {
            guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
            __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
          }
          __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.

          // __ push_frame_abi160(0);  // implicitly done in save_live_registers()
          (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), addr, count);
          (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
          // __ pop_frame();  // implicitly done in restore_live_registers()

          __ bind(filtered);
        }
        break;
      case BarrierSet::CardTableForRS:
      case BarrierSet::CardTableExtension:
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();
    }
  }

  // Generate post-write barrier for array.
  //
  // Input:
  //    addr  - register containing starting address
  //    count - register containing element count
  //
  // The input registers are overwritten.
  void gen_write_ref_array_post_barrier(Register addr, Register count, bool branchToEnd) {
    BarrierSet* const bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCTLogging:
        {
          if (branchToEnd) {
            assert_different_registers(addr,  Z_R0_scratch);  // would be destroyed by push_frame()
            assert_different_registers(count, Z_R0_scratch);  // would be destroyed by push_frame()
            // __ push_frame_abi160(0);  // implicitly done in save_live_registers()
            (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
            __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
            (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
            // __ pop_frame();   // implicitly done in restore_live_registers()
          } else {
            // Tail call: call c and return to stub caller.
            address entry_point = CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
            __ lgr_if_needed(Z_ARG1, addr);
            __ lgr_if_needed(Z_ARG2, count);
            __ load_const(Z_R1, entry_point);
            __ z_br(Z_R1); // Branch without linking, callee will return to stub caller.
          }
        }
        break;
      case BarrierSet::CardTableForRS:
      case BarrierSet::CardTableExtension:
        // These cases formerly known as
        //   void array_store_check(Register addr, Register count, bool branchToEnd).
        {
          NearLabel doXC, done;
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
          assert_different_registers(Z_R0, Z_R1, addr, count);

          // Nothing to do if count <= 0.
          if (branchToEnd) {
            __ compare64_and_branch(count, (intptr_t) 0, Assembler::bcondNotHigh, done);
          } else {
            __ z_ltgr(count, count);
            __ z_bcr(Assembler::bcondNotPositive, Z_R14);
          }

          // Note: We can't combine the shifts. We could lose a carry
          // from calculating the array end address.
          // count = (count-1)*BytesPerHeapOop + addr
          // Count holds addr of last oop in array then.
          __ z_sllg(count, count, LogBytesPerHeapOop);
          __ add2reg_with_index(count, -BytesPerHeapOop, count, addr);

          // Get base address of card table.
          __ load_const_optimized(Z_R1, (address)ct->byte_map_base);

          // count = (count>>shift) - (addr>>shift)
          __ z_srlg(addr,  addr,  CardTableModRefBS::card_shift);
          __ z_srlg(count, count, CardTableModRefBS::card_shift);

          // Prefetch first elements of card table for update.
          if (VM_Version::has_Prefetch()) {
            __ z_pfd(0x02, 0, addr, Z_R1);
          }

          // Special case: clear just one byte.
          __ clear_reg(Z_R0, true, false);  // Used for doOneByte.
          __ z_sgr(count, addr);            // Count = n-1 now, CC used for brc below.
          __ z_stc(Z_R0, 0, addr, Z_R1);    // Must preserve CC from z_sgr.
          if (branchToEnd) {
            __ z_brz(done);
          } else {
            __ z_bcr(Assembler::bcondZero, Z_R14);
          }

          __ z_cghi(count, 255);
          __ z_brnh(doXC);

          // MVCLE: clear a long area.
          // Start addr of card table range = base + addr.
          // # bytes in    card table range = (count + 1)
          __ add2reg_with_index(Z_R0, 0, Z_R1, addr);
          __ add2reg(Z_R1, 1, count);

          // dirty hack:
          // There are just two callers. Both pass
          // count in Z_ARG3 = Z_R4
          // addr  in Z_ARG2 = Z_R3
          // ==> use Z_ARG2 as src len reg = 0
          //         Z_ARG1 as src addr (ignored)
          assert(count == Z_ARG3, "count: unexpected register number");
          assert(addr  == Z_ARG2, "addr:  unexpected register number");
          __ clear_reg(Z_ARG2, true, false);

          __ MacroAssembler::move_long_ext(Z_R0, Z_ARG1, 0);

          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_bcr(Assembler::bcondAlways, Z_R14);
          }

          // XC: clear a short area.
          Label XC_template; // Instr template, never exec directly!
          __ bind(XC_template);
          __ z_xc(0, 0, addr, 0, addr);

          __ bind(doXC);
          // start addr of card table range = base + addr
          // end   addr of card table range = base + addr + count
          __ add2reg_with_index(addr, 0, Z_R1, addr);

          if (VM_Version::has_ExecuteExtensions()) {
            __ z_exrl(count, XC_template);   // Execute XC with var. len.
          } else {
            __ z_larl(Z_R1, XC_template);
            __ z_ex(count, 0, Z_R0, Z_R1);   // Execute XC with var. len.
          }
          if (!branchToEnd) {
            __ z_br(Z_R14);
          }

          __ bind(done);
        }
        break;
      case BarrierSet::ModRef:
        if (!branchToEnd) { __ z_br(Z_R14); }
        break;
      default:
        ShouldNotReachHere();
    }
  }


  // This is to test that the count register contains a positive int value.
  // Required because C2 does not respect int to long conversion for stub calls.
  void assert_positive_int(Register count) {
#ifdef ASSERT
    __ z_srag(Z_R0, count, 31);  // Just leave the sign (must be zero) in Z_R0.
    __ asm_assert_eq("missing zero extend", 0xAFFE);
#endif
  }

  //  Generate overlap test for array copy stubs.
  //  If no actual overlap is detected, control is transferred to the
  //  "normal" copy stub (entry address passed in disjoint_copy_target).
  //  Otherwise, execution continues with the code generated by the
  //  caller of array_overlap_test.
  //
  //  Input:
  //    Z_ARG1    - from
  //    Z_ARG2    - to
  //    Z_ARG3    - element count
  void array_overlap_test(address disjoint_copy_target, int log2_elem_size) {
    __ MacroAssembler::compare_and_branch_optimized(Z_ARG2, Z_ARG1, Assembler::bcondNotHigh,
                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);

    Register index = Z_ARG3;
    if (log2_elem_size > 0) {
      __ z_sllg(Z_R1, Z_ARG3, log2_elem_size);  // byte count
      index = Z_R1;
    }
    __ add2reg_with_index(Z_R1, 0, index, Z_ARG1);  // First byte after "from" range.

    __ MacroAssembler::compare_and_branch_optimized(Z_R1, Z_ARG2, Assembler::bcondNotHigh,
                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);

    // Destructive overlap: let caller generate code for that.
  }

  //  Generate stub for disjoint array copy. If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  //  Arguments for generated stub:
  //      from:  Z_ARG1
  //      to:    Z_ARG2
  //      count: Z_ARG3 treated as signed
  void generate_disjoint_copy(bool aligned, int element_size,
                              bool branchToEnd,
                              bool restoreArgs) {
    // This is the zarch specific stub generator for general array copy tasks.
    // It has the following prereqs and features:
    //
    // - No destructive overlap allowed (else unpredictable results).
    // - Destructive overlap does not exist if the leftmost byte of the target
    //   does not coincide with any of the source bytes (except the leftmost).
    //
    //   Register usage upon entry:
    //      Z_ARG1 == Z_R2 :   address of source array
    //      Z_ARG2 == Z_R3 :   address of target array
    //      Z_ARG3 == Z_R4 :   length of operands (# of elements on entry)
    //
    // Register usage within the generator:
    // - Z_R0 and Z_R1 are KILLed by the stub routine (target addr/len).
    //                 Used as pair register operand in complex moves, scratch registers anyway.
    // - Z_R5 is KILLed by the stub routine (source register pair addr/len) (even/odd reg).
    //                  Same as R0/R1, but no scratch register.
    // - Z_ARG1, Z_ARG2, Z_ARG3 are USEd but preserved by the stub routine,
    //                          but they might get temporarily overwritten.

    Register  save_reg    = Z_ARG4;   // (= Z_R5), holds original target operand address for restore.

    {
      Register   llen_reg = Z_R1;     // Holds left operand len (odd reg).
      Register  laddr_reg = Z_R0;     // Holds left operand addr (even reg), overlaps with data_reg.
      Register   rlen_reg = Z_R5;     // Holds right operand len (odd reg), overlaps with save_reg.
      Register  raddr_reg = Z_R4;     // Holds right operand addr (even reg), overlaps with len_reg.

      Register   data_reg = Z_R0;     // Holds copied data chunk in alignment process and copy loop.
      Register    len_reg = Z_ARG3;   // Holds operand len (#elements at entry, #bytes shortly after).
      Register    dst_reg = Z_ARG2;   // Holds left (target)  operand addr.
      Register    src_reg = Z_ARG1;   // Holds right (source) operand addr.

      Label     doMVCLOOP, doMVCLOOPcount, doMVCLOOPiterate;
      Label     doMVCUnrolled;
      NearLabel doMVC,  doMVCgeneral, done;
      Label     MVC_template;
      address   pcMVCblock_b, pcMVCblock_e;

      bool      usedMVCLE       = true;
      bool      usedMVCLOOP     = true;
      bool      usedMVCUnrolled = false;
      bool      usedMVC         = false;
      bool      usedMVCgeneral  = false;

      int       stride;
      Register  stride_reg;
      Register  ix_reg;

      assert((element_size<=256) && (256%element_size == 0), "element size must be <= 256, power of 2");
      unsigned int log2_size = exact_log2(element_size);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("ARRAYCOPY DISJOINT byte  {"); break;
        case 2:  BLOCK_COMMENT("ARRAYCOPY DISJOINT short {"); break;
        case 4:  BLOCK_COMMENT("ARRAYCOPY DISJOINT int   {"); break;
        case 8:  BLOCK_COMMENT("ARRAYCOPY DISJOINT long  {"); break;
        default: BLOCK_COMMENT("ARRAYCOPY DISJOINT       {"); break;
      }

      assert_positive_int(len_reg);

      BLOCK_COMMENT("preparation {");

      // No copying if len <= 0.
      if (branchToEnd) {
        __ compare64_and_branch(len_reg, (intptr_t) 0, Assembler::bcondNotHigh, done);
      } else {
        if (VM_Version::has_CompareBranch()) {
          __ z_cgib(len_reg, 0, Assembler::bcondNotHigh, 0, Z_R14);
        } else {
          __ z_ltgr(len_reg, len_reg);
          __ z_bcr(Assembler::bcondNotPositive, Z_R14);
        }
      }

      // Prefetch just one cache line. Speculative opt for short arrays.
      // Do not use Z_R1 in prefetch. Is undefined here.
      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
      }

      BLOCK_COMMENT("} preparation");

      // Save args only if really needed.
      // Keep len test local to branch. Is generated only once.

      BLOCK_COMMENT("mode selection {");

      // Special handling for arrays with only a few elements.
      // Nothing fancy: just an executed MVC.
      if (log2_size > 0) {
        __ z_sllg(Z_R1, len_reg, log2_size); // Remember #bytes in Z_R1.
      }
      if (element_size != 8) {
        __ z_cghi(len_reg, 256/element_size);
        __ z_brnh(doMVC);
        usedMVC = true;
      }
      if (element_size == 8) { // Long and oop arrays are always aligned.
        __ z_cghi(len_reg, 256/element_size);
        __ z_brnh(doMVCUnrolled);
        usedMVCUnrolled = true;
      }

      // Prefetch another cache line. We, for sure, have more than one line to copy.
      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 256, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 256, Z_R0, dst_reg); // Store access.
      }

      if (restoreArgs) {
        // Remember entry value of ARG2 to restore all arguments later from that knowledge.
        __ z_lgr(save_reg, dst_reg);
      }

      __ z_cghi(len_reg, 4096/element_size);
      if (log2_size == 0) {
        __ z_lgr(Z_R1, len_reg); // Init Z_R1 with #bytes
      }
      __ z_brnh(doMVCLOOP);

      // Fall through to MVCLE case.

      BLOCK_COMMENT("} mode selection");

      // MVCLE: for long arrays
      //   DW aligned: Best performance for sizes > 4kBytes.
      //   unaligned:  Least complex for sizes > 256 bytes.
      if (usedMVCLE) {
        BLOCK_COMMENT("mode MVCLE {");

        // Setup registers for mvcle.
        //__ z_lgr(llen_reg, len_reg);// r1 <- r4  #bytes already in Z_R1, aka llen_reg.
        __ z_lgr(laddr_reg, dst_reg); // r0 <- r3
        __ z_lgr(raddr_reg, src_reg); // r4 <- r2
        __ z_lgr(rlen_reg, llen_reg); // r5 <- r1

        __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb0);    // special: bypass cache
        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb8); // special: Hold data in cache.
        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0);

        if (restoreArgs) {
          // MVCLE updates the source (Z_R4,Z_R5) and target (Z_R0,Z_R1) register pairs.
          // Dst_reg (Z_ARG2) and src_reg (Z_ARG1) are left untouched. No restore required.
          // Len_reg (Z_ARG3) is destroyed and must be restored.
          __ z_slgr(laddr_reg, dst_reg);    // copied #bytes
          if (log2_size > 0) {
            __ z_srag(Z_ARG3, laddr_reg, log2_size); // Convert back to #elements.
          } else {
            __ z_lgr(Z_ARG3, laddr_reg);
          }
        }
        if (branchToEnd) {
          __ z_bru(done);
        } else {
          __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVCLE");
      }
      // No fallthru possible here.

      //  MVCUnrolled: for short, aligned arrays.

      if (usedMVCUnrolled) {
        BLOCK_COMMENT("mode MVC unrolled {");
        stride = 8;

        // Generate unrolled MVC instructions.
        for (int ii = 32; ii > 1; ii--) {
          __ z_mvc(0, ii * stride-1, dst_reg, 0, src_reg); // ii*8 byte copy
          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }
        }

        pcMVCblock_b = __ pc();
        __ z_mvc(0, 1 * stride-1, dst_reg, 0, src_reg); // 8 byte copy
        if (branchToEnd) {
          __ z_bru(done);
        } else {
          __ z_br(Z_R14);
        }

        pcMVCblock_e = __ pc();
        Label MVC_ListEnd;
        __ bind(MVC_ListEnd);

        // This is an absolute fast path:
        // - Array len in bytes must be not greater than 256.
        // - Array len in bytes must be an integer mult of DW
        //   to save expensive handling of trailing bytes.
        // - Argument restore is not done,
        //   i.e. previous code must not alter arguments (this code doesn't either).

        __ bind(doMVCUnrolled);

        // Avoid mul, prefer shift where possible.
        // Combine shift right (for #DW) with shift left (for block size).
        // Set CC for zero test below (asm_assert).
        // Note: #bytes comes in Z_R1, #DW in len_reg.
        unsigned int MVCblocksize    = pcMVCblock_e - pcMVCblock_b;
        unsigned int logMVCblocksize = 0xffffffffU; // Pacify compiler ("used uninitialized" warning).

        if (log2_size > 0) { // Len was scaled into Z_R1.
          switch (MVCblocksize) {

            case  8: logMVCblocksize = 3;
                     __ z_ltgr(Z_R0, Z_R1); // #bytes is index
                     break;                 // reasonable size, use shift

            case 16: logMVCblocksize = 4;
                     __ z_slag(Z_R0, Z_R1, logMVCblocksize-log2_size);
                     break;                 // reasonable size, use shift

            default: logMVCblocksize = 0;
                     __ z_ltgr(Z_R0, len_reg); // #DW for mul
                     break;                 // all other sizes: use mul
          }
        } else {
          guarantee(log2_size, "doMVCUnrolled: only for DW entities");
        }

        // This test (and branch) is redundant. Previous code makes sure that
        //  - element count > 0
        //  - element size == 8.
        // Thus, len reg should never be zero here. We insert an asm_assert() here,
        // just to double-check and to be on the safe side.
        __ asm_assert(false, "zero len cannot occur", 99);

        __ z_larl(Z_R1, MVC_ListEnd);        // Get addr of last instr block.
        // Avoid mul, prefer shift where possible.
        if (logMVCblocksize == 0) {
          __ z_mghi(Z_R0, MVCblocksize);
        }
        __ z_slgr(Z_R1, Z_R0);
        __ z_br(Z_R1);
        BLOCK_COMMENT("} mode MVC unrolled");
      }
      // No fallthru possible here.

      // MVC execute template
      // Must always generate. Usage may be switched on below.
      // There is no suitable place after here to put the template.
      __ bind(MVC_template);
      __ z_mvc(0,0,dst_reg,0,src_reg);      // Instr template, never exec directly!
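      // The EX/EXRL instructions issued elsewhere in this stub supply the actual copy
      // length by OR-ing it into the length field of this MVC template at execution time.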


      // MVC Loop: for medium-sized arrays

      // Only for DW aligned arrays (src and dst).
      // #bytes to copy must be at least 256!!!
      // Non-aligned cases handled separately.
      stride     = 256;
      stride_reg = Z_R1;   // Holds #bytes when control arrives here.
      ix_reg     = Z_ARG3; // Alias for len_reg.


      if (usedMVCLOOP) {
        BLOCK_COMMENT("mode MVC loop {");
        __ bind(doMVCLOOP);

        __ z_lcgr(ix_reg, Z_R1);         // Ix runs from -(n-2)*stride to 1*stride (inclusive).
        __ z_llill(stride_reg, stride);
        __ add2reg(ix_reg, 2*stride);    // Thus: increment ix by 2*stride.

        __ bind(doMVCLOOPiterate);
          __ z_mvc(0, stride-1, dst_reg, 0, src_reg);
          __ add2reg(dst_reg, stride);
          __ add2reg(src_reg, stride);
          __ bind(doMVCLOOPcount);
          __ z_brxlg(ix_reg, stride_reg, doMVCLOOPiterate);
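          // BRXLG adds stride_reg to ix_reg and branches back while ix_reg is still
          // low or equal to the compare value (also stride_reg, an odd register).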

        // Don't use add2reg() here, since we must set the condition code!
1193         __ z_aghi(ix_reg, -2*stride);       // Compensate incr from above: zero diff means "all copied".
1194 
1195         if (restoreArgs) {
1196           __ z_lcgr(Z_R1, ix_reg);          // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
1197           __ z_brnz(doMVCgeneral);          // We're not done yet, ix_reg is not zero.
1198 
1199           // ARG1, ARG2, and ARG3 were altered by the code above, so restore them building on save_reg.
1200           __ z_slgr(dst_reg, save_reg);     // copied #bytes
1201           __ z_slgr(src_reg, dst_reg);      // = ARG1 (now restored)
1202           if (log2_size) {
1203             __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3.
1204           } else {
1205             __ z_lgr(Z_ARG3, dst_reg);
1206           }
1207           __ z_lgr(Z_ARG2, save_reg);       // ARG2 now restored.
1208 
1209           if (branchToEnd) {
1210             __ z_bru(done);
1211           } else {
1212             __ z_br(Z_R14);
1213           }
1214 
1215         } else {
1216             if (branchToEnd) {
1217               __ z_brz(done);                        // CC set by aghi instr.
1218           } else {
1219               __ z_bcr(Assembler::bcondZero, Z_R14); // We're all done if zero.
1220             }
1221 
1222           __ z_lcgr(Z_R1, ix_reg);    // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
1223           // __ z_bru(doMVCgeneral);  // fallthru
1224         }
1225         usedMVCgeneral = true;
1226         BLOCK_COMMENT("} mode MVC loop");
1227       }
1228       // Fallthru to doMVCgeneral
1229 
1230       // MVCgeneral: for short, unaligned arrays, after other copy operations
1231 
1232       // Somewhat expensive due to use of EX instruction, but simple.
1233       if (usedMVCgeneral) {
1234         BLOCK_COMMENT("mode MVC general {");
1235         __ bind(doMVCgeneral);
1236 
1237         __ add2reg(len_reg, -1, Z_R1);             // Get #bytes-1 for EXECUTE.
1238         if (VM_Version::has_ExecuteExtensions()) {
1239           __ z_exrl(len_reg, MVC_template);        // Execute MVC with variable length.
1240         } else {
1241           __ z_larl(Z_R1, MVC_template);           // Get addr of instr template.
1242           __ z_ex(len_reg, 0, Z_R0, Z_R1);         // Execute MVC with variable length.
1243         }                                          // penalty: 9 ticks
1244 
1245         if (restoreArgs) {
1246           // ARG1, ARG2, and ARG3 were altered by code executed before, so restore them building on save_reg
1247           __ z_slgr(dst_reg, save_reg);            // Copied #bytes without the "doMVCgeneral" chunk
1248           __ z_slgr(src_reg, dst_reg);             // = ARG1 (now restored), was not advanced for "doMVCgeneral" chunk
1249           __ add2reg_with_index(dst_reg, 1, len_reg, dst_reg); // Len of executed MVC was not accounted for, yet.
1250           if (log2_size) {
1251             __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3
1252           } else {
1253              __ z_lgr(Z_ARG3, dst_reg);
1254           }
1255           __ z_lgr(Z_ARG2, save_reg);              // ARG2 now restored.
1256         }
1257 
1258         if (usedMVC) {
1259           if (branchToEnd) {
1260             __ z_bru(done);
1261           } else {
1262             __ z_br(Z_R14);
1263         }
1264         } else {
1265           if (!branchToEnd) __ z_br(Z_R14);
1266         }
1267         BLOCK_COMMENT("} mode MVC general");
1268       }
1269       // Fallthru possible if following block not generated.
1270 
1271       // MVC: for short, unaligned arrays
1272 
1273       // Somewhat expensive due to use of EX instruction, but simple. penalty: 9 ticks.
1274       // Differs from doMVCgeneral in reconstruction of ARG2, ARG3, and ARG4.
1275       if (usedMVC) {
1276         BLOCK_COMMENT("mode MVC {");
1277         __ bind(doMVC);
1278 
1279         // get #bytes-1 for EXECUTE
1280         if (log2_size) {
1281           __ add2reg(Z_R1, -1);                // Length was scaled into Z_R1.
1282         } else {
1283           __ add2reg(Z_R1, -1, len_reg);       // Length was not scaled.
1284         }
1285 
1286         if (VM_Version::has_ExecuteExtensions()) {
1287           __ z_exrl(Z_R1, MVC_template);       // Execute MVC with variable length.
1288         } else {
1289           __ z_lgr(Z_R0, Z_R5);                // Save ARG4, may be unnecessary.
1290           __ z_larl(Z_R5, MVC_template);       // Get addr of instr template.
1291           __ z_ex(Z_R1, 0, Z_R0, Z_R5);        // Execute MVC with variable length.
1292           __ z_lgr(Z_R5, Z_R0);                // Restore ARG4, may be unnecessary.
1293         }
1294 
1295         if (!branchToEnd) {
1296           __ z_br(Z_R14);
1297         }
1298         BLOCK_COMMENT("} mode MVC");
1299       }
1300 
1301       __ bind(done);
1302 
1303       switch (element_size) {
1304         case 1:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT byte "); break;
1305         case 2:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT short"); break;
1306         case 4:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT int  "); break;
1307         case 8:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT long "); break;
1308         default: BLOCK_COMMENT("} ARRAYCOPY DISJOINT      "); break;
1309       }
1310     }
1311   }
1312 
1313   // Generate stub for conjoint array copy. If "aligned" is true, the
1314   // "from" and "to" addresses are assumed to be heapword aligned.
1315   //
1316   // Arguments for generated stub:
1317   //   from:  Z_ARG1
1318   //   to:    Z_ARG2
1319   //   count: Z_ARG3 treated as signed
1320   void generate_conjoint_copy(bool aligned, int element_size, bool branchToEnd) {
1321 
    // This is the zarch specific stub generator for conjoint (overlapping) array copy tasks.
1323     // It has the following prereqs and features:
1324     //
1325     // - Destructive overlap exists and is handled by reverse copy.
1326     // - Destructive overlap exists if the leftmost byte of the target
    //   coincides with any of the source bytes (except the leftmost).
1328     // - Z_R0 and Z_R1 are KILLed by the stub routine (data and stride)
1329     // - Z_ARG1 and Z_ARG2 are USEd but preserved by the stub routine.
1330     // - Z_ARG3 is USED but preserved by the stub routine.
1331     // - Z_ARG4 is used as index register and is thus KILLed.
1332     //
1333     {
1334       Register stride_reg = Z_R1;     // Stride & compare value in loop (negative element_size).
1335       Register   data_reg = Z_R0;     // Holds value of currently processed element.
1336       Register     ix_reg = Z_ARG4;   // Holds byte index of currently processed element.
1337       Register    len_reg = Z_ARG3;   // Holds length (in #elements) of arrays.
1338       Register    dst_reg = Z_ARG2;   // Holds left  operand addr.
1339       Register    src_reg = Z_ARG1;   // Holds right operand addr.
1340 
1341       assert(256%element_size == 0, "Element size must be power of 2.");
1342       assert(element_size     <= 8, "Can't handle more than DW units.");
1343 
1344       switch (element_size) {
1345         case 1:  BLOCK_COMMENT("ARRAYCOPY CONJOINT byte  {"); break;
1346         case 2:  BLOCK_COMMENT("ARRAYCOPY CONJOINT short {"); break;
1347         case 4:  BLOCK_COMMENT("ARRAYCOPY CONJOINT int   {"); break;
1348         case 8:  BLOCK_COMMENT("ARRAYCOPY CONJOINT long  {"); break;
1349         default: BLOCK_COMMENT("ARRAYCOPY CONJOINT       {"); break;
1350       }
1351 
1352       assert_positive_int(len_reg);
1353 
1354       if (VM_Version::has_Prefetch()) {
1355         __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
1356         __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
1357       }
1358 
1359       unsigned int log2_size = exact_log2(element_size);
1360       if (log2_size) {
1361         __ z_sllg(ix_reg, len_reg, log2_size);
1362       } else {
1363         __ z_lgr(ix_reg, len_reg);
1364       }
1365 
1366       // Optimize reverse copy loop.
1367       // Main loop copies DW units which may be unaligned. Unaligned access adds some penalty ticks.
1368       // Unaligned DW access (neither fetch nor store) is DW-atomic, but should be alignment-atomic.
1369       // Preceding the main loop, some bytes are copied to obtain a DW-multiple remaining length.
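      // Worked example (byte copy, 13 bytes): the pre-loop copies the byte at offset 12
      // and the 4 bytes at offsets 8..11, leaving an 8-byte multiple; the DW loop then
      // copies offsets 0..7 in a single iteration.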
1370 
1371       Label countLoop1;
1372       Label copyLoop1;
1373       Label skipBY;
1374       Label skipHW;
1375       int   stride = -8;
1376 
1377       __ load_const_optimized(stride_reg, stride); // Prepare for DW copy loop.
1378 
      if (element_size == 8) {  // Nothing to do here.
        __ z_bru(countLoop1);
      } else {                  // Do not generate dead code.
        __ z_tmll(ix_reg, 7);   // Check the "odd" bits.
        __ z_bre(countLoop1);   // There are none, very good!
      }
1385 
1386       if (log2_size == 0) {     // Handle leftover Byte.
1387         __ z_tmll(ix_reg, 1);
1388         __ z_bre(skipBY);
1389         __ z_lb(data_reg,   -1, ix_reg, src_reg);
1390         __ z_stcy(data_reg, -1, ix_reg, dst_reg);
1391         __ add2reg(ix_reg, -1); // Decrement delayed to avoid AGI.
1392         __ bind(skipBY);
1393         // fallthru
1394       }
1395       if (log2_size <= 1) {     // Handle leftover HW.
1396         __ z_tmll(ix_reg, 2);
1397         __ z_bre(skipHW);
1398         __ z_lhy(data_reg,  -2, ix_reg, src_reg);
1399         __ z_sthy(data_reg, -2, ix_reg, dst_reg);
1400         __ add2reg(ix_reg, -2); // Decrement delayed to avoid AGI.
1401         __ bind(skipHW);
1402         __ z_tmll(ix_reg, 4);
1403         __ z_bre(countLoop1);
1404         // fallthru
1405       }
1406       if (log2_size <= 2) {     // There are just 4 bytes (left) that need to be copied.
1407         __ z_ly(data_reg,  -4, ix_reg, src_reg);
1408         __ z_sty(data_reg, -4, ix_reg, dst_reg);
1409         __ add2reg(ix_reg, -4); // Decrement delayed to avoid AGI.
1410         __ z_bru(countLoop1);
1411       }
1412 
1413       // Control can never get to here. Never! Never ever!
1414       __ z_illtrap(0x99);
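      // DW copy loop: BRXHG adds stride_reg (-8) to ix_reg and branches back while the
      // sum is still greater than the compare value. Because stride_reg is an odd register,
      // it serves as both increment and compare value, so (with ix_reg a multiple of 8)
      // the loop keeps running while the decremented index is still >= 0.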
1415       __ bind(copyLoop1);
1416       __ z_lg(data_reg,  0, ix_reg, src_reg);
1417       __ z_stg(data_reg, 0, ix_reg, dst_reg);
1418       __ bind(countLoop1);
1419       __ z_brxhg(ix_reg, stride_reg, copyLoop1);
1420 
1421       if (!branchToEnd)
1422         __ z_br(Z_R14);
1423 
1424       switch (element_size) {
1425         case 1:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT byte "); break;
1426         case 2:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT short"); break;
1427         case 4:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT int  "); break;
1428         case 8:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT long "); break;
1429         default: BLOCK_COMMENT("} ARRAYCOPY CONJOINT      "); break;
1430       }
1431     }
1432   }
1433 
1434   // Generate stub for disjoint byte copy. If "aligned" is true, the
1435   // "from" and "to" addresses are assumed to be heapword aligned.
1436   address generate_disjoint_byte_copy(bool aligned, const char * name) {
1437     StubCodeMark mark(this, "StubRoutines", name);
1438 
1439     // This is the zarch specific stub generator for byte array copy.
1440     // Refer to generate_disjoint_copy for a list of prereqs and features:
1441     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1442     generate_disjoint_copy(aligned, 1, false, false);
1443     return __ addr_at(start_off);
1444   }
1445 
1446 
1447   address generate_disjoint_short_copy(bool aligned, const char * name) {
1448     StubCodeMark mark(this, "StubRoutines", name);
1449     // This is the zarch specific stub generator for short array copy.
1450     // Refer to generate_disjoint_copy for a list of prereqs and features:
1451     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1452     generate_disjoint_copy(aligned, 2, false, false);
1453     return __ addr_at(start_off);
1454   }
1455 
1456 
1457   address generate_disjoint_int_copy(bool aligned, const char * name) {
1458     StubCodeMark mark(this, "StubRoutines", name);
1459     // This is the zarch specific stub generator for int array copy.
1460     // Refer to generate_disjoint_copy for a list of prereqs and features:
1461     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1462     generate_disjoint_copy(aligned, 4, false, false);
1463     return __ addr_at(start_off);
1464   }
1465 
1466 
1467   address generate_disjoint_long_copy(bool aligned, const char * name) {
1468     StubCodeMark mark(this, "StubRoutines", name);
1469     // This is the zarch specific stub generator for long array copy.
1470     // Refer to generate_disjoint_copy for a list of prereqs and features:
1471     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1472     generate_disjoint_copy(aligned, 8, false, false);
1473     return __ addr_at(start_off);
1474   }
1475 
1476 
1477   address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
1478     StubCodeMark mark(this, "StubRoutines", name);
1479     // This is the zarch specific stub generator for oop array copy.
1480     // Refer to generate_disjoint_copy for a list of prereqs and features.
1481     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1482     unsigned int size      = UseCompressedOops ? 4 : 8;
1483 
1484     gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);
1485 
1486     generate_disjoint_copy(aligned, size, true, true);
1487 
1488     gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);
1489 
1490     return __ addr_at(start_off);
1491   }
1492 
1493 
1494   address generate_conjoint_byte_copy(bool aligned, const char * name) {
1495     StubCodeMark mark(this, "StubRoutines", name);
1496     // This is the zarch specific stub generator for overlapping byte array copy.
1497     // Refer to generate_conjoint_copy for a list of prereqs and features:
1498     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
1499     address nooverlap_target = aligned ? StubRoutines::arrayof_jbyte_disjoint_arraycopy()
1500                                        : StubRoutines::jbyte_disjoint_arraycopy();
1501 
1502     array_overlap_test(nooverlap_target, 0); // Branch away to nooverlap_target if disjoint.
1503     generate_conjoint_copy(aligned, 1, false);
1504 
1505     return __ addr_at(start_off);
1506   }
1507 
1508 
1509   address generate_conjoint_short_copy(bool aligned, const char * name) {
1510     StubCodeMark mark(this, "StubRoutines", name);
1511     // This is the zarch specific stub generator for overlapping short array copy.
1512     // Refer to generate_conjoint_copy for a list of prereqs and features:
1513     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
1514     address nooverlap_target = aligned ? StubRoutines::arrayof_jshort_disjoint_arraycopy()
1515                                        : StubRoutines::jshort_disjoint_arraycopy();
1516 
1517     array_overlap_test(nooverlap_target, 1); // Branch away to nooverlap_target if disjoint.
1518     generate_conjoint_copy(aligned, 2, false);
1519 
1520     return __ addr_at(start_off);
1521   }
1522 
1523   address generate_conjoint_int_copy(bool aligned, const char * name) {
1524     StubCodeMark mark(this, "StubRoutines", name);
1525     // This is the zarch specific stub generator for overlapping int array copy.
1526     // Refer to generate_conjoint_copy for a list of prereqs and features:
1527 
1528     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
1529     address nooverlap_target = aligned ? StubRoutines::arrayof_jint_disjoint_arraycopy()
1530                                        : StubRoutines::jint_disjoint_arraycopy();
1531 
1532     array_overlap_test(nooverlap_target, 2); // Branch away to nooverlap_target if disjoint.
1533     generate_conjoint_copy(aligned, 4, false);
1534 
1535     return __ addr_at(start_off);
1536   }
1537 
1538   address generate_conjoint_long_copy(bool aligned, const char * name) {
1539     StubCodeMark mark(this, "StubRoutines", name);
1540     // This is the zarch specific stub generator for overlapping long array copy.
1541     // Refer to generate_conjoint_copy for a list of prereqs and features:
1542 
1543     unsigned int start_off   = __ offset();  // Remember stub start address (is rtn value).
1544     address nooverlap_target = aligned ? StubRoutines::arrayof_jlong_disjoint_arraycopy()
1545                                        : StubRoutines::jlong_disjoint_arraycopy();
1546 
1547     array_overlap_test(nooverlap_target, 3); // Branch away to nooverlap_target if disjoint.
1548     generate_conjoint_copy(aligned, 8, false);
1549 
1550     return __ addr_at(start_off);
1551   }
1552 
1553   address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
1554     StubCodeMark mark(this, "StubRoutines", name);
1555     // This is the zarch specific stub generator for overlapping oop array copy.
1556     // Refer to generate_conjoint_copy for a list of prereqs and features.
1557     unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
1558     unsigned int size      = UseCompressedOops ? 4 : 8;
1559     unsigned int shift     = UseCompressedOops ? 2 : 3;
1560 
1561     address nooverlap_target = aligned ? StubRoutines::arrayof_oop_disjoint_arraycopy(dest_uninitialized)
1562                                        : StubRoutines::oop_disjoint_arraycopy(dest_uninitialized);
1563 
1564     // Branch to disjoint_copy (if applicable) before pre_barrier to avoid double pre_barrier.
1565     array_overlap_test(nooverlap_target, shift);  // Branch away to nooverlap_target if disjoint.
1566 
1567     gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);
1568 
1569     generate_conjoint_copy(aligned, size, true);  // Must preserve ARG2, ARG3.
1570 
1571     gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);
1572 
1573     return __ addr_at(start_off);
1574   }
1575 
1576 
1577   void generate_arraycopy_stubs() {
1578 
1579     // Note: the disjoint stubs must be generated first, some of
1580     // the conjoint stubs use them.
1581     StubRoutines::_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy (false, "jbyte_disjoint_arraycopy");
1582     StubRoutines::_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
1583     StubRoutines::_jint_disjoint_arraycopy       = generate_disjoint_int_copy  (false, "jint_disjoint_arraycopy");
1584     StubRoutines::_jlong_disjoint_arraycopy      = generate_disjoint_long_copy (false, "jlong_disjoint_arraycopy");
1585     StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_oop_copy  (false, "oop_disjoint_arraycopy", false);
1586     StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy  (false, "oop_disjoint_arraycopy_uninit", true);
1587 
1588     StubRoutines::_arrayof_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy (true, "arrayof_jbyte_disjoint_arraycopy");
1589     StubRoutines::_arrayof_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
1590     StubRoutines::_arrayof_jint_disjoint_arraycopy       = generate_disjoint_int_copy  (true, "arrayof_jint_disjoint_arraycopy");
1591     StubRoutines::_arrayof_jlong_disjoint_arraycopy      = generate_disjoint_long_copy (true, "arrayof_jlong_disjoint_arraycopy");
1592     StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy  (true, "arrayof_oop_disjoint_arraycopy", false);
1593     StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy  (true, "arrayof_oop_disjoint_arraycopy_uninit", true);
1594 
1595     StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy (false, "jbyte_arraycopy");
1596     StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, "jshort_arraycopy");
1597     StubRoutines::_jint_arraycopy            = generate_conjoint_int_copy  (false, "jint_arraycopy");
1598     StubRoutines::_jlong_arraycopy           = generate_conjoint_long_copy (false, "jlong_arraycopy");
1599     StubRoutines::_oop_arraycopy             = generate_conjoint_oop_copy  (false, "oop_arraycopy", false);
1600     StubRoutines::_oop_arraycopy_uninit      = generate_conjoint_oop_copy  (false, "oop_arraycopy_uninit", true);
1601 
1602     StubRoutines::_arrayof_jbyte_arraycopy      = generate_conjoint_byte_copy (true, "arrayof_jbyte_arraycopy");
1603     StubRoutines::_arrayof_jshort_arraycopy     = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
1604     StubRoutines::_arrayof_jint_arraycopy       = generate_conjoint_int_copy  (true, "arrayof_jint_arraycopy");
1605     StubRoutines::_arrayof_jlong_arraycopy      = generate_conjoint_long_copy (true, "arrayof_jlong_arraycopy");
1606     StubRoutines::_arrayof_oop_arraycopy        = generate_conjoint_oop_copy  (true, "arrayof_oop_arraycopy", false);
1607     StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy  (true, "arrayof_oop_arraycopy_uninit", true);
1608   }
1609 
1610   void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {
1611 
1612     // safefetch signatures:
1613     //   int      SafeFetch32(int*      adr, int      errValue);
1614     //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
1615     //
1616     // arguments:
1617     //   Z_ARG1 = adr
1618     //   Z_ARG2 = errValue
1619     //
1620     // result:
1621     //   Z_RET  = *adr or errValue
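    //
    // If the load at *fault_pc faults, the VM's signal handler resumes execution at
    // *continuation_pc. Z_ARG2 still holds errValue at that point, so the stub then
    // returns errValue instead of *adr.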
1622 
1623     StubCodeMark mark(this, "StubRoutines", name);
1624 
1625     // entry point
1626     // Load *adr into Z_ARG2, may fault.
1627     *entry = *fault_pc = __ pc();
1628     switch (size) {
1629       case 4:
1630         // Sign extended int32_t.
1631         __ z_lgf(Z_ARG2, 0, Z_ARG1);
1632         break;
1633       case 8:
1634         // int64_t
1635         __ z_lg(Z_ARG2, 0, Z_ARG1);
1636         break;
1637       default:
1638         ShouldNotReachHere();
1639     }
1640 
1641     // Return errValue or *adr.
1642     *continuation_pc = __ pc();
1643     __ z_lgr(Z_RET, Z_ARG2);
1644     __ z_br(Z_R14);
1645 
1646   }
1647 
1648   // Call interface for AES_encryptBlock, AES_decryptBlock stubs.
1649   //
1650   //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
1651   //   Z_ARG2 - destination data block. Ptr to leftmost byte to be stored.
1652   //            For in-place encryption/decryption, ARG1 and ARG2 can point
1653   //            to the same piece of storage.
1654   //   Z_ARG3 - Crypto key address (expanded key). The first n bits of
1655   //            the expanded key constitute the original AES-<n> key (see below).
1656   //
1657   //   Z_RET  - return value. First unprocessed byte offset in src buffer.
1658   //
1659   // Some remarks:
1660   //   The crypto key, as passed from the caller to these encryption stubs,
1661   //   is a so-called expanded key. It is derived from the original key
1662   //   by the Rijndael key schedule, see http://en.wikipedia.org/wiki/Rijndael_key_schedule
  //   With the expanded key, the cipher/decipher task is decomposed into
1664   //   multiple, less complex steps, called rounds. Sun SPARC and Intel
1665   //   processors obviously implement support for those less complex steps.
1666   //   z/Architecture provides instructions for full cipher/decipher complexity.
1667   //   Therefore, we need the original, not the expanded key here.
1668   //   Luckily, the first n bits of an AES-<n> expanded key are formed
1669   //   by the original key itself. That takes us out of trouble. :-)
1670   //   The key length (in bytes) relation is as follows:
1671   //     original    expanded   rounds  key bit     keylen
1672   //    key bytes   key bytes            length   in words
1673   //           16         176       11      128         44
1674   //           24         208       13      192         52
1675   //           32         240       15      256         60
1676   //
1677   // The crypto instructions used in the AES* stubs have some specific register requirements.
1678   //   Z_R0   holds the crypto function code. Please refer to the KM/KMC instruction
1679   //          description in the "z/Architecture Principles of Operation" manual for details.
1680   //   Z_R1   holds the parameter block address. The parameter block contains the cryptographic key
1681   //          (KM instruction) and the chaining value (KMC instruction).
1682   //   dst    must designate an even-numbered register, holding the address of the output message.
1683   //   src    must designate an even/odd register pair, holding the address/length of the original message
1684 
1685   // Helper function which generates code to
1686   //  - load the function code in register fCode (== Z_R0)
1687   //  - load the data block length (depends on cipher function) into register srclen if requested.
1688   //  - is_decipher switches between cipher/decipher function codes
1689   //  - set_len requests (if true) loading the data block length in register srclen
1690   void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) {
1691 
1692     BLOCK_COMMENT("Set fCode {"); {
1693       Label fCode_set;
1694       int   mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
1695       bool  identical_dataBlk_len =  (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk)
1696                                   && (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
1697       // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256.
1698       __ z_cghi(keylen, 52);
1699 
1700       __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
1701       if (!identical_dataBlk_len) {
1702         __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk);
1703       }
1704       __ z_brh(fCode_set);  // keyLen >  52: AES256
1705 
1706       __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode);
1707       if (!identical_dataBlk_len) {
1708         __ z_lghi(srclen, VM_Version::Cipher::_AES192_dataBlk);
1709       }
1710       __ z_bre(fCode_set);  // keyLen == 52: AES192
1711 
1712       __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
1713       if (!identical_dataBlk_len) {
1714         __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
1715       }
1716       // __ z_brl(fCode_set);  // keyLen <  52: AES128           // fallthru
1717 
1718       __ bind(fCode_set);
1719       if (identical_dataBlk_len) {
1720         __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
1721       }
1722     }
1723     BLOCK_COMMENT("} Set fCode");
1724   }
1725 
1726   // Push a parameter block for the cipher/decipher instruction on the stack.
1727   // NOTE:
1728   //   Before returning, the stub has to copy the chaining value from
1729   //   the parmBlk, where it was updated by the crypto instruction, back
1730   //   to the chaining value array the address of which was passed in the cv argument.
1731   //   As all the available registers are used and modified by KMC, we need to save
1732   //   the key length across the KMC instruction. We do so by spilling it to the stack,
1733   //   just preceding the parmBlk (at (parmBlk - 8)).
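  //   Resulting layout, relative to parmBlk (addresses increasing upwards):
  //     parmBlk - 16     : caller's SP, spilled to revert the frame resize when popping
  //     parmBlk -  8     : keylen, spilled for reuse when popping the parmBlk
  //     parmBlk +  0     : chaining value (cv_len bytes)
  //     parmBlk + cv_len : crypto key (key_len bytes)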
1734   void generate_push_parmBlk(Register keylen, Register fCode, Register parmBlk, Register key, Register cv, bool is_decipher) {
1735     const int AES_parmBlk_align    = 32;
    const int AES_parmBlk_addspace = AES_parmBlk_align; // Must be multiple of AES_parmBlk_align.
1737     int       cv_len, key_len;
1738     int       mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
1739     Label     parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
1740 
1741     BLOCK_COMMENT("push parmBlk {");
1742     if (VM_Version::has_Crypto_AES()   ) { __ z_cghi(keylen, 52); }
1743     if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); }  // keyLen >  52: AES256
1744     if (VM_Version::has_Crypto_AES192()) { __ z_bre(parmBlk_192); }  // keyLen == 52: AES192
1745     if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); }  // keyLen <  52: AES128
1746 
1747     // Security net: requested AES function not available on this CPU.
1748     // NOTE:
1749     //   As of now (March 2015), this safety net is not required. JCE policy files limit the
1750     //   cryptographic strength of the keys used to 128 bit. If we have AES hardware support
1751     //   at all, we have at least AES-128.
1752     __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAES as remedy.", 0);
1753 
1754     if (VM_Version::has_Crypto_AES128()) {
1755       __ bind(parmBlk_128);
1756       cv_len  = VM_Version::Cipher::_AES128_dataBlk;
1757       key_len = VM_Version::Cipher::_AES128_parmBlk_C - cv_len;
1758       __ z_lay(parmBlk, -(VM_Version::Cipher::_AES128_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
1759       __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // align parameter block
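      // parmBlk = align_down(SP - _AES128_parmBlk_C - 1, AES_parmBlk_align), i.e. an
      // aligned address leaving at least _AES128_parmBlk_C bytes between parmBlk and
      // the caller's SP. NILL suffices for the masking because the alignment bits all
      // lie in the low halfword.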
1760 
      // Resize the frame to accommodate the aligned parameter block and other stuff.
1762       // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
1763       __ z_stg(keylen, -8, parmBlk);                   // Spill keylen for later use.
1764       __ z_stg(Z_SP,  -16, parmBlk);                   // Spill SP for easy revert.
      __ z_aghi(parmBlk, -AES_parmBlk_addspace);       // Additional space for keylen, etc.
1766       __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
1767       __ z_aghi(parmBlk,  AES_parmBlk_addspace);       // Restore parameter block address.
1768 
1769       __ z_mvc(0,      cv_len-1,  parmBlk, 0, cv);     // Copy cv.
1770       __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);    // Copy key.
1771       __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
1772       if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
1773         __ z_bru(parmBlk_set);  // Fallthru otherwise.
1774       }
1775     }
1776 
1777     if (VM_Version::has_Crypto_AES192()) {
1778       __ bind(parmBlk_192);
1779       cv_len  = VM_Version::Cipher::_AES192_dataBlk;
1780       key_len = VM_Version::Cipher::_AES192_parmBlk_C - cv_len;
1781       __ z_lay(parmBlk, -(VM_Version::Cipher::_AES192_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
1782       __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.
1783 
      // Resize the frame to accommodate the aligned parameter block and other stuff.
1785       // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
1786       __ z_stg(keylen, -8, parmBlk);                   // Spill keylen for later use.
1787       __ z_stg(Z_SP,  -16, parmBlk);                   // Spill SP for easy revert.
      __ z_aghi(parmBlk, -AES_parmBlk_addspace);       // Additional space for keylen, etc.
1789       __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
1790       __ z_aghi(parmBlk, AES_parmBlk_addspace);        // Restore parameter block address.
1791 
1792       __ z_mvc(0,      cv_len-1,  parmBlk, 0, cv);     // Copy cv.
1793       __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);    // Copy key.
1794       __ z_lghi(fCode,    VM_Version::Cipher::_AES192 + mode);
1795       if (VM_Version::has_Crypto_AES256()) {
1796         __ z_bru(parmBlk_set);  // Fallthru otherwise.
1797       }
1798     }
1799 
1800     if (VM_Version::has_Crypto_AES256()) {
1801       __ bind(parmBlk_256);
1802       cv_len  = VM_Version::Cipher::_AES256_dataBlk;
1803       key_len = VM_Version::Cipher::_AES256_parmBlk_C - cv_len;
1804       __ z_lay(parmBlk, -(VM_Version::Cipher::_AES256_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
1805       __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.
1806 
      // Resize the frame to accommodate the aligned parameter block and other stuff.
1808       // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
1809       __ z_stg(keylen, -8, parmBlk);                   // Spill keylen for later use.
1810       __ z_stg(Z_SP,  -16, parmBlk);                   // Spill SP for easy revert.
      __ z_aghi(parmBlk, -AES_parmBlk_addspace);       // Additional space for keylen, etc.
1812       __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
1813       __ z_aghi(parmBlk,  AES_parmBlk_addspace);       // Restore parameter block address.
1814 
1815       __ z_mvc(0,      cv_len-1,  parmBlk, 0, cv);     // Copy cv.
1816       __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);    // Copy key.
1817       __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
1818       // __ z_bru(parmBlk_set);  // fallthru
1819     }
1820 
1821     __ bind(parmBlk_set);
1822     BLOCK_COMMENT("} push parmBlk");
1823   }
1824 
1825   // Pop a parameter block from the stack. The chaining value portion of the parameter block
1826   // is copied back to the cv array as it is needed for subsequent cipher steps.
  // The keylen value and the original SP (before resizing) were pushed to the stack
1828   // when pushing the parameter block.
1829   void generate_pop_parmBlk(Register keylen, Register parmBlk, Register key, Register cv) {
1830 
1831     BLOCK_COMMENT("pop parmBlk {");
1832     bool identical_dataBlk_len =  (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) &&
1833                                   (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
1834     if (identical_dataBlk_len) {
1835       int cv_len = VM_Version::Cipher::_AES128_dataBlk;
1836       __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
1837     } else {
1838       int cv_len;
1839       Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
1840       __ z_lg(keylen, -8, parmBlk);  // restore keylen
1841       __ z_cghi(keylen, 52);
1842       if (VM_Version::has_Crypto_AES256()) __ z_brh(parmBlk_256);  // keyLen >  52: AES256
1843       if (VM_Version::has_Crypto_AES192()) __ z_bre(parmBlk_192);  // keyLen == 52: AES192
1844       // if (VM_Version::has_Crypto_AES128()) __ z_brl(parmBlk_128);  // keyLen <  52: AES128  // fallthru
1845 
      // Security net: none is needed here. If we did need one, we would already have
      // fallen into it when pushing the parameter block.
1848       if (VM_Version::has_Crypto_AES128()) {
1849         __ bind(parmBlk_128);
1850         cv_len = VM_Version::Cipher::_AES128_dataBlk;
1851         __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
1852         if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
1853           __ z_bru(parmBlk_set);
1854         }
1855       }
1856 
1857       if (VM_Version::has_Crypto_AES192()) {
1858         __ bind(parmBlk_192);
1859         cv_len = VM_Version::Cipher::_AES192_dataBlk;
1860         __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
1861         if (VM_Version::has_Crypto_AES256()) {
1862           __ z_bru(parmBlk_set);
1863         }
1864       }
1865 
1866       if (VM_Version::has_Crypto_AES256()) {
1867         __ bind(parmBlk_256);
1868         cv_len = VM_Version::Cipher::_AES256_dataBlk;
1869         __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
1870         // __ z_bru(parmBlk_set);  // fallthru
1871       }
1872       __ bind(parmBlk_set);
1873     }
1874     __ z_lg(Z_SP, -16, parmBlk); // Revert resize_frame_absolute.
1875     BLOCK_COMMENT("} pop parmBlk");
1876   }
1877 
1878   // Compute AES encrypt function.
1879   address generate_AES_encryptBlock(const char* name) {
1880     __ align(CodeEntryAlignment);
1881     StubCodeMark mark(this, "StubRoutines", name);
1882     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
1883 
1884     Register       from    = Z_ARG1; // source byte array
1885     Register       to      = Z_ARG2; // destination byte array
1886     Register       key     = Z_ARG3; // expanded key array
1887 
1888     const Register keylen  = Z_R0;   // Temporarily (until fCode is set) holds the expanded key array length.
1889     const Register fCode   = Z_R0;   // crypto function code
1890     const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
1891     const Register src     = Z_ARG1; // is Z_R2
1892     const Register srclen  = Z_ARG2; // Overwrites destination address.
1893     const Register dst     = Z_ARG3; // Overwrites expanded key address.
1894 
1895     // Read key len of expanded key (in 4-byte words).
1896     __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
1897 
1898     // Copy arguments to registers as required by crypto instruction.
1899     __ z_lgr(parmBlk, key);          // crypto key (in T_INT array).
1900     // __ z_lgr(src, from);          // Copy not needed, src/from are identical.
1901     __ z_lgr(dst, to);               // Copy destination address to even register.
1902 
1903     // Construct function code in Z_R0, data block length in Z_ARG2.
1904     generate_load_AES_fCode(keylen, fCode, srclen, false);
1905 
1906     __ km(dst, src);          // Cipher the message.
1907 
1908     __ z_br(Z_R14);
1909 
1910     return __ addr_at(start_off);
1911   }
1912 
1913   // Compute AES decrypt function.
1914   address generate_AES_decryptBlock(const char* name) {
1915     __ align(CodeEntryAlignment);
1916     StubCodeMark mark(this, "StubRoutines", name);
1917     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
1918 
1919     Register       from    = Z_ARG1; // source byte array
1920     Register       to      = Z_ARG2; // destination byte array
1921     Register       key     = Z_ARG3; // expanded key array, not preset at entry!!!
1922 
1923     const Register keylen  = Z_R0;   // Temporarily (until fCode is set) holds the expanded key array length.
1924     const Register fCode   = Z_R0;   // crypto function code
1925     const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
1926     const Register src     = Z_ARG1; // is Z_R2
1927     const Register srclen  = Z_ARG2; // Overwrites destination address.
1928     const Register dst     = Z_ARG3; // Overwrites key address.
1929 
1930     // Read key len of expanded key (in 4-byte words).
1931     __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
1932 
1933     // Copy arguments to registers as required by crypto instruction.
1934     __ z_lgr(parmBlk, key);     // Copy crypto key address.
1935     // __ z_lgr(src, from);     // Copy not needed, src/from are identical.
1936     __ z_lgr(dst, to);          // Copy destination address to even register.
1937 
1938     // Construct function code in Z_R0, data block length in Z_ARG2.
1939     generate_load_AES_fCode(keylen, fCode, srclen, true);
1940 
1941     __ km(dst, src);          // Cipher the message.
1942 
1943     __ z_br(Z_R14);
1944 
1945     return __ addr_at(start_off);
1946   }
1947 
1948   // These stubs receive the addresses of the cryptographic key and of the chaining value as two separate
1949   // arguments (registers "key" and "cv", respectively). The KMC instruction, on the other hand, requires
1950   // chaining value and key to be, in this sequence, adjacent in storage. Thus, we need to allocate some
1951   // thread-local working storage. Using heap memory incurs all the hassles of allocating/freeing.
1952   // Stack space, on the contrary, is deallocated automatically when we return from the stub to the caller.
1953   // *** WARNING ***
1954   // Please note that we do not formally allocate stack space, nor do we
1955   // update the stack pointer. Therefore, no function calls are allowed
1956   // and nobody else must use the stack range where the parameter block
1957   // is located.
1958   // We align the parameter block to the next available octoword.
1959   //
1960   // Compute chained AES encrypt function.
1961   address generate_cipherBlockChaining_AES_encrypt(const char* name) {
1962     __ align(CodeEntryAlignment);
1963     StubCodeMark mark(this, "StubRoutines", name);
1964     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
1965 
1966     Register       from    = Z_ARG1; // source byte array (clear text)
1967     Register       to      = Z_ARG2; // destination byte array (ciphered)
1968     Register       key     = Z_ARG3; // expanded key array.
1969     Register       cv      = Z_ARG4; // chaining value
1970     const Register msglen  = Z_ARG5; // Total length of the msg to be encrypted. Value must be returned
                                     // in Z_RET upon completion of this stub. A 32-bit integer.
1972 
1973     const Register keylen  = Z_R0;   // Expanded key length, as read from key array. Temp only.
1974     const Register fCode   = Z_R0;   // crypto function code
1975     const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
1976     const Register src     = Z_ARG1; // is Z_R2
1977     const Register srclen  = Z_ARG2; // Overwrites destination address.
1978     const Register dst     = Z_ARG3; // Overwrites key address.
1979 
1980     // Read key len of expanded key (in 4-byte words).
1981     __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
1982 
1983     // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
1984     // Construct function code in Z_R0.
1985     generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, false);
1986 
1987     // Prepare other registers for instruction.
1988     // __ z_lgr(src, from);     // Not needed, registers are the same.
1989     __ z_lgr(dst, to);
    __ z_llgfr(srclen, msglen); // msglen is passed as a 32-bit int; zero-extend to the 64-bit length KMC requires.
1991 
1992     __ kmc(dst, src);           // Cipher the message.
1993 
1994     generate_pop_parmBlk(keylen, parmBlk, key, cv);
1995 
    __ z_llgfr(Z_RET, msglen);  // Return msglen (all bytes processed), zero-extended to 64 bits.
1997     __ z_br(Z_R14);
1998 
1999     return __ addr_at(start_off);
2000   }
2001 
  // Compute chained AES decrypt function.
2003   address generate_cipherBlockChaining_AES_decrypt(const char* name) {
2004     __ align(CodeEntryAlignment);
2005     StubCodeMark mark(this, "StubRoutines", name);
2006     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
2007 
2008     Register       from    = Z_ARG1; // source byte array (ciphered)
2009     Register       to      = Z_ARG2; // destination byte array (clear text)
2010     Register       key     = Z_ARG3; // expanded key array, not preset at entry!!!
2011     Register       cv      = Z_ARG4; // chaining value
    const Register msglen  = Z_ARG5; // Total length of the msg to be decrypted. Value must be returned
                                     // in Z_RET upon completion of this stub.
2014 
2015     const Register keylen  = Z_R0;   // Expanded key length, as read from key array. Temp only.
2016     const Register fCode   = Z_R0;   // crypto function code
2017     const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
2018     const Register src     = Z_ARG1; // is Z_R2
2019     const Register srclen  = Z_ARG2; // Overwrites destination address.
2020     const Register dst     = Z_ARG3; // Overwrites key address.
2021 
2022     // Read key len of expanded key (in 4-byte words).
2023     __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2024 
2025     // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
2026     // Construct function code in Z_R0.
2027     generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, true);
2028 
2029     // Prepare other registers for instruction.
2030     // __ z_lgr(src, from);     // Not needed, registers are the same.
2031     __ z_lgr(dst, to);
    __ z_llgfr(srclen, msglen); // msglen is passed as a 32-bit int; zero-extend to the 64-bit length KMC requires.
2033 
2034     __ kmc(dst, src);           // Decipher the message.
2035 
2036     generate_pop_parmBlk(keylen, parmBlk, key, cv);
2037 
    __ z_llgfr(Z_RET, msglen);  // Return msglen (all bytes processed), zero-extended to 64 bits.
2039     __ z_br(Z_R14);
2040 
2041     return __ addr_at(start_off);
2042   }
2043 
2044 
2045   // Call interface for all SHA* stubs.
2046   //
2047   //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
2048   //   Z_ARG2 - current SHA state. Ptr to state area. This area serves as
2049   //            parameter block as required by the crypto instruction.
2050   //   Z_ARG3 - current byte offset in source data block.
2051   //   Z_ARG4 - last byte offset in source data block.
2052   //            (Z_ARG4 - Z_ARG3) gives the #bytes remaining to be processed.
2053   //
2054   //   Z_RET  - return value. First unprocessed byte offset in src buffer.
2055   //
2056   //   A few notes on the call interface:
2057   //    - All stubs, whether they are single-block or multi-block, are assumed to
2058   //      digest an integer multiple of the data block length of data. All data
2059   //      blocks are digested using the intermediate message digest (KIMD) instruction.
2060   //      Special end processing, as done by the KLMD instruction, seems to be
2061   //      emulated by the calling code.
2062   //
2063   //    - Z_ARG1 addresses the first byte of source data. The offset (Z_ARG3) is
2064   //      already accounted for.
2065   //
2066   //    - The current SHA state (the intermediate message digest value) is contained
2067   //      in an area addressed by Z_ARG2. The area size depends on the SHA variant
2068   //      and is accessible via the enum VM_Version::MsgDigest::_SHA<n>_parmBlk_I
2069   //
2070   //    - The single-block stub is expected to digest exactly one data block, starting
2071   //      at the address passed in Z_ARG1.
2072   //
2073   //    - The multi-block stub is expected to digest all data blocks which start in
2074   //      the offset interval [srcOff(Z_ARG3), srcLimit(Z_ARG4)). The exact difference
2075   //      (srcLimit-srcOff), rounded up to the next multiple of the data block length,
2076   //      gives the number of blocks to digest. It must be assumed that the calling code
2077   //      provides for a large enough source data buffer.
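  //
  //    - Illustration (SHA-1, 64-byte data blocks): with srcOff = 64 and srcLimit = 200,
  //      the difference (136) is rounded up to 192, so three blocks are digested and the
  //      returned offset is 64 + 192 = 256.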
2078   //
2079   // Compute SHA-1 function.
2080   address generate_SHA1_stub(bool multiBlock, const char* name) {
2081     __ align(CodeEntryAlignment);
2082     StubCodeMark mark(this, "StubRoutines", name);
2083     unsigned int start_off = __ offset();   // Remember stub start address (is rtn value).
2084 
2085     const Register srcBuff        = Z_ARG1; // Points to first block to process (offset already added).
2086     const Register SHAState       = Z_ARG2; // Only on entry. Reused soon thereafter for kimd register pairs.
2087     const Register srcOff         = Z_ARG3; // int
2088     const Register srcLimit       = Z_ARG4; // Only passed in multiBlock case. int
2089 
2090     const Register SHAState_local = Z_R1;
2091     const Register SHAState_save  = Z_ARG3;
2092     const Register srcBufLen      = Z_ARG2; // Destroys state address, must be copied before.
2093     Label useKLMD, rtn;
2094 
2095     __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA1);   // function code
2096     __ z_lgr(SHAState_local, SHAState);                                 // SHAState == parameter block
2097 
2098     if (multiBlock) {  // Process everything from offset to limit.
2099 
2100       // The following description is valid if we get a raw (unpimped) source data buffer,
      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
2102       // the calling convention for these stubs is different. We leave the description in
2103       // to inform the reader what must be happening hidden in the calling code.
2104       //
2105       // The data block to be processed can have arbitrary length, i.e. its length does not
2106       // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
2107       // two different paths. If the length is an integer multiple, we use KIMD, saving us
      // from copying the SHA state back and forth. Otherwise, we copy the SHA state
2109       // to the stack, execute a KLMD instruction on it and copy the result back to the
2110       // caller's SHA state location.
2111 
2112       // Total #srcBuff blocks to process.
2113       if (VM_Version::has_DistinctOpnds()) {
2114         __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference
2115         __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);   // round up
2116         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
2117         __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value.
2118         __ z_llgfr(srcBufLen, srcBufLen);      // Cast to 64-bit.
2119       } else {
2120         __ z_lgfr(srcBufLen, srcLimit);        // Exact difference. srcLimit passed as int.
2121         __ z_sgfr(srcBufLen, srcOff);          // SrcOff passed as int, now properly casted to long.
2122         __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);   // round up
2123         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
2124         __ z_lgr(srcLimit, srcOff);            // SrcLimit temporarily holds return value.
2125         __ z_agr(srcLimit, srcBufLen);
2126       }
2127 
2128       // Integral #blocks to digest?
2129       // As a result of the calculations above, srcBufLen MUST be an integer
2130       // multiple of _SHA1_dataBlk, or else we are in big trouble.
2131       // We insert an asm_assert into the KLMD case to guard against that.
2132       __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);
2133       __ z_brc(Assembler::bcondNotAllZero, useKLMD);
2134 
2135       // Process all full blocks.
2136       __ kimd(srcBuff);
2137 
2138       __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
2139     } else {  // Process one data block only.
2140       __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA1_dataBlk);   // #srcBuff bytes to process
2141       __ kimd(srcBuff);
2142       __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA1_dataBlk, srcOff);            // Offset of first unprocessed byte in buffer. No 32 to 64 bit extension needed.
2143     }
2144 
2145     __ bind(rtn);
2146     __ z_br(Z_R14);
2147 
2148     if (multiBlock) {
2149       __ bind(useKLMD);
2150 
2151 #if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE: The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA-1 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
2155 #endif
2156     }
2157 
2158     return __ addr_at(start_off);
2159   }
2160 
2161   // Compute SHA-256 function.
2162   address generate_SHA256_stub(bool multiBlock, const char* name) {
2163     __ align(CodeEntryAlignment);
2164     StubCodeMark mark(this, "StubRoutines", name);
2165     unsigned int start_off = __ offset();   // Remember stub start address (is rtn value).
2166 
2167     const Register srcBuff        = Z_ARG1;
2168     const Register SHAState       = Z_ARG2; // Only on entry. Reused soon thereafter.
2169     const Register SHAState_local = Z_R1;
2170     const Register SHAState_save  = Z_ARG3;
2171     const Register srcOff         = Z_ARG3;
2172     const Register srcLimit       = Z_ARG4;
2173     const Register srcBufLen      = Z_ARG2; // Destroys state address, must be copied before.
2174     Label useKLMD, rtn;
2175 
2176     __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA256); // function code
2177     __ z_lgr(SHAState_local, SHAState);                                 // SHAState == parameter block
2178 
2179     if (multiBlock) {  // Process everything from offset to limit.
2180       // The following description is valid if we get a raw (unpimped) source data buffer,
      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
2182       // the calling convention for these stubs is different. We leave the description in
2183       // to inform the reader what must be happening hidden in the calling code.
2184       //
2185       // The data block to be processed can have arbitrary length, i.e. its length does not
2186       // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
2187       // two different paths. If the length is an integer multiple, we use KIMD, saving us
      // from copying the SHA state back and forth. Otherwise, we copy the SHA state
2189       // to the stack, execute a KLMD instruction on it and copy the result back to the
2190       // caller's SHA state location.
2191 
2192       // total #srcBuff blocks to process
2193       if (VM_Version::has_DistinctOpnds()) {
2194         __ z_srk(srcBufLen, srcLimit, srcOff);   // exact difference
2195         __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up
2196         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
2197         __ z_ark(srcLimit, srcOff, srcBufLen);   // Srclimit temporarily holds return value.
2198         __ z_llgfr(srcBufLen, srcBufLen);        // Cast to 64-bit.
2199       } else {
2200         __ z_lgfr(srcBufLen, srcLimit);          // exact difference
2201         __ z_sgfr(srcBufLen, srcOff);
2202         __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up
2203         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
2204         __ z_lgr(srcLimit, srcOff);              // Srclimit temporarily holds return value.
2205         __ z_agr(srcLimit, srcBufLen);
2206       }
2207 
2208       // Integral #blocks to digest?
2209       // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA256_dataBlk, or else we are in big trouble.
2211       // We insert an asm_assert into the KLMD case to guard against that.
2212       __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);
2213       __ z_brc(Assembler::bcondNotAllZero, useKLMD);
2214 
2215       // Process all full blocks.
2216       __ kimd(srcBuff);
2217 
2218       __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
2219     } else {  // Process one data block only.
2220       __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA256_dataBlk); // #srcBuff bytes to process
2221       __ kimd(srcBuff);
2222       __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA256_dataBlk, srcOff);          // Offset of first unprocessed byte in buffer.
2223     }
2224 
2225     __ bind(rtn);
2226     __ z_br(Z_R14);
2227 
2228     if (multiBlock) {
2229       __ bind(useKLMD);
2230 #if 1
2231       // Security net: this stub is believed to be called for full-sized data blocks only.
2232       // NOTE:
      //   The following code is believed to be correct, but it is not tested.
2234       __ stop_static("SHA256 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
2235 #endif
2236     }
2237 
2238     return __ addr_at(start_off);
2239   }
2240 
2241   // Compute SHA-512 function.
2242   address generate_SHA512_stub(bool multiBlock, const char* name) {
2243     __ align(CodeEntryAlignment);
2244     StubCodeMark mark(this, "StubRoutines", name);
2245     unsigned int start_off = __ offset();   // Remember stub start address (is rtn value).
2246 
2247     const Register srcBuff        = Z_ARG1;
2248     const Register SHAState       = Z_ARG2; // Only on entry. Reused soon thereafter.
2249     const Register SHAState_local = Z_R1;
2250     const Register SHAState_save  = Z_ARG3;
2251     const Register srcOff         = Z_ARG3;
2252     const Register srcLimit       = Z_ARG4;
2253     const Register srcBufLen      = Z_ARG2; // Destroys state address, must be copied before.
2254     Label useKLMD, rtn;
2255 
2256     __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA512); // function code
2257     __ z_lgr(SHAState_local, SHAState);                                 // SHAState == parameter block
2258 
2259     if (multiBlock) {  // Process everything from offset to limit.
2260       // The following description is valid if we get a raw (unpimped) source data buffer,
      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
2262       // the calling convention for these stubs is different. We leave the description in
2263       // to inform the reader what must be happening hidden in the calling code.
2264       //
2265       // The data block to be processed can have arbitrary length, i.e. its length does not
2266       // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
2267       // two different paths. If the length is an integer multiple, we use KIMD, saving us
      // from copying the SHA state back and forth. Otherwise, we copy the SHA state
2269       // to the stack, execute a KLMD instruction on it and copy the result back to the
2270       // caller's SHA state location.
2271 
2272       // total #srcBuff blocks to process
2273       if (VM_Version::has_DistinctOpnds()) {
2274         __ z_srk(srcBufLen, srcLimit, srcOff);   // exact difference
2275         __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up
2276         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
2277         __ z_ark(srcLimit, srcOff, srcBufLen);   // Srclimit temporarily holds return value.
2278         __ z_llgfr(srcBufLen, srcBufLen);        // Cast to 64-bit.
2279       } else {
2280         __ z_lgfr(srcBufLen, srcLimit);          // exact difference
2281         __ z_sgfr(srcBufLen, srcOff);
2282         __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up
2283         __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
2284         __ z_lgr(srcLimit, srcOff);              // Srclimit temporarily holds return value.
2285         __ z_agr(srcLimit, srcBufLen);
2286       }
2287 
2288       // integral #blocks to digest?
2289       // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA512_dataBlk, or else we are in big trouble.
2291       // We insert an asm_assert into the KLMD case to guard against that.
2292       __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);
2293       __ z_brc(Assembler::bcondNotAllZero, useKLMD);
2294 
2295       // Process all full blocks.
2296       __ kimd(srcBuff);
2297 
2298       __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
2299     } else {  // Process one data block only.
2300       __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA512_dataBlk); // #srcBuff bytes to process
2301       __ kimd(srcBuff);
2302       __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA512_dataBlk, srcOff);          // Offset of first unprocessed byte in buffer.
2303     }
2304 
2305     __ bind(rtn);
2306     __ z_br(Z_R14);
2307 
2308     if (multiBlock) {
2309       __ bind(useKLMD);
2310 #if 1
2311       // Security net: this stub is believed to be called for full-sized data blocks only
2312       // NOTE:
      //   The following code is believed to be correct, but it is not tested.
2314       __ stop_static("SHA512 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
2315 #endif
2316     }
2317 
2318     return __ addr_at(start_off);
2319   }
2320 
2321 
2322   /**
2323    *  Arguments:
2324    *
2325    * Inputs:
2326    *   Z_ARG1    - int   crc
2327    *   Z_ARG2    - byte* buf
2328    *   Z_ARG3    - int   length (of buffer)
2329    *
2330    * Result:
2331    *   Z_RET     - int   crc result
2332    **/
2333   // Compute CRC function (generic, for all polynomials).
2334   void generate_CRC_updateBytes(const char* name, Register table, bool invertCRC) {
2335 
2336     // arguments to kernel_crc32:
2337     Register       crc     = Z_ARG1;  // Current checksum, preset by caller or result from previous call, int.
2338     Register       data    = Z_ARG2;  // source byte array
2339     Register       dataLen = Z_ARG3;  // #bytes to process, int
2340 //    Register       table   = Z_ARG4;  // crc table address. Preloaded and passed in by caller.
2341     const Register t0      = Z_R10;   // work reg for kernel* emitters
2342     const Register t1      = Z_R11;   // work reg for kernel* emitters
2343     const Register t2      = Z_R12;   // work reg for kernel* emitters
2344     const Register t3      = Z_R13;   // work reg for kernel* emitters
2345 
2346     assert_different_registers(crc, data, dataLen, table);
2347 
    // dataLen is passed as a 32-bit int; zero-extend it to the 64-bit value that
    // kernel_crc32_1word() expects. crc is used as an int throughout.
2350     __ z_llgfr(dataLen, dataLen);
2351 
    __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide spill space for the work registers.
    __ z_stmg(Z_R10, Z_R13, 1*8, Z_SP);  // Spill regs Z_R10..Z_R13 to make them available as work registers.
    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, invertCRC);
    __ z_lmg(Z_R10, Z_R13, 1*8, Z_SP);   // Restore regs Z_R10..Z_R13 from the stack.
    __ resize_frame(+(6*8), Z_R0, true); // Remove the spill space again.
2357 
2358     __ z_llgfr(Z_RET, crc);  // Updated crc is function result. No copying required, just zero upper 32 bits.
2359     __ z_br(Z_R14);          // Result already in Z_RET == Z_ARG1.
2360   }
2361 
2362 
2363   // Compute CRC32 function.
2364   address generate_CRC32_updateBytes(const char* name) {
2365     __ align(CodeEntryAlignment);
2366     StubCodeMark mark(this, "StubRoutines", name);
2367     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
2368 
2369     assert(UseCRC32Intrinsics, "should not generate this stub (%s) with CRC32 intrinsics disabled", name);
2370 
2371     BLOCK_COMMENT("CRC32_updateBytes {");
2372     Register       table   = Z_ARG4;  // crc32 table address.
2373     StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
2374 
2375     generate_CRC_updateBytes(name, table, true);
2376     BLOCK_COMMENT("} CRC32_updateBytes");
2377 
2378     return __ addr_at(start_off);
2379   }
2380 
2381 
2382   // Compute CRC32C function.
2383   address generate_CRC32C_updateBytes(const char* name) {
2384     __ align(CodeEntryAlignment);
2385     StubCodeMark mark(this, "StubRoutines", name);
2386     unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
2387 
2388     assert(UseCRC32CIntrinsics, "should not generate this stub (%s) with CRC32C intrinsics disabled", name);
2389 
2390     BLOCK_COMMENT("CRC32C_updateBytes {");
2391     Register       table   = Z_ARG4;  // crc32c table address.
2392     StubRoutines::zarch::generate_load_crc32c_table_addr(_masm, table);
2393 
2394     generate_CRC_updateBytes(name, table, false);
2395     BLOCK_COMMENT("} CRC32C_updateBytes");
2396 
2397     return __ addr_at(start_off);
2398   }
2399 
2400 
2401   // Arguments:
2402   //   Z_ARG1    - x address
2403   //   Z_ARG2    - x length
2404   //   Z_ARG3    - y address
2405   //   Z_ARG4    - y length
2406   //   Z_ARG5    - z address
2407   //   160[Z_SP] - z length
2408   address generate_multiplyToLen() {
2409     __ align(CodeEntryAlignment);
2410     StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
2411 
2412     address start = __ pc();
2413 
2414     const Register x    = Z_ARG1;
2415     const Register xlen = Z_ARG2;
2416     const Register y    = Z_ARG3;
2417     const Register ylen = Z_ARG4;
2418     const Register z    = Z_ARG5;
2419     // zlen is passed on the stack:
2420     // Address zlen(Z_SP, _z_abi(remaining_cargs));
2421 
2422     // The following registers will be saved on the stack by multiply_to_len().
2423     const Register tmp1 = Z_tmp_1;
2424     const Register tmp2 = Z_tmp_2;
2425     const Register tmp3 = Z_tmp_3;
2426     const Register tmp4 = Z_tmp_4;
2427     const Register tmp5 = Z_R9;
2428 
2429     BLOCK_COMMENT("Entry:");
2430 
2431     __ z_llgfr(xlen, xlen);
2432     __ z_llgfr(ylen, ylen);
2433 
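         // Illustration only (not generated code): multiply_to_len computes the schoolbook product of
         // the big-endian int arrays x[0..xlen-1] and y[0..ylen-1] into z (the z length is passed on
         // the stack), much like BigInteger.multiplyToLen on the Java side. Assuming z is zero-initialized
         // and holds xlen + ylen ints, the result corresponds to:
         //   for (int i = xlen - 1; i >= 0; i--) {
         //     uint64_t carry = 0;
         //     for (int j = ylen - 1; j >= 0; j--) {
         //       uint64_t p = (uint64_t)x[i] * y[j] + z[i + j + 1] + carry;
         //       z[i + j + 1] = (uint32_t)p;
         //       carry = p >> 32;
         //     }
         //     z[i] = (uint32_t)carry;
         //   }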
2434     __ multiply_to_len(x, xlen, y, ylen, z, tmp1, tmp2, tmp3, tmp4, tmp5);
2435 
2436     __ z_br(Z_R14);  // Return to caller.
2437 
2438     return start;
2439   }
2440 
2441   void generate_initial() {
2442     // Generates the initial stubs and initializes the entry points.
2443 
2444     // Entry points that exist on all platforms.
2445     // Note: This is code that could be shared among different
2446     // platforms - however the benefit seems to be smaller than the
2447     // disadvantage of having a much more complicated generator
2448     // structure. See also comment in stubRoutines.hpp.
2449     StubRoutines::_forward_exception_entry                 = generate_forward_exception();
2450 
2451     StubRoutines::_call_stub_entry                         = generate_call_stub(StubRoutines::_call_stub_return_address);
2452     StubRoutines::_catch_exception_entry                   = generate_catch_exception();
2453 
2454     // Build this early so it's available for the interpreter.
2455     StubRoutines::_throw_StackOverflowError_entry          =
2456       generate_throw_exception("StackOverflowError throw_exception",
2457                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
2458     StubRoutines::_throw_delayed_StackOverflowError_entry  =
2459       generate_throw_exception("delayed StackOverflowError throw_exception",
2460                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), false);
2461 
2462     //----------------------------------------------------------------------
2463     // Entry points that are platform specific.
2464 
2465     if (UseCRC32Intrinsics) {
2466       StubRoutines::_crc_table_adr     = (address)StubRoutines::zarch::_crc_table;
2467       StubRoutines::_updateBytesCRC32  = generate_CRC32_updateBytes("CRC32_updateBytes");
2468     }
2469 
2470     if (UseCRC32CIntrinsics) {
2471       StubRoutines::_crc32c_table_addr = (address)StubRoutines::zarch::_crc32c_table;
2472       StubRoutines::_updateBytesCRC32C = generate_CRC32C_updateBytes("CRC32C_updateBytes");
2473     }
2474 
2475     // Compact string intrinsics: translate table for the string-inflate intrinsic, used by the TROT instruction.
2476     StubRoutines::zarch::_trot_table_addr = (address)StubRoutines::zarch::_trot_table;
2477   }
2478 
2479 
2480   void generate_all() {
2481     // Generates all stubs and initializes the entry points.
2482 
2483     StubRoutines::zarch::_partial_subtype_check            = generate_partial_subtype_check();
2484 
2485     // These entry points require SharedInfo::stack0 to be set up in non-core builds.
2486     StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
2487     StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
2488     StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
2489 
2490     // Support for verify_oop (must happen after universe_init).
2491     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop_subroutine();
2492 
2493     // Arraycopy stubs used by compilers.
2494     generate_arraycopy_stubs();
2495 
2496     // safefetch stubs
2497     generate_safefetch("SafeFetch32", sizeof(int),      &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, &StubRoutines::_safefetch32_continuation_pc);
2498     generate_safefetch("SafeFetchN",  sizeof(intptr_t), &StubRoutines::_safefetchN_entry,  &StubRoutines::_safefetchN_fault_pc,  &StubRoutines::_safefetchN_continuation_pc);
2499 
2500     // Generate AES intrinsics code.
2501     if (UseAESIntrinsics) {
2502       StubRoutines::_aescrypt_encryptBlock = generate_AES_encryptBlock("AES_encryptBlock");
2503       StubRoutines::_aescrypt_decryptBlock = generate_AES_decryptBlock("AES_decryptBlock");
2504       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_AES_encrypt("AES_encryptBlock_chaining");
2505       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_AES_decrypt("AES_decryptBlock_chaining");
2506     }
2507 
2508     // Generate SHA1/SHA256/SHA512 intrinsics code.
2509     if (UseSHA1Intrinsics) {
2510       StubRoutines::_sha1_implCompress     = generate_SHA1_stub(false,   "SHA1_singleBlock");
2511       StubRoutines::_sha1_implCompressMB   = generate_SHA1_stub(true,    "SHA1_multiBlock");
2512     }
2513     if (UseSHA256Intrinsics) {
2514       StubRoutines::_sha256_implCompress   = generate_SHA256_stub(false, "SHA256_singleBlock");
2515       StubRoutines::_sha256_implCompressMB = generate_SHA256_stub(true,  "SHA256_multiBlock");
2516     }
2517     if (UseSHA512Intrinsics) {
2518       StubRoutines::_sha512_implCompress   = generate_SHA512_stub(false, "SHA512_singleBlock");
2519       StubRoutines::_sha512_implCompressMB = generate_SHA512_stub(true,  "SHA512_multiBlock");
2520     }
2521 
2522 #ifdef COMPILER2
2523     if (UseMultiplyToLenIntrinsic) {
2524       StubRoutines::_multiplyToLen = generate_multiplyToLen();
2525     }
2526     if (UseMontgomeryMultiplyIntrinsic) {
2527       StubRoutines::_montgomeryMultiply
2528         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
2529     }
2530     if (UseMontgomerySquareIntrinsic) {
2531       StubRoutines::_montgomerySquare
2532         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
2533     }
2534 #endif
2535   }
2536 
2537  public:
2538   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
2539     // Replace the standard masm with a special one:
2540     _masm = new MacroAssembler(code);
2541 
2542     _stub_count = !all ? 0x100 : 0x200;
2543     if (all) {
2544       generate_all();
2545     } else {
2546       generate_initial();
2547     }
2548   }
2549 
2550  private:
2551   int _stub_count;
2552   void stub_prolog(StubCodeDesc* cdesc) {
2553 #ifdef ASSERT
2554     // Put extra information in the stub code, to make it more readable.
2555     // Write the high part of the address.
2556     // [RGV] Check if there is a dependency on the size of this prolog.
2557     __ emit_32((intptr_t)cdesc >> 32);
2558     __ emit_32((intptr_t)cdesc);
2559     __ emit_32(++_stub_count);
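         // Note: the three words emitted above form a 12-byte marker in front of the stub code:
         // the high and low halves of the StubCodeDesc address, followed by the running stub count.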
2560 #endif
2561     align(true);
2562   }
2563 
2564   void align(bool at_header = false) {
2565     // z/Architecture cache line size is 256 bytes.
2566     // There is no obvious benefit in aligning stub
2567     // code to cache lines. Use CodeEntryAlignment instead.
2568     const unsigned int icache_line_size      = CodeEntryAlignment;
2569     const unsigned int icache_half_line_size = MIN2<unsigned int>(32, CodeEntryAlignment);
2570 
2571     if (at_header) {
2572       while ((intptr_t)(__ pc()) % icache_line_size != 0) {
2573         __ emit_16(0);
2574       }
2575     } else {
2576       while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
2577         __ z_nop();
2578       }
2579     }
2580   }
2581 
2582 };
2583 
2584 void StubGenerator_generate(CodeBuffer* code, bool all) {
2585   StubGenerator g(code, all);
2586 }