/*
 * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "registerSaver_s390.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "nativeInst_s390.hpp"
#include "oops/instanceOop.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#ifdef PRODUCT
#define __ _masm->
#else
#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
#endif

#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")

// -----------------------------------------------------------------------
// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

  //----------------------------------------------------------------------
  // Call stubs are used to call Java from C.

  //
  // Arguments:
  //
  //   R2        - call wrapper address     : address
  //   R3        - result                   : intptr_t*
  //   R4        - result type              : BasicType
  //   R5        - method                   : method
  //   R6        - frame mgr entry point    : address
  //   [SP+160]  - parameter block          : intptr_t*
  //   [SP+172]  - parameter count in words : int
  //   [SP+176]  - thread                   : Thread*
  //
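  // Conceptually, the generated stub behaves like the following C function
  // (an illustrative sketch only; the actual argument passing is exactly as
  // listed above):
  //
  //   void call_stub(address call_wrapper, intptr_t* result,
  //                  BasicType result_type, Method* method,
  //                  address entry_point, intptr_t* parameters,
  //                  int parameter_words, Thread* thread);
  //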
  address generate_call_stub(address& return_address) {
    // Set up a new C frame, copy Java arguments, call frame manager
    // or native_entry, and process result.

    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    Register r_arg_call_wrapper_addr = Z_ARG1;
    Register r_arg_result_addr       = Z_ARG2;
    Register r_arg_result_type       = Z_ARG3;
    Register r_arg_method            = Z_ARG4;
    Register r_arg_entry             = Z_ARG5;

    // offsets to fp
    #define d_arg_thread 176
    #define d_arg_argument_addr 160
    #define d_arg_argument_count 168+4

    Register r_entryframe_fp         = Z_tmp_1;
    Register r_top_of_arguments_addr = Z_ARG4;
    Register r_new_arg_entry         = Z_R14;

    // macros for frame offsets
    #define call_wrapper_address_offset \
              _z_entry_frame_locals_neg(call_wrapper_address)
    #define result_address_offset \
              _z_entry_frame_locals_neg(result_address)
    #define result_type_offset \
              _z_entry_frame_locals_neg(result_type)
    #define arguments_tos_address_offset \
              _z_entry_frame_locals_neg(arguments_tos_address)

    {
      //
      // STACK on entry to call_stub:
      //
      //     F1      [C_FRAME]
      //             ...
      //

      Register r_argument_addr          = Z_tmp_3;
      Register r_argumentcopy_addr      = Z_tmp_4;
      Register r_argument_size_in_bytes = Z_ARG5;
      Register r_frame_size             = Z_R1;

      Label arguments_copied;

      // Save non-volatile registers to ABI of caller frame.
      BLOCK_COMMENT("save registers, push frame {");
      __ z_stmg(Z_R6, Z_R14, 16, Z_SP);
      __ z_std(Z_F8, 96, Z_SP);
      __ z_std(Z_F9, 104, Z_SP);
      __ z_std(Z_F10, 112, Z_SP);
      __ z_std(Z_F11, 120, Z_SP);
      __ z_std(Z_F12, 128, Z_SP);
      __ z_std(Z_F13, 136, Z_SP);
      __ z_std(Z_F14, 144, Z_SP);
      __ z_std(Z_F15, 152, Z_SP);

      //
      // Push ENTRY_FRAME including arguments:
      //
      //     F0      [TOP_IJAVA_FRAME_ABI]
      //             [outgoing Java arguments]
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //

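      // Frame size sketch (illustrative view of the computation below):
      //   frame_size = z_top_ijava_frame_abi_size + z_entry_frame_locals_size
      //              + argument_count * BytesPerWord
      // It is computed negated and in words first, then scaled to bytes
      // right before pushing the frame.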
      // Calculate new frame size and push frame.
      #define abi_plus_locals_size \
                (frame::z_top_ijava_frame_abi_size + frame::z_entry_frame_locals_size)
      if (abi_plus_locals_size % BytesPerWord == 0) {
        // Preload constant part of frame size.
        __ load_const_optimized(r_frame_size, -abi_plus_locals_size/BytesPerWord);
        // Keep copy of our frame pointer (caller's SP).
        __ z_lgr(r_entryframe_fp, Z_SP);
        // Add space required by arguments to frame size.
        __ z_slgf(r_frame_size, d_arg_argument_count, Z_R0, Z_SP);
        // Move Z_ARG5 early, it will be used as a local.
        __ z_lgr(r_new_arg_entry, r_arg_entry);
        // Convert frame size from words to bytes.
        __ z_sllg(r_frame_size, r_frame_size, LogBytesPerWord);
        __ push_frame(r_frame_size, r_entryframe_fp,
                      false/*don't copy SP*/, true /*frame size sign inverted*/);
      } else {
        guarantee(false, "frame sizes should be multiples of word size (BytesPerWord)");
      }
      BLOCK_COMMENT("} save, push");

      // Load argument registers for call.
      BLOCK_COMMENT("prepare/copy arguments {");
      __ z_lgr(Z_method, r_arg_method);
      __ z_lg(Z_thread, d_arg_thread, r_entryframe_fp);

      // Calculate top_of_arguments_addr which will be tos (not prepushed) later.
      // Simply use SP + frame::z_top_ijava_frame_abi_size.
      __ add2reg(r_top_of_arguments_addr,
                 frame::z_top_ijava_frame_abi_size - BytesPerWord, Z_SP);

      // Initialize call_stub locals (step 1).
      if ((call_wrapper_address_offset + BytesPerWord == result_address_offset) &&
          (result_address_offset + BytesPerWord == result_type_offset) &&
          (result_type_offset + BytesPerWord == arguments_tos_address_offset)) {

        __ z_stmg(r_arg_call_wrapper_addr, r_top_of_arguments_addr,
                  call_wrapper_address_offset, r_entryframe_fp);
      } else {
        __ z_stg(r_arg_call_wrapper_addr,
                 call_wrapper_address_offset, r_entryframe_fp);
        __ z_stg(r_arg_result_addr,
                 result_address_offset, r_entryframe_fp);
        __ z_stg(r_arg_result_type,
                 result_type_offset, r_entryframe_fp);
        __ z_stg(r_top_of_arguments_addr,
                 arguments_tos_address_offset, r_entryframe_fp);
      }

      // Copy Java arguments.

      // Any arguments to copy?
      __ load_and_test_int2long(Z_R1, Address(r_entryframe_fp, d_arg_argument_count));
      __ z_bre(arguments_copied);

      // Prepare loop and copy arguments in reverse order.
      {
        // Calculate argument size in bytes.
        __ z_sllg(r_argument_size_in_bytes, Z_R1, LogBytesPerWord);

        // Get addr of first incoming Java argument.
        __ z_lg(r_argument_addr, d_arg_argument_addr, r_entryframe_fp);

        // Let r_argumentcopy_addr point to last outgoing Java argument.
        __ add2reg(r_argumentcopy_addr, BytesPerWord, r_top_of_arguments_addr); // = Z_SP+160 effectively.

        // Let r_argument_addr point to last incoming Java argument.
        __ add2reg_with_index(r_argument_addr, -BytesPerWord,
                              r_argument_size_in_bytes, r_argument_addr);

        // Now loop while Z_R1 > 0 and copy arguments.
        {
          Label next_argument;
          __ bind(next_argument);
          // Mem-mem move.
          __ z_mvc(0, BytesPerWord-1, r_argumentcopy_addr, 0, r_argument_addr);
          __ add2reg(r_argument_addr, -BytesPerWord);
          __ add2reg(r_argumentcopy_addr, BytesPerWord);
          __ z_brct(Z_R1, next_argument);
        }
      }  // End of argument copy loop.

      __ bind(arguments_copied);
    }
    BLOCK_COMMENT("} arguments");

    BLOCK_COMMENT("call {");
    {
      // Call frame manager or native entry.

      //
      // Register state on entry to frame manager / native entry:
      //
      //   Z_ARG1 = r_top_of_arguments_addr - intptr_t *sender tos (prepushed)
      //            Lesp = (SP) + copied_arguments_offset - 8
      //   Z_method - method
      //   Z_thread - JavaThread*
      //

      // Here, the usual SP is the initial_caller_sp.
      __ z_lgr(Z_R10, Z_SP);

      // Z_esp points to the slot below the last argument.
      __ z_lgr(Z_esp, r_top_of_arguments_addr);

      //
      // Stack on entry to frame manager / native entry:
      //
      //     F0      [TOP_IJAVA_FRAME_ABI]
      //             [outgoing Java arguments]
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //

      // Do a light-weight C-call here, r_new_arg_entry holds the address
      // of the interpreter entry point (frame manager or native entry)
      // and save runtime-value of return_pc in return_address
      // (call by reference argument).
      return_address = __ call_stub(r_new_arg_entry);
    }
    BLOCK_COMMENT("} call");

    {
      BLOCK_COMMENT("restore registers {");
      // Returned from frame manager or native entry.
      // Now pop frame, process result, and return to caller.

      //
      // Stack on exit from frame manager / native entry:
      //
      //     F0      [ABI]
      //             ...
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //
      // Just pop the topmost frame ...
      //

      Label ret_is_object;
      Label ret_is_long;
      Label ret_is_float;
      Label ret_is_double;

      // Restore frame pointer.
      __ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP);
      // Pop frame. Done here to minimize stalls.
      __ pop_frame();

      // Reload some volatile registers which we've spilled before the call
      // to frame manager / native entry.
      // Access all locals via frame pointer, because we know nothing about
      // the topmost frame's size.
      __ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp);
      __ z_lg(r_arg_result_type, result_type_offset, r_entryframe_fp);

      // Restore non-volatiles.
      __ z_lmg(Z_R6, Z_R14, 16, Z_SP);
      __ z_ld(Z_F8, 96, Z_SP);
      __ z_ld(Z_F9, 104, Z_SP);
      __ z_ld(Z_F10, 112, Z_SP);
      __ z_ld(Z_F11, 120, Z_SP);
      __ z_ld(Z_F12, 128, Z_SP);
      __ z_ld(Z_F13, 136, Z_SP);
      __ z_ld(Z_F14, 144, Z_SP);
      __ z_ld(Z_F15, 152, Z_SP);
      BLOCK_COMMENT("} restore");

      //
      // Stack on exit from call_stub:
      //
      //     0       [C_FRAME]
      //             ...
      //
      // No call_stub frames left.
      //

      // All non-volatiles have been restored at this point!!

      //------------------------------------------------------------------------
      // The following code makes some assumptions on the T_<type> enum values.
      // The enum is defined in globalDefinitions.hpp.
      // The validity of the assumptions is tested as far as possible.
      // The assigned values should not be shuffled:
      //   T_BOOLEAN == 4    - lowest used enum value
      //   T_NARROWOOP == 16 - largest used enum value
      //------------------------------------------------------------------------
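      // Dispatch sketch (illustrative): the computed branch below transfers to
      //   firstHandler + (result_type - T_BOOLEAN) * 8
      // i.e., to one aligned, 8-byte handler block per BasicType value.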
      BLOCK_COMMENT("process result {");
      Label firstHandler;
      int   handlerLen = 8;
#ifdef ASSERT
      char  assertMsg[] = "check BasicType definition in globalDefinitions.hpp";
      __ z_chi(r_arg_result_type, T_BOOLEAN);
      __ asm_assert_low(assertMsg, 0x0234);
      __ z_chi(r_arg_result_type, T_NARROWOOP);
      __ asm_assert_high(assertMsg, 0x0235);
#endif
      __ add2reg(r_arg_result_type, -T_BOOLEAN);          // Remove offset.
      __ z_larl(Z_R1, firstHandler);                      // location of first handler
      __ z_sllg(r_arg_result_type, r_arg_result_type, 3); // Each handler is 8 bytes long.
      __ z_bc(MacroAssembler::bcondAlways, 0, r_arg_result_type, Z_R1);

      __ align(handlerLen);
      __ bind(firstHandler);
      // T_BOOLEAN:
        guarantee(T_BOOLEAN == 4, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_CHAR:
        guarantee(T_CHAR == T_BOOLEAN+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_FLOAT:
        guarantee(T_FLOAT == T_CHAR+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_ste(Z_FRET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_DOUBLE:
        guarantee(T_DOUBLE == T_FLOAT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_std(Z_FRET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_BYTE:
        guarantee(T_BYTE == T_DOUBLE+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_SHORT:
        guarantee(T_SHORT == T_BYTE+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_INT:
        guarantee(T_INT == T_SHORT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_LONG:
        guarantee(T_LONG == T_INT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_OBJECT:
        guarantee(T_OBJECT == T_LONG+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_ARRAY:
        guarantee(T_ARRAY == T_OBJECT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_VOID:
        guarantee(T_VOID == T_ARRAY+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_ADDRESS:
        guarantee(T_ADDRESS == T_VOID+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_NARROWOOP:
        guarantee(T_NARROWOOP == T_ADDRESS+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      BLOCK_COMMENT("} process result");
    }
    return start;
  }

  // Return point for a Java call if there's an exception thrown in
  // Java code. The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");

    address start = __ pc();

    //
    // Registers alive
    //
    //   Z_thread
    //   Z_ARG1 - address of pending exception
    //   Z_ARG2 - return address in call stub
    //

    const Register exception_file = Z_R0;
    const Register exception_line = Z_R1;

    __ load_const_optimized(exception_file, (void*)__FILE__);
    __ load_const_optimized(exception_line, (void*)__LINE__);

    __ z_stg(Z_ARG1, thread_(pending_exception));
    // Store into `char *'.
    __ z_stg(exception_file, thread_(exception_file));
    // Store into `int'.
    __ z_st(exception_line, thread_(exception_line));

    // Complete return to VM.
    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");

    // Continue in call stub.
    __ z_br(Z_ARG2);

    return start;
  }

  // Continuation point for runtime calls returning with a pending
  // exception. The pending exception check happened in the runtime
  // or native call stub. The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Read:
  //   Z_R14: pc the runtime library callee wants to return to.
  //   Since the exception occurred in the callee, the return pc
  //   from the point of view of Java is the exception pc.
  //
  // Invalidate:
  //   Volatile registers (except below).
  //
  // Update:
  //   Z_ARG1: exception
  //   (Z_R14 is unchanged and is live out).
  //
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    #define pending_exception_offset in_bytes(Thread::pending_exception_offset())
#ifdef ASSERT
    // Get pending exception oop.
    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

    // Make sure that this code is only executed if there is a pending exception.
    {
      Label L;
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_brne(L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }

    __ verify_oop(Z_ARG1, "StubRoutines::forward exception: not an oop");
#endif

    __ z_lgr(Z_ARG2, Z_R14); // Copy exception pc into Z_ARG2.
    __ save_return_pc();
    __ push_frame_abi160(0);
    // Find exception handler.
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
                    Z_thread,
                    Z_ARG2);
    // Copy handler's address.
    __ z_lgr(Z_R1, Z_RET);
    __ pop_frame();
    __ restore_return_pc();

    // Set up the arguments for the exception handler:
    // - Z_ARG1: exception oop
    // - Z_ARG2: exception pc

    // Load pending exception oop.
    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

    // The exception pc is the return address in the caller;
    // it must be loaded into Z_ARG2.
    __ z_lgr(Z_ARG2, Z_R14);

#ifdef ASSERT
    // Make sure exception is set.
    { Label L;
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_brne(L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // Clear the pending exception.
    __ clear_mem(Address(Z_thread, pending_exception_offset), sizeof(void *));
    // Jump to exception handler.
    __ z_br(Z_R1 /*handler address*/);

    return start;

    #undef pending_exception_offset
  }

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Only callee-saved registers are preserved (through the
  // normal RegisterMap handling). If the compiler
  // needs all registers to be preserved between the fault point and
  // the exception handler then it must assume responsibility for that
  // in AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller saved registers were assumed volatile in the compiler.

  // Note that we generate only this stub into a RuntimeStub, because
  // it needs to be properly traversed and ignored during GC, so we
  // change the meaning of the "__" macro within this method.

  // Note: the routine set_pc_not_at_call_for_caller in
  // SharedRuntime.cpp requires that this code be generated into a
  // RuntimeStub.
#undef __
#define __ masm->

  address generate_throw_exception(const char* name, address runtime_entry,
                                   bool restore_saved_exception_pc,
                                   Register arg1 = noreg, Register arg2 = noreg) {
    assert_different_registers(arg1, Z_R0_scratch);  // would be destroyed by push_frame()
    assert_different_registers(arg2, Z_R0_scratch);  // would be destroyed by push_frame()

    int insts_size = 256;
    int locs_size  = 0;
    CodeBuffer      code(name, insts_size, locs_size);
    MacroAssembler* masm = new MacroAssembler(&code);
    int framesize_in_bytes;
    address start = __ pc();

    __ save_return_pc();
    framesize_in_bytes = __ push_frame_abi160(0);

    address frame_complete_pc = __ pc();
    if (restore_saved_exception_pc) {
      __ unimplemented("StubGenerator::throw_exception", 74);
    }

    // Note that we always have a runtime stub frame on the top of stack at this point.
    __ get_PC(Z_R1);
    __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);

    // Do the call.
    BLOCK_COMMENT("call runtime_entry");
    __ call_VM_leaf(runtime_entry, Z_thread, arg1, arg2);

    __ reset_last_Java_frame();

#ifdef ASSERT
    // Make sure that this code is only executed if there is a pending exception.
    { Label L;
      __ z_lg(Z_R0,
              in_bytes(Thread::pending_exception_offset()),
              Z_thread);
      __ z_ltgr(Z_R0, Z_R0);
      __ z_brne(L);
      __ stop("StubRoutines::throw_exception: no pending exception");
      __ bind(L);
    }
#endif

    __ pop_frame();
    __ restore_return_pc();

    __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
    __ z_br(Z_R1);

    RuntimeStub* stub =
      RuntimeStub::new_runtime_stub(name, &code,
                                    frame_complete_pc - start,
                                    framesize_in_bytes/wordSize,
                                    NULL /*oop_maps*/, false);

    return stub->entry_point();
  }

#undef __
#ifdef PRODUCT
#define __ _masm->
#else
#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
#endif

  // Support for uint StubRoutine::zarch::partial_subtype_check(Klass
  // sub, Klass super);
  //
  // Arguments:
  //   ret  : Z_RET,  returned
  //   sub  : Z_ARG2, argument, not changed
  //   super: Z_ARG3, argument, not changed
  //
  //   raddr: Z_R14, blown by call
  //
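  // Conceptual behavior (an illustrative sketch only; is_subtype() is a
  // hypothetical stand-in for the slow-path scan generated below):
  //
  //   uint partial_subtype_check(Klass* sub, Klass* super) {
  //     return is_subtype(sub, super) ? 0   // also sets CC "equal"
  //                                   : 1;  // also sets CC "not equal"
  //   }
  //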
  address generate_partial_subtype_check() {
    StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    Label miss;

    address start = __ pc();

    const Register Rsubklass   = Z_ARG2; // subklass
    const Register Rsuperklass = Z_ARG3; // superklass

    // No args, but tmp registers that are killed.
    const Register Rlength    = Z_ARG4; // cache array length
    const Register Rarray_ptr = Z_ARG5; // Current value from cache array.

    if (UseCompressedOops) {
      assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub");
    }

    // Always take the slow path (see SPARC).
    __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass,
                                     Rarray_ptr, Rlength, NULL, &miss);

    // Match falls through here.
    __ clear_reg(Z_RET);               // Zero indicates a match. Set EQ flag in CC.
    __ z_br(Z_R14);

    __ BIND(miss);
    __ load_const_optimized(Z_RET, 1); // One indicates a miss.
    __ z_ltgr(Z_RET, Z_RET);           // Set NE flag in CC.
    __ z_br(Z_R14);

    return start;
  }

  // Return address of code to be called from code generated by
  // MacroAssembler::verify_oop.
  //
  // Don't generate, rather use C++ code.
  address generate_verify_oop_subroutine() {
    // Don't generate a StubCodeMark, because no code is generated!
    // Generating the mark triggers notifying the oprofile jvmti agent
    // about the dynamic code generation, but the stub without
    // code (code_size == 0) confuses opjitconv
    // StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");

    address start = 0;
    return start;
  }

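  // G1 pre-barrier sketch (illustrative C-level view of the G1 case generated
  // below; satb_marking_active() is a hypothetical stand-in for the SATB
  // "active" flag test, and the dest_uninitialized filter is resolved at
  // stub generation time):
  //
  //   if (satb_marking_active(thread)) {
  //     BarrierSet::static_write_ref_array_pre(addr, count);
  //   }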
  // Generate pre-write barrier for array.
  //
  // Input:
  //    addr  - register containing starting address
  //    count - register containing element count
  //
  // The input registers are overwritten.
  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {

    BarrierSet* const bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCTLogging:
        // With G1, don't generate the call if we statically know that the target is uninitialized.
        if (!dest_uninitialized) {
          // Is marking active?
          Label filtered;
          assert_different_registers(addr,  Z_R0_scratch);  // would be destroyed by push_frame()
          assert_different_registers(count, Z_R0_scratch);  // would be destroyed by push_frame()
          Register Rtmp1 = Z_R0_scratch;
          const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() +
                                             SATBMarkQueue::byte_offset_of_active());
          if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
            __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
          } else {
            guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
            __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
          }
          __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.

          // __ push_frame_abi160(0);  // implicitly done in save_live_registers()
          (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), addr, count);
          (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
          // __ pop_frame();  // implicitly done in restore_live_registers()

          __ bind(filtered);
        }
        break;
      case BarrierSet::CardTableForRS:
      case BarrierSet::CardTableExtension:
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();
    }
  }

  // Generate post-write barrier for array.
  //
  // Input:
  //    addr  - register containing starting address
  //    count - register containing element count
  //
  // The input registers are overwritten.
  void gen_write_ref_array_post_barrier(Register addr, Register count, bool branchToEnd) {
    BarrierSet* const bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCTLogging:
        {
          if (branchToEnd) {
            assert_different_registers(addr,  Z_R0_scratch);  // would be destroyed by push_frame()
            assert_different_registers(count, Z_R0_scratch);  // would be destroyed by push_frame()
            // __ push_frame_abi160(0);  // implicitly done in save_live_registers()
            (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
            __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
            (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
            // __ pop_frame();  // implicitly done in restore_live_registers()
          } else {
            // Tail call: call C and return to stub caller.
            address entry_point = CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
            __ lgr_if_needed(Z_ARG1, addr);
            __ lgr_if_needed(Z_ARG2, count);
            __ load_const(Z_R1, entry_point);
            __ z_br(Z_R1); // Branch without linking, callee will return to stub caller.
          }
        }
        break;
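      // Card-table sketch (illustrative view of the cases below, which store
      // zero into every card byte covering [addr, addr + count*BytesPerHeapOop)):
      //
      //   first = addr >> card_shift;
      //   last  = (addr + (count-1)*BytesPerHeapOop) >> card_shift;
      //   memset(ct->byte_map_base + first, 0, last - first + 1);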
      case BarrierSet::CardTableForRS:
      case BarrierSet::CardTableExtension:
        // These cases were formerly known as
        //   void array_store_check(Register addr, Register count, bool branchToEnd).
        {
          NearLabel doXC, done;
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
          assert_different_registers(Z_R0, Z_R1, addr, count);

          // Nothing to do if count <= 0.
          if (branchToEnd) {
            __ compare64_and_branch(count, (intptr_t) 0, Assembler::bcondNotHigh, done);
          } else {
            __ z_ltgr(count, count);
            __ z_bcr(Assembler::bcondNotPositive, Z_R14);
          }

          // Note: We can't combine the shifts. We could lose a carry
          //       from calculating the array end address.
          // count = (count-1)*BytesPerHeapOop + addr
          // Count holds addr of last oop in array then.
          __ z_sllg(count, count, LogBytesPerHeapOop);
          __ add2reg_with_index(count, -BytesPerHeapOop, count, addr);

          // Get base address of card table.
          __ load_const_optimized(Z_R1, (address)ct->byte_map_base);

          // count = (count>>shift) - (addr>>shift)
          __ z_srlg(addr,  addr,  CardTableModRefBS::card_shift);
          __ z_srlg(count, count, CardTableModRefBS::card_shift);

          // Prefetch first elements of card table for update.
          if (VM_Version::has_Prefetch()) {
            __ z_pfd(0x02, 0, addr, Z_R1);
          }

          // Special case: clear just one byte.
          __ clear_reg(Z_R0, true, false);  // Used for doOneByte.
          __ z_sgr(count, addr);            // Count = n-1 now, CC used for brc below.
          __ z_stc(Z_R0, 0, addr, Z_R1);    // Must preserve CC from z_sgr.
          if (branchToEnd) {
            __ z_brz(done);
          } else {
            __ z_bcr(Assembler::bcondZero, Z_R14);
          }

          __ z_cghi(count, 255);
          __ z_brnh(doXC);

          // MVCLE: clear a long area.
          // Start addr of card table range = base + addr.
          // # bytes in    card table range = (count + 1)
          __ add2reg_with_index(Z_R0, 0, Z_R1, addr);
          __ add2reg(Z_R1, 1, count);

          // dirty hack:
          // There are just two callers. Both pass
          // count in Z_ARG3 = Z_R4
          // addr  in Z_ARG2 = Z_R3
          // ==> use Z_ARG2 as src len reg = 0
          //         Z_ARG1 as src addr (ignored)
          assert(count == Z_ARG3, "count: unexpected register number");
          assert(addr  == Z_ARG2, "addr:  unexpected register number");
          __ clear_reg(Z_ARG2, true, false);

          __ MacroAssembler::move_long_ext(Z_R0, Z_ARG1, 0);

          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_bcr(Assembler::bcondAlways, Z_R14);
          }

          // XC: clear a short area.
          Label XC_template; // Instr template, never exec directly!
          __ bind(XC_template);
          __ z_xc(0, 0, addr, 0, addr);

          __ bind(doXC);
          // start addr of card table range = base + addr
          // end   addr of card table range = base + addr + count
          __ add2reg_with_index(addr, 0, Z_R1, addr);

          if (VM_Version::has_ExecuteExtensions()) {
            __ z_exrl(count, XC_template);  // Execute XC with var. len.
          } else {
            __ z_larl(Z_R1, XC_template);
            __ z_ex(count, 0, Z_R0, Z_R1);  // Execute XC with var. len.
          }
          if (!branchToEnd) {
            __ z_br(Z_R14);
          }

          __ bind(done);
        }
        break;
      case BarrierSet::ModRef:
        if (!branchToEnd) { __ z_br(Z_R14); }
        break;
      default:
        ShouldNotReachHere();
    }
  }


  // This is to test that the count register contains a positive int value.
  // Required because C2 does not respect int to long conversion for stub calls.
  void assert_positive_int(Register count) {
#ifdef ASSERT
    __ z_srag(Z_R0, count, 31);  // Just leave the sign (must be zero) in Z_R0.
    __ asm_assert_eq("missing zero extend", 0xAFFE);
#endif
  }

  // Generate overlap test for array copy stubs.
  // If no actual overlap is detected, control is transferred to the
  // "normal" copy stub (entry address passed in disjoint_copy_target).
  // Otherwise, execution continues with the code generated by the
  // caller of array_overlap_test.
  //
  // Input:
  //   Z_ARG1  - from
  //   Z_ARG2  - to
  //   Z_ARG3  - element count
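  // Overlap test sketch (illustrative): control is transferred to the
  // disjoint stub iff
  //   (to <= from) || (from + count*element_size <= to),
  // i.e., iff the copy cannot destructively overlap.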
  void array_overlap_test(address disjoint_copy_target, int log2_elem_size) {
    __ MacroAssembler::compare_and_branch_optimized(Z_ARG2, Z_ARG1, Assembler::bcondNotHigh,
                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);

    Register index = Z_ARG3;
    if (log2_elem_size > 0) {
      __ z_sllg(Z_R1, Z_ARG3, log2_elem_size);  // byte count
      index = Z_R1;
    }
    __ add2reg_with_index(Z_R1, 0, index, Z_ARG1);  // First byte after "from" range.

    __ MacroAssembler::compare_and_branch_optimized(Z_R1, Z_ARG2, Assembler::bcondNotHigh,
                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);

    // Destructive overlap: let caller generate code for that.
  }

  // Generate stub for disjoint array copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //   from:  Z_ARG1
  //   to:    Z_ARG2
  //   count: Z_ARG3 treated as signed
  void generate_disjoint_copy(bool aligned, int element_size,
                              bool branchToEnd,
                              bool restoreArgs) {
    // This is the zarch specific stub generator for general array copy tasks.
    // It has the following prereqs and features:
    //
    // - No destructive overlap allowed (else unpredictable results).
    // - Destructive overlap does not exist if the leftmost byte of the target
    //   does not coincide with any of the source bytes (except the leftmost).
    //
    // Register usage upon entry:
    //   Z_ARG1 == Z_R2 :   address of source array
    //   Z_ARG2 == Z_R3 :   address of target array
    //   Z_ARG3 == Z_R4 :   length of operands (# of elements on entry)
    //
    // Register usage within the generator:
    // - Z_R0 and Z_R1 are KILLed by the stub routine (target addr/len).
    //                 Used as pair register operand in complex moves, scratch registers anyway.
    // - Z_R5 is KILLed by the stub routine (source register pair addr/len) (even/odd reg).
    //                 Same as R0/R1, but no scratch register.
    // - Z_ARG1, Z_ARG2, Z_ARG3 are USEd but preserved by the stub routine,
    //                          but they might get temporarily overwritten.

    Register save_reg = Z_ARG4;     // (= Z_R5), holds original target operand address for restore.

    {
      Register llen_reg  = Z_R1;    // Holds left  operand len (odd reg).
      Register laddr_reg = Z_R0;    // Holds left  operand addr (even reg), overlaps with data_reg.
      Register rlen_reg  = Z_R5;    // Holds right operand len (odd reg), overlaps with save_reg.
      Register raddr_reg = Z_R4;    // Holds right operand addr (even reg), overlaps with len_reg.

      Register data_reg  = Z_R0;    // Holds copied data chunk in alignment process and copy loop.
      Register len_reg   = Z_ARG3;  // Holds operand len (#elements at entry, #bytes shortly after).
      Register dst_reg   = Z_ARG2;  // Holds left  (target) operand addr.
      Register src_reg   = Z_ARG1;  // Holds right (source) operand addr.

      Label     doMVCLOOP, doMVCLOOPcount, doMVCLOOPiterate;
      Label     doMVCUnrolled;
      NearLabel doMVC, doMVCgeneral, done;
      Label     MVC_template;
      address   pcMVCblock_b, pcMVCblock_e;

      bool      usedMVCLE       = true;
      bool      usedMVCLOOP     = true;
      bool      usedMVCUnrolled = false;
      bool      usedMVC         = false;
      bool      usedMVCgeneral  = false;

      int       stride;
      Register  stride_reg;
      Register  ix_reg;

      assert((element_size<=256) && (256%element_size == 0), "element size must be <= 256, power of 2");
      unsigned int log2_size = exact_log2(element_size);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("ARRAYCOPY DISJOINT byte  {"); break;
        case 2:  BLOCK_COMMENT("ARRAYCOPY DISJOINT short {"); break;
        case 4:  BLOCK_COMMENT("ARRAYCOPY DISJOINT int   {"); break;
        case 8:  BLOCK_COMMENT("ARRAYCOPY DISJOINT long  {"); break;
        default: BLOCK_COMMENT("ARRAYCOPY DISJOINT       {"); break;
      }

      assert_positive_int(len_reg);

      BLOCK_COMMENT("preparation {");

      // No copying if len <= 0.
      if (branchToEnd) {
        __ compare64_and_branch(len_reg, (intptr_t) 0, Assembler::bcondNotHigh, done);
      } else {
        if (VM_Version::has_CompareBranch()) {
          __ z_cgib(len_reg, 0, Assembler::bcondNotHigh, 0, Z_R14);
        } else {
          __ z_ltgr(len_reg, len_reg);
          __ z_bcr(Assembler::bcondNotPositive, Z_R14);
        }
      }

      // Prefetch just one cache line. Speculative opt for short arrays.
      // Do not use Z_R1 in prefetch. Is undefined here.
      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
      }

      BLOCK_COMMENT("} preparation");

      // Save args only if really needed.
      // Keep len test local to branch. Is generated only once.

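      // Mode selection sketch (illustrative; thresholds as tested below):
      //   byte count <= 256  -> one executed MVC (doMVC, or doMVCUnrolled
      //                         for the always aligned 8-byte elements)
      //   byte count <= 4096 -> MVC loop in 256-byte strides (doMVCLOOP)
      //   larger             -> MVCLE (fall through)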
      BLOCK_COMMENT("mode selection {");

      // Special handling for arrays with only a few elements.
      // Nothing fancy: just an executed MVC.
      if (log2_size > 0) {
        __ z_sllg(Z_R1, len_reg, log2_size); // Remember #bytes in Z_R1.
      }
      if (element_size != 8) {
        __ z_cghi(len_reg, 256/element_size);
        __ z_brnh(doMVC);
        usedMVC = true;
      }
      if (element_size == 8) { // Long and oop arrays are always aligned.
        __ z_cghi(len_reg, 256/element_size);
        __ z_brnh(doMVCUnrolled);
        usedMVCUnrolled = true;
      }

      // Prefetch another cache line. We, for sure, have more than one line to copy.
      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 256, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 256, Z_R0, dst_reg); // Store access.
      }

      if (restoreArgs) {
        // Remember entry value of ARG2 to restore all arguments later from that knowledge.
        __ z_lgr(save_reg, dst_reg);
      }

      __ z_cghi(len_reg, 4096/element_size);
      if (log2_size == 0) {
        __ z_lgr(Z_R1, len_reg); // Init Z_R1 with #bytes
      }
      __ z_brnh(doMVCLOOP);

      // Fall through to MVCLE case.

      BLOCK_COMMENT("} mode selection");

      // MVCLE: for long arrays
      //   DW aligned: Best performance for sizes > 4kBytes.
      //   unaligned:  Least complex for sizes > 256 bytes.
      if (usedMVCLE) {
        BLOCK_COMMENT("mode MVCLE {");

        // Setup registers for mvcle.
        //__ z_lgr(llen_reg, len_reg);// r1 <- r4  #bytes already in Z_R1, aka llen_reg.
        __ z_lgr(laddr_reg, dst_reg); // r0 <- r3
        __ z_lgr(raddr_reg, src_reg); // r4 <- r2
        __ z_lgr(rlen_reg, llen_reg); // r5 <- r1

        __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb0);    // special: bypass cache
        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb8); // special: Hold data in cache.
        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0);

        if (restoreArgs) {
          // MVCLE updates the source (Z_R4,Z_R5) and target (Z_R0,Z_R1) register pairs.
          // Dst_reg (Z_ARG2) and src_reg (Z_ARG1) are left untouched. No restore required.
          // Len_reg (Z_ARG3) is destroyed and must be restored.
          __ z_slgr(laddr_reg, dst_reg);             // copied #bytes
          if (log2_size > 0) {
            __ z_srag(Z_ARG3, laddr_reg, log2_size); // Convert back to #elements.
          } else {
            __ z_lgr(Z_ARG3, laddr_reg);
          }
        }
        if (branchToEnd) {
          __ z_bru(done);
        } else {
          __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVCLE");
      }
      // No fallthru possible here.

      // MVCUnrolled: for short, aligned arrays.

      if (usedMVCUnrolled) {
        BLOCK_COMMENT("mode MVC unrolled {");
        stride = 8;

        // Generate unrolled MVC instructions.
        for (int ii = 32; ii > 1; ii--) {
          __ z_mvc(0, ii * stride-1, dst_reg, 0, src_reg); // ii*8 byte copy
          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }
        }

        pcMVCblock_b = __ pc();
        __ z_mvc(0, 1 * stride-1, dst_reg, 0, src_reg); // 8 byte copy
        if (branchToEnd) {
          __ z_bru(done);
        } else {
          __ z_br(Z_R14);
        }

        pcMVCblock_e = __ pc();
        Label MVC_ListEnd;
        __ bind(MVC_ListEnd);

        // This is an absolute fast path:
        // - Array len in bytes must not be greater than 256.
        // - Array len in bytes must be an integer mult of DW
        //   to save expensive handling of trailing bytes.
        // - Argument restore is not done,
        //   i.e. previous code must not alter arguments (this code doesn't either).

        __ bind(doMVCUnrolled);

        // Avoid mul, prefer shift where possible.
        // Combine shift right (for #DW) with shift left (for block size).
        // Set CC for zero test below (asm_assert).
        // Note: #bytes comes in Z_R1, #DW in len_reg.
        unsigned int MVCblocksize    = pcMVCblock_e - pcMVCblock_b;
        unsigned int logMVCblocksize = 0xffffffffU; // Pacify compiler ("used uninitialized" warning).

        if (log2_size > 0) { // Len was scaled into Z_R1.
          switch (MVCblocksize) {

            case  8: logMVCblocksize = 3;
                     __ z_ltgr(Z_R0, Z_R1); // #bytes is index
                     break;                 // reasonable size, use shift

            case 16: logMVCblocksize = 4;
                     __ z_slag(Z_R0, Z_R1, logMVCblocksize-log2_size);
                     break;                 // reasonable size, use shift

            default: logMVCblocksize = 0;
                     __ z_ltgr(Z_R0, len_reg); // #DW for mul
                     break;                 // all other sizes: use mul
          }
        } else {
          guarantee(log2_size, "doMVCUnrolled: only for DW entities");
        }

        // This test (and branch) is redundant. Previous code makes sure that
        //  - element count > 0
        //  - element size == 8.
        // Thus, len reg should never be zero here. We insert an asm_assert() here,
        // just to double-check and to be on the safe side.
        __ asm_assert(false, "zero len cannot occur", 99);

        __ z_larl(Z_R1, MVC_ListEnd); // Get addr of last instr block.
        // Avoid mul, prefer shift where possible.
        if (logMVCblocksize == 0) {
          __ z_mghi(Z_R0, MVCblocksize);
        }
        __ z_slgr(Z_R1, Z_R0);
        __ z_br(Z_R1);
        BLOCK_COMMENT("} mode MVC unrolled");
      }
      // No fallthru possible here.

      // MVC execute template
      // Must always generate. Usage may be switched on below.
      // There is no suitable place after here to put the template.
      __ bind(MVC_template);
      __ z_mvc(0,0,dst_reg,0,src_reg); // Instr template, never exec directly!


      // MVC Loop: for medium-sized arrays

      // Only for DW aligned arrays (src and dst).
      // #bytes to copy must be at least 256!!!
      // Non-aligned cases handled separately.
      stride     = 256;
      stride_reg = Z_R1;   // Holds #bytes when control arrives here.
      ix_reg     = Z_ARG3; // Alias for len_reg.


      if (usedMVCLOOP) {
        BLOCK_COMMENT("mode MVC loop {");
        __ bind(doMVCLOOP);

        __ z_lcgr(ix_reg, Z_R1);      // Ix runs from -(n-2)*stride to 1*stride (inclusive).
        __ z_llill(stride_reg, stride);
        __ add2reg(ix_reg, 2*stride); // Thus: increment ix by 2*stride.

        __ bind(doMVCLOOPiterate);
        __ z_mvc(0, stride-1, dst_reg, 0, src_reg);
        __ add2reg(dst_reg, stride);
        __ add2reg(src_reg, stride);
        __ bind(doMVCLOOPcount);
        __ z_brxlg(ix_reg, stride_reg, doMVCLOOPiterate);

        // Don't use add2reg() here, since we must set the condition code!
        __ z_aghi(ix_reg, -2*stride); // Compensate incr from above: zero diff means "all copied".

        if (restoreArgs) {
          __ z_lcgr(Z_R1, ix_reg);    // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
          __ z_brnz(doMVCgeneral);    // We're not done yet, ix_reg is not zero.

          // ARG1, ARG2, and ARG3 were altered by the code above, so restore them building on save_reg.
          __ z_slgr(dst_reg, save_reg); // copied #bytes
          __ z_slgr(src_reg, dst_reg);  // = ARG1 (now restored)
          if (log2_size) {
            __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3.
          } else {
            __ z_lgr(Z_ARG3, dst_reg);
          }
          __ z_lgr(Z_ARG2, save_reg);   // ARG2 now restored.

          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }

        } else {
          if (branchToEnd) {
            __ z_brz(done);                        // CC set by aghi instr.
          } else {
            __ z_bcr(Assembler::bcondZero, Z_R14); // We're all done if zero.
          }

          __ z_lcgr(Z_R1, ix_reg);  // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
          // __ z_bru(doMVCgeneral); // fallthru
        }
        usedMVCgeneral = true;
        BLOCK_COMMENT("} mode MVC loop");
      }
      // Fallthru to doMVCgeneral

      // MVCgeneral: for short, unaligned arrays, after other copy operations

      // Somewhat expensive due to use of EX instruction, but simple.
      if (usedMVCgeneral) {
        BLOCK_COMMENT("mode MVC general {");
        __ bind(doMVCgeneral);

        __ add2reg(len_reg, -1, Z_R1);      // Get #bytes-1 for EXECUTE.
        if (VM_Version::has_ExecuteExtensions()) {
          __ z_exrl(len_reg, MVC_template); // Execute MVC with variable length.
        } else {
          __ z_larl(Z_R1, MVC_template);    // Get addr of instr template.
          __ z_ex(len_reg, 0, Z_R0, Z_R1);  // Execute MVC with variable length.
        }                                   // penalty: 9 ticks

        if (restoreArgs) {
          // ARG1, ARG2, and ARG3 were altered by code executed before, so restore them building on save_reg
          __ z_slgr(dst_reg, save_reg);     // Copied #bytes without the "doMVCgeneral" chunk
          __ z_slgr(src_reg, dst_reg);      // = ARG1 (now restored), was not advanced for "doMVCgeneral" chunk
          __ add2reg_with_index(dst_reg, 1, len_reg, dst_reg); // Len of executed MVC was not accounted for, yet.
          if (log2_size) {
            __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3
          } else {
            __ z_lgr(Z_ARG3, dst_reg);
          }
          __ z_lgr(Z_ARG2, save_reg);       // ARG2 now restored.
        }

        if (usedMVC) {
          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }
        } else {
          if (!branchToEnd) __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVC general");
      }
      // Fallthru possible if following block not generated.

      // MVC: for short, unaligned arrays

      // Somewhat expensive due to use of EX instruction, but simple. penalty: 9 ticks.
      // Differs from doMVCgeneral in reconstruction of ARG2, ARG3, and ARG4.
      if (usedMVC) {
        BLOCK_COMMENT("mode MVC {");
        __ bind(doMVC);

        // get #bytes-1 for EXECUTE
        if (log2_size) {
          __ add2reg(Z_R1, -1);           // Length was scaled into Z_R1.
        } else {
          __ add2reg(Z_R1, -1, len_reg);  // Length was not scaled.
        }

        if (VM_Version::has_ExecuteExtensions()) {
          __ z_exrl(Z_R1, MVC_template);  // Execute MVC with variable length.
        } else {
          __ z_lgr(Z_R0, Z_R5);           // Save ARG4, may be unnecessary.
          __ z_larl(Z_R5, MVC_template);  // Get addr of instr template.
          __ z_ex(Z_R1, 0, Z_R0, Z_R5);   // Execute MVC with variable length.
          __ z_lgr(Z_R5, Z_R0);           // Restore ARG4, may be unnecessary.
        }

        if (!branchToEnd) {
          __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVC");
      }

      __ bind(done);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT byte "); break;
        case 2:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT short"); break;
        case 4:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT int  "); break;
        case 8:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT long "); break;
        default: BLOCK_COMMENT("} ARRAYCOPY DISJOINT      "); break;
      }
    }
  }

  // Generate stub for conjoint array copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //   from:  Z_ARG1
  //   to:    Z_ARG2
  //   count: Z_ARG3 treated as signed
  void generate_conjoint_copy(bool aligned, int element_size, bool branchToEnd) {

    // This is the zarch specific stub generator for general array copy tasks.
    // It has the following prereqs and features:
    //
    // - Destructive overlap exists and is handled by reverse copy.
    // - Destructive overlap exists if the leftmost byte of the target
    //   does coincide with any of the source bytes (except the leftmost).
    // - Z_R0 and Z_R1 are KILLed by the stub routine (data and stride)
    // - Z_ARG1 and Z_ARG2 are USEd but preserved by the stub routine.
    // - Z_ARG3 is USED but preserved by the stub routine.
    // - Z_ARG4 is used as index register and is thus KILLed.
    //
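    // Reverse-copy sketch (illustrative): the code below copies from the end
    // of the arrays downwards. Before the 8-byte main loop is entered,
    // leftover units are peeled off the tail:
    //   1 byte  if the byte count is odd,
    //   2 bytes if the halfword bit is set,
    //   4 bytes if the word bit is set,
    // leaving a remaining length that is a multiple of a DW (8 bytes).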
    {
      Register stride_reg = Z_R1;   // Stride & compare value in loop (negative element_size).
      Register data_reg   = Z_R0;   // Holds value of currently processed element.
      Register ix_reg     = Z_ARG4; // Holds byte index of currently processed element.
      Register len_reg    = Z_ARG3; // Holds length (in #elements) of arrays.
      Register dst_reg    = Z_ARG2; // Holds left  operand addr.
      Register src_reg    = Z_ARG1; // Holds right operand addr.

      assert(256%element_size == 0, "Element size must be power of 2.");
      assert(element_size     <= 8, "Can't handle more than DW units.");

      switch (element_size) {
        case 1:  BLOCK_COMMENT("ARRAYCOPY CONJOINT byte  {"); break;
        case 2:  BLOCK_COMMENT("ARRAYCOPY CONJOINT short {"); break;
        case 4:  BLOCK_COMMENT("ARRAYCOPY CONJOINT int   {"); break;
        case 8:  BLOCK_COMMENT("ARRAYCOPY CONJOINT long  {"); break;
        default: BLOCK_COMMENT("ARRAYCOPY CONJOINT       {"); break;
      }

      assert_positive_int(len_reg);

      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
      }

      unsigned int log2_size = exact_log2(element_size);
      if (log2_size) {
        __ z_sllg(ix_reg, len_reg, log2_size);
      } else {
        __ z_lgr(ix_reg, len_reg);
      }

      // Optimize reverse copy loop.
      // Main loop copies DW units which may be unaligned. Unaligned access adds some penalty ticks.
      // Unaligned DW access (neither fetch nor store) is DW-atomic, but should be alignment-atomic.
      // Preceding the main loop, some bytes are copied to obtain a DW-multiple remaining length.

      Label countLoop1;
      Label copyLoop1;
      Label skipBY;
      Label skipHW;
      int   stride = -8;

      __ load_const_optimized(stride_reg, stride); // Prepare for DW copy loop.

      if (element_size == 8)    // Nothing to do here.
        __ z_bru(countLoop1);
      else {                    // Do not generate dead code.
        __ z_tmll(ix_reg, 7);   // Check the "odd" bits.
        __ z_bre(countLoop1);   // There are none, very good!
      }

      if (log2_size == 0) {     // Handle leftover Byte.
        __ z_tmll(ix_reg, 1);
        __ z_bre(skipBY);
        __ z_lb(data_reg,   -1, ix_reg, src_reg);
        __ z_stcy(data_reg, -1, ix_reg, dst_reg);
        __ add2reg(ix_reg, -1); // Decrement delayed to avoid AGI.
        __ bind(skipBY);
        // fallthru
      }
      if (log2_size <= 1) {     // Handle leftover HW.
        __ z_tmll(ix_reg, 2);
        __ z_bre(skipHW);
        __ z_lhy(data_reg,  -2, ix_reg, src_reg);
        __ z_sthy(data_reg, -2, ix_reg, dst_reg);
        __ add2reg(ix_reg, -2); // Decrement delayed to avoid AGI.
        __ bind(skipHW);
        __ z_tmll(ix_reg, 4);
        __ z_bre(countLoop1);
        // fallthru
      }
      if (log2_size <= 2) {     // There are just 4 bytes (left) that need to be copied.
        __ z_ly(data_reg,  -4, ix_reg, src_reg);
        __ z_sty(data_reg, -4, ix_reg, dst_reg);
        __ add2reg(ix_reg, -4); // Decrement delayed to avoid AGI.
        __ z_bru(countLoop1);
      }

      // Control can never get to here. Never! Never ever!
      __ z_illtrap(0x99);
      __ bind(copyLoop1);
      __ z_lg(data_reg,  0, ix_reg, src_reg);
      __ z_stg(data_reg, 0, ix_reg, dst_reg);
      __ bind(countLoop1);
      __ z_brxhg(ix_reg, stride_reg, copyLoop1);

      if (!branchToEnd)
        __ z_br(Z_R14);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT byte "); break;
        case 2:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT short"); break;
        case 4:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT int  "); break;
        case 8:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT long "); break;
        default: BLOCK_COMMENT("} ARRAYCOPY CONJOINT      "); break;
      }
    }
  }

  // Generate stub for disjoint byte copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  address generate_disjoint_byte_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);

    // This is the zarch specific stub generator for byte array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 1, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_short_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for short array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 2, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_int_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for int array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 4, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_long_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for long array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 8, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for oop array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features.
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
    unsigned int size      = UseCompressedOops ? 4 : 8;

    gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);

    generate_disjoint_copy(aligned, size, true, true);

    gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);

    return __ addr_at(start_off);
  }


  address generate_conjoint_byte_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping byte array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:
    unsigned int start_off   = __ offset();  // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jbyte_disjoint_arraycopy()
                                       : StubRoutines::jbyte_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 0); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 1, false);

    return __ addr_at(start_off);
  }


  address generate_conjoint_short_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping short array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:
    unsigned int start_off   = __ offset();  // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jshort_disjoint_arraycopy()
                                       : StubRoutines::jshort_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 1); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 2, false);

    return __ addr_at(start_off);
  }

  address generate_conjoint_int_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping int array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:

    unsigned int start_off   = __ offset();  // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jint_disjoint_arraycopy()
                                       : StubRoutines::jint_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 2); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 4, false);

    return __ addr_at(start_off);
  }

  address generate_conjoint_long_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping long array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:

    unsigned int start_off   = __ offset();  // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jlong_disjoint_arraycopy()
                                       : StubRoutines::jlong_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 3); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 8, false);

    return __ addr_at(start_off);
  }

  address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping oop array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features.
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
    unsigned int size  = UseCompressedOops ? 4 : 8;
    unsigned int shift = UseCompressedOops ? 2 : 3;

    address nooverlap_target = aligned ? StubRoutines::arrayof_oop_disjoint_arraycopy(dest_uninitialized)
                                       : StubRoutines::oop_disjoint_arraycopy(dest_uninitialized);

    // Branch to disjoint_copy (if applicable) before pre_barrier to avoid double pre_barrier.
    array_overlap_test(nooverlap_target, shift);  // Branch away to nooverlap_target if disjoint.

    gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);

    generate_conjoint_copy(aligned, size, true);  // Must preserve ARG2, ARG3.

    gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);

    return __ addr_at(start_off);
  }


  void generate_arraycopy_stubs() {

    // Note: the disjoint stubs must be generated first, as some of
    // the conjoint stubs use them.
    StubRoutines::_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy (false, "jbyte_disjoint_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
    StubRoutines::_jint_disjoint_arraycopy       = generate_disjoint_int_copy  (false, "jint_disjoint_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy      = generate_disjoint_long_copy (false, "jlong_disjoint_arraycopy");
    StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_oop_copy  (false, "oop_disjoint_arraycopy", false);
    StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy  (false, "oop_disjoint_arraycopy_uninit", true);

    StubRoutines::_arrayof_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy (true, "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jint_disjoint_arraycopy       = generate_disjoint_int_copy  (true, "arrayof_jint_disjoint_arraycopy");
    StubRoutines::_arrayof_jlong_disjoint_arraycopy      = generate_disjoint_long_copy (true, "arrayof_jlong_disjoint_arraycopy");
    StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy  (true, "arrayof_oop_disjoint_arraycopy", false);
    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy  (true, "arrayof_oop_disjoint_arraycopy_uninit", true);

    StubRoutines::_jbyte_arraycopy      = generate_conjoint_byte_copy (false, "jbyte_arraycopy");
    StubRoutines::_jshort_arraycopy     = generate_conjoint_short_copy(false, "jshort_arraycopy");
    StubRoutines::_jint_arraycopy       = generate_conjoint_int_copy  (false, "jint_arraycopy");
    StubRoutines::_jlong_arraycopy      = generate_conjoint_long_copy (false, "jlong_arraycopy");
    StubRoutines::_oop_arraycopy        = generate_conjoint_oop_copy  (false, "oop_arraycopy", false);
    StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy  (false, "oop_arraycopy_uninit", true);

    StubRoutines::_arrayof_jbyte_arraycopy      = generate_conjoint_byte_copy (true, "arrayof_jbyte_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy     = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
    StubRoutines::_arrayof_jint_arraycopy       = generate_conjoint_int_copy  (true, "arrayof_jint_arraycopy");
    StubRoutines::_arrayof_jlong_arraycopy      = generate_conjoint_long_copy (true, "arrayof_jlong_arraycopy");
    StubRoutines::_arrayof_oop_arraycopy        = generate_conjoint_oop_copy  (true, "arrayof_oop_arraycopy", false);
    StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy  (true, "arrayof_oop_arraycopy_uninit", true);
  }

  void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {

    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   Z_ARG1 = adr
    //   Z_ARG2 = errValue
    //
    // result:
    //   Z_RET  = *adr or errValue

    StubCodeMark mark(this, "StubRoutines", name);

    // entry point
    // Load *adr into Z_ARG2, may fault.
    *entry = *fault_pc = __ pc();
    switch (size) {
      case 4:
        // Sign extended int32_t.
        __ z_lgf(Z_ARG2, 0, Z_ARG1);
        break;
      case 8:
        // int64_t
        __ z_lg(Z_ARG2, 0, Z_ARG1);
        break;
      default:
        ShouldNotReachHere();
    }

    // Return errValue or *adr.
    *continuation_pc = __ pc();
    __ z_lgr(Z_RET, Z_ARG2);
    __ z_br(Z_R14);

  }

  // Call interface for AES_encryptBlock, AES_decryptBlock stubs.
  //
  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
  //   Z_ARG2 - destination data block. Ptr to leftmost byte to be stored.
  //            For in-place encryption/decryption, ARG1 and ARG2 can point
  //            to the same piece of storage.
  //   Z_ARG3 - Crypto key address (expanded key). The first n bits of
  //            the expanded key constitute the original AES-<n> key (see below).
  //
  //   Z_RET  - return value. First unprocessed byte offset in src buffer.
  //
  // Some remarks:
  //   The crypto key, as passed from the caller to these encryption stubs,
  //   is a so-called expanded key. It is derived from the original key
  //   by the Rijndael key schedule, see http://en.wikipedia.org/wiki/Rijndael_key_schedule
  //   With the expanded key, the cipher/decipher task is decomposed into
  //   multiple, less complex steps, called rounds. Sun SPARC and Intel
  //   processors evidently implement support for those less complex steps.
  //   z/Architecture provides instructions for full cipher/decipher complexity.
  //   Therefore, we need the original key here, not the expanded key.
  //   Luckily, the first n bits of an AES-<n> expanded key are formed
  //   by the original key itself. That takes us out of trouble. :-)
  //   The key length (in bytes) relation is as follows:
  //     original    expanded   rounds   key bit   keylen
  //     key bytes   key bytes           length    in words
  //            16         176      11      128        44
  //            24         208      13      192        52
  //            32         240      15      256        60
  //
  // The crypto instructions used in the AES* stubs have some specific register requirements.
  //   Z_R0 holds the crypto function code. Please refer to the KM/KMC instruction
  //        description in the "z/Architecture Principles of Operation" manual for details.
  //   Z_R1 holds the parameter block address. The parameter block contains the cryptographic key
  //        (KM instruction) and the chaining value (KMC instruction).
  //   dst  must designate an even-numbered register, holding the address of the output message.
  //   src  must designate an even/odd register pair, holding the address/length of the original message.

  // Helper function which generates code to
  //  - load the function code in register fCode (== Z_R0),
  //  - load the data block length (depends on cipher function) into register srclen,
  //  - switch between cipher and decipher function codes (parameter is_decipher).
  void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) {

    BLOCK_COMMENT("Set fCode {"); {
      Label fCode_set;
      int   mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
      bool  identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk)
                                 && (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
      // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256.
      __ z_cghi(keylen, 52);

      __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
      if (!identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk);
      }
      __ z_brh(fCode_set);  // keyLen >  52: AES256

      __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode);
      if (!identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES192_dataBlk);
      }
      __ z_bre(fCode_set);  // keyLen == 52: AES192

      __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
      if (!identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
      }
      // __ z_brl(fCode_set); // keyLen < 52: AES128 // fallthru

      __ bind(fCode_set);
      if (identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
      }
    }
    BLOCK_COMMENT("} Set fCode");
  }

  // Push a parameter block for the cipher/decipher instruction on the stack.
  // NOTE:
  //   Before returning, the stub has to copy the chaining value from
  //   the parmBlk, where it was updated by the crypto instruction, back
  //   to the chaining value array whose address was passed in the cv argument.
  //   As all the available registers are used and modified by KMC, we need to save
  //   the key length across the KMC instruction. We do so by spilling it to the stack,
  //   just preceding the parmBlk (at (parmBlk - 8)).
  void generate_push_parmBlk(Register keylen, Register fCode, Register parmBlk, Register key, Register cv, bool is_decipher) {
    const int AES_parmBlk_align    = 32;
    const int AES_parmBlk_addspace = AES_parmBlk_align;  // Must be a multiple of AES_parmBlk_align.
    int   cv_len, key_len;
    int   mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
    Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;

    BLOCK_COMMENT("push parmBlk {");
    if (VM_Version::has_Crypto_AES()   ) { __ z_cghi(keylen, 52); }
    if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); }  // keyLen >  52: AES256
    if (VM_Version::has_Crypto_AES192()) { __ z_bre(parmBlk_192); }  // keyLen == 52: AES192
    if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); }  // keyLen <  52: AES128

    // Security net: requested AES function not available on this CPU.
    // NOTE:
    //   As of now (March 2015), this safety net is not required. JCE policy files limit the
    //   cryptographic strength of the keys used to 128 bits. If we have AES hardware support
    //   at all, we have at least AES-128.
    __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAES as remedy.", 0);

    if (VM_Version::has_Crypto_AES128()) {
      __ bind(parmBlk_128);
      cv_len  = VM_Version::Cipher::_AES128_dataBlk;
      key_len = VM_Version::Cipher::_AES128_parmBlk_C - cv_len;
      __ z_lay(parmBlk, -(VM_Version::Cipher::_AES128_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
      __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.

      // Resize the frame to accommodate the aligned parameter block and other stuff.
      // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
      __ z_stg(keylen, -8, parmBlk);                    // Spill keylen for later use.
      __ z_stg(Z_SP,  -16, parmBlk);                    // Spill SP for easy revert.
      __ z_aghi(parmBlk, -AES_parmBlk_addspace);        // Additional space for keylen, etc.
      __ resize_frame_absolute(parmBlk, keylen, true);  // Resize frame with parmBlk being the new SP.
      __ z_aghi(parmBlk, AES_parmBlk_addspace);         // Restore parameter block address.

      __ z_mvc(0, cv_len-1, parmBlk, 0, cv);         // Copy cv.
      __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);  // Copy key.
      __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
      if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
        __ z_bru(parmBlk_set);  // Fallthru otherwise.
      }
    }

    if (VM_Version::has_Crypto_AES192()) {
      __ bind(parmBlk_192);
      cv_len  = VM_Version::Cipher::_AES192_dataBlk;
      key_len = VM_Version::Cipher::_AES192_parmBlk_C - cv_len;
      __ z_lay(parmBlk, -(VM_Version::Cipher::_AES192_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
      __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.

      // Resize the frame to accommodate the aligned parameter block and other stuff.
      // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
      __ z_stg(keylen, -8, parmBlk);                    // Spill keylen for later use.
      __ z_stg(Z_SP,  -16, parmBlk);                    // Spill SP for easy revert.
      __ z_aghi(parmBlk, -AES_parmBlk_addspace);        // Additional space for keylen, etc.
      __ resize_frame_absolute(parmBlk, keylen, true);  // Resize frame with parmBlk being the new SP.
      __ z_aghi(parmBlk, AES_parmBlk_addspace);         // Restore parameter block address.

      __ z_mvc(0, cv_len-1, parmBlk, 0, cv);         // Copy cv.
      __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);  // Copy key.
      __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode);
      if (VM_Version::has_Crypto_AES256()) {
        __ z_bru(parmBlk_set);  // Fallthru otherwise.
      }
    }

    if (VM_Version::has_Crypto_AES256()) {
      __ bind(parmBlk_256);
      cv_len  = VM_Version::Cipher::_AES256_dataBlk;
      key_len = VM_Version::Cipher::_AES256_parmBlk_C - cv_len;
      __ z_lay(parmBlk, -(VM_Version::Cipher::_AES256_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
      __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.

      // Resize the frame to accommodate the aligned parameter block and other stuff.
      // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
      __ z_stg(keylen, -8, parmBlk);                    // Spill keylen for later use.
      __ z_stg(Z_SP,  -16, parmBlk);                    // Spill SP for easy revert.
      __ z_aghi(parmBlk, -AES_parmBlk_addspace);        // Additional space for keylen, etc.
      __ resize_frame_absolute(parmBlk, keylen, true);  // Resize frame with parmBlk being the new SP.
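      // Stack state at this point: Z_SP equals the lowered parmBlk (aligned
      // parmBlk minus AES_parmBlk_addspace); the spilled keylen and caller SP
      // live just below the aligned block, at aligned parmBlk - 8 and - 16.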
      __ z_aghi(parmBlk, AES_parmBlk_addspace);         // Restore parameter block address.

      __ z_mvc(0, cv_len-1, parmBlk, 0, cv);         // Copy cv.
      __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);  // Copy key.
      __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
      // __ z_bru(parmBlk_set); // fallthru
    }

    __ bind(parmBlk_set);
    BLOCK_COMMENT("} push parmBlk");
  }

  // Pop a parameter block from the stack. The chaining value portion of the parameter block
  // is copied back to the cv array as it is needed for subsequent cipher steps.
  // The keylen value as well as the original SP (before resizing) were pushed to the stack
  // when pushing the parameter block.
  void generate_pop_parmBlk(Register keylen, Register parmBlk, Register key, Register cv) {

    BLOCK_COMMENT("pop parmBlk {");
    bool identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) &&
                                 (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
    if (identical_dataBlk_len) {
      int cv_len = VM_Version::Cipher::_AES128_dataBlk;
      __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
    } else {
      int cv_len;
      Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
      __ z_lg(keylen, -8, parmBlk);  // Restore keylen.
      __ z_cghi(keylen, 52);
      if (VM_Version::has_Crypto_AES256()) __ z_brh(parmBlk_256);  // keyLen >  52: AES256
      if (VM_Version::has_Crypto_AES192()) __ z_bre(parmBlk_192);  // keyLen == 52: AES192
      // if (VM_Version::has_Crypto_AES128()) __ z_brl(parmBlk_128); // keyLen < 52: AES128 // fallthru

      // Security net: there is none here. Had we needed one, we would already
      // have fallen into it when pushing the parameter block.
      if (VM_Version::has_Crypto_AES128()) {
        __ bind(parmBlk_128);
        cv_len = VM_Version::Cipher::_AES128_dataBlk;
        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
        if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
          __ z_bru(parmBlk_set);
        }
      }

      if (VM_Version::has_Crypto_AES192()) {
        __ bind(parmBlk_192);
        cv_len = VM_Version::Cipher::_AES192_dataBlk;
        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
        if (VM_Version::has_Crypto_AES256()) {
          __ z_bru(parmBlk_set);
        }
      }

      if (VM_Version::has_Crypto_AES256()) {
        __ bind(parmBlk_256);
        cv_len = VM_Version::Cipher::_AES256_dataBlk;
        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
        // __ z_bru(parmBlk_set); // fallthru
      }
      __ bind(parmBlk_set);
    }
    __ z_lg(Z_SP, -16, parmBlk);  // Revert resize_frame_absolute.
    BLOCK_COMMENT("} pop parmBlk");
  }

  // Compute AES encrypt function.
  address generate_AES_encryptBlock(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    Register from = Z_ARG1;  // source byte array
    Register to   = Z_ARG2;  // destination byte array
    Register key  = Z_ARG3;  // expanded key array

    const Register keylen  = Z_R0;    // Temporarily (until fCode is set) holds the expanded key array length.
    const Register fCode   = Z_R0;    // crypto function code
    const Register parmBlk = Z_R1;    // parameter block address (points to crypto key)
    const Register src     = Z_ARG1;  // is Z_R2
    const Register srclen  = Z_ARG2;  // Overwrites destination address.
    const Register dst     = Z_ARG3;  // Overwrites expanded key address.

    // Read key len of expanded key (in 4-byte words).
    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    // Copy arguments to registers as required by crypto instruction.
    __ z_lgr(parmBlk, key);  // crypto key (in T_INT array).
    // __ z_lgr(src, from);  // Copy not needed, src/from are identical.
    __ z_lgr(dst, to);       // Copy destination address to even register.

    // Construct function code in Z_R0, data block length in Z_ARG2.
    generate_load_AES_fCode(keylen, fCode, srclen, false);

    __ km(dst, src);  // Cipher the message.

    __ z_br(Z_R14);

    return __ addr_at(start_off);
  }

  // Compute AES decrypt function.
  address generate_AES_decryptBlock(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    Register from = Z_ARG1;  // source byte array
    Register to   = Z_ARG2;  // destination byte array
    Register key  = Z_ARG3;  // expanded key array, not preset at entry!!!

    const Register keylen  = Z_R0;    // Temporarily (until fCode is set) holds the expanded key array length.
    const Register fCode   = Z_R0;    // crypto function code
    const Register parmBlk = Z_R1;    // parameter block address (points to crypto key)
    const Register src     = Z_ARG1;  // is Z_R2
    const Register srclen  = Z_ARG2;  // Overwrites destination address.
    const Register dst     = Z_ARG3;  // Overwrites key address.

    // Read key len of expanded key (in 4-byte words).
    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    // Copy arguments to registers as required by crypto instruction.
    __ z_lgr(parmBlk, key);  // Copy crypto key address.
    // __ z_lgr(src, from);  // Copy not needed, src/from are identical.
    __ z_lgr(dst, to);       // Copy destination address to even register.

    // Construct function code in Z_R0, data block length in Z_ARG2.
    generate_load_AES_fCode(keylen, fCode, srclen, true);

    __ km(dst, src);  // Decipher the message.

    __ z_br(Z_R14);

    return __ addr_at(start_off);
  }

  // These stubs receive the addresses of the cryptographic key and of the chaining value as two separate
  // arguments (registers "key" and "cv", respectively). The KMC instruction, on the other hand, requires
  // chaining value and key to be, in this sequence, adjacent in storage. Thus, we need to allocate some
  // thread-local working storage. Using heap memory incurs all the hassles of allocating/freeing.
  // Stack space, on the contrary, is deallocated automatically when we return from the stub to the caller.
  // *** WARNING ***
  // Please note that we do not formally allocate stack space, nor do we
  // update the stack pointer. Therefore, no function calls are allowed
  // and nobody else must use the stack range where the parameter block
  // is located.
  // We align the parameter block to the next available octoword.
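  //
  // Editorial sketch (illustrative only, compiled out): the stack storage the
  // CBC stubs build up, shown for AES-128, where the chaining value and the
  // key part of the KMC parameter block are 16 bytes each. The struct name
  // and field names below are not part of the VM.
#if 0
  struct AES128_CBC_stack_layout_sketch {
    jlong saved_SP;      // at parmBlk - 16: caller SP, reloaded by pop_parmBlk
    jlong saved_keylen;  // at parmBlk -  8: spilled across the KMC instruction
    jbyte cv[16];        // parmBlk points here; updated in place by KMC
    jbyte key[16];       // original AES-128 key, adjacent to cv as KMC requires
  };
#endif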
  //
  // Compute chained AES encrypt function.
  address generate_cipherBlockChaining_AES_encrypt(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    Register       from   = Z_ARG1;  // source byte array (clear text)
    Register       to     = Z_ARG2;  // destination byte array (ciphered)
    Register       key    = Z_ARG3;  // expanded key array.
    Register       cv     = Z_ARG4;  // chaining value
    const Register msglen = Z_ARG5;  // Total length of the msg to be encrypted. Value must be returned
                                     // in Z_RET upon completion of this stub. Is 32-bit integer.

    const Register keylen  = Z_R0;    // Expanded key length, as read from key array. Temp only.
    const Register fCode   = Z_R0;    // crypto function code
    const Register parmBlk = Z_R1;    // parameter block address (points to crypto key)
    const Register src     = Z_ARG1;  // is Z_R2
    const Register srclen  = Z_ARG2;  // Overwrites destination address.
    const Register dst     = Z_ARG3;  // Overwrites key address.

    // Read key len of expanded key (in 4-byte words).
    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
    // Construct function code in Z_R0.
    generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, false);

    // Prepare other registers for instruction.
    // __ z_lgr(src, from);  // Not needed, registers are the same.
    __ z_lgr(dst, to);
    __ z_llgfr(srclen, msglen);  // msglen is passed as int; zero-extend it to the required 64-bit length.

    __ kmc(dst, src);  // Cipher the message.

    generate_pop_parmBlk(keylen, parmBlk, key, cv);

    __ z_llgfr(Z_RET, msglen);  // Return msglen (passed as int), zero-extended.
    __ z_br(Z_R14);

    return __ addr_at(start_off);
  }

  // Compute chained AES decrypt function.
  address generate_cipherBlockChaining_AES_decrypt(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    Register       from   = Z_ARG1;  // source byte array (ciphered)
    Register       to     = Z_ARG2;  // destination byte array (clear text)
    Register       key    = Z_ARG3;  // expanded key array, not preset at entry!!!
    Register       cv     = Z_ARG4;  // chaining value
    const Register msglen = Z_ARG5;  // Total length of the msg to be decrypted. Value must be returned
                                     // in Z_RET upon completion of this stub.

    const Register keylen  = Z_R0;    // Expanded key length, as read from key array. Temp only.
    const Register fCode   = Z_R0;    // crypto function code
    const Register parmBlk = Z_R1;    // parameter block address (points to crypto key)
    const Register src     = Z_ARG1;  // is Z_R2
    const Register srclen  = Z_ARG2;  // Overwrites destination address.
    const Register dst     = Z_ARG3;  // Overwrites key address.

    // Read key len of expanded key (in 4-byte words).
    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
    // Construct function code in Z_R0.
    generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, true);

    // Prepare other registers for instruction.
    // __ z_lgr(src, from);  // Not needed, registers are the same.
    __ z_lgr(dst, to);
    __ z_llgfr(srclen, msglen);  // msglen is passed as int; zero-extend it to the required 64-bit length.

    __ kmc(dst, src);  // Decipher the message.

    generate_pop_parmBlk(keylen, parmBlk, key, cv);

    __ z_llgfr(Z_RET, msglen);  // Return msglen (passed as int), zero-extended.
    __ z_br(Z_R14);

    return __ addr_at(start_off);
  }


  // Call interface for all SHA* stubs.
  //
  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
  //   Z_ARG2 - current SHA state. Ptr to state area. This area serves as
  //            parameter block as required by the crypto instruction.
  //   Z_ARG3 - current byte offset in source data block.
  //   Z_ARG4 - last byte offset in source data block.
  //            (Z_ARG4 - Z_ARG3) gives the #bytes remaining to be processed.
  //
  //   Z_RET  - return value. First unprocessed byte offset in src buffer.
  //
  //   A few notes on the call interface:
  //    - All stubs, whether they are single-block or multi-block, are assumed to
  //      digest an integer multiple of the data block length of data. All data
  //      blocks are digested using the intermediate message digest (KIMD) instruction.
  //      Special end processing, as done by the KLMD instruction, seems to be
  //      emulated by the calling code.
  //
  //    - Z_ARG1 addresses the first byte of source data. The offset (Z_ARG3) is
  //      already accounted for.
  //
  //    - The current SHA state (the intermediate message digest value) is contained
  //      in an area addressed by Z_ARG2. The area size depends on the SHA variant
  //      and is accessible via the enum VM_Version::MsgDigest::_SHA<n>_parmBlk_I
  //
  //    - The single-block stub is expected to digest exactly one data block, starting
  //      at the address passed in Z_ARG1.
  //
  //    - The multi-block stub is expected to digest all data blocks which start in
  //      the offset interval [srcOff(Z_ARG3), srcLimit(Z_ARG4)). The exact difference
  //      (srcLimit-srcOff), rounded up to the next multiple of the data block length,
  //      gives the number of bytes to digest. It must be assumed that the calling code
  //      provides for a large enough source data buffer.
  //
  // Compute SHA-1 function.
  address generate_SHA1_stub(bool multiBlock, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    const Register srcBuff        = Z_ARG1;  // Points to first block to process (offset already added).
    const Register SHAState       = Z_ARG2;  // Only on entry. Reused soon thereafter for kimd register pairs.
    const Register srcOff         = Z_ARG3;  // int
    const Register srcLimit       = Z_ARG4;  // Only passed in multiBlock case. int

    const Register SHAState_local = Z_R1;
    const Register SHAState_save  = Z_ARG3;
    const Register srcBufLen      = Z_ARG2;  // Destroys state address, must be copied before.
    Label useKLMD, rtn;

    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA1);  // function code
    __ z_lgr(SHAState_local, SHAState);  // SHAState == parameter block

    if (multiBlock) {  // Process everything from offset to limit.
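      // Worked example of the round-up performed below (the SHA-1 data block
      // is 64 bytes): srcOff = 0, srcLimit = 100 gives
      // srcBufLen = (100 - 0 + 63) & ~63 = 128, so two full blocks are
      // digested and Z_RET returns srcOff + 128.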

      // The following description is valid if we get a raw (unmodified) source data buffer,
      // spanning the range [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description in
      // to inform the reader of what must be happening, hidden in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_dataBlk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, which spares
      // us copying the SHA state back and forth. Otherwise, we copy the SHA state
      // to the stack, execute a KLMD instruction on it and copy the result back to the
      // caller's SHA state location.

      // Total #srcBuff blocks to process.
      if (VM_Version::has_DistinctOpnds()) {
        __ z_srk(srcBufLen, srcLimit, srcOff);                        // exact difference
        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
        __ z_ark(srcLimit, srcOff, srcBufLen);  // srcLimit temporarily holds the return value.
        __ z_llgfr(srcBufLen, srcBufLen);       // Cast to 64-bit.
      } else {
        __ z_lgfr(srcBufLen, srcLimit);  // Exact difference. srcLimit passed as int.
        __ z_sgfr(srcBufLen, srcOff);    // srcOff passed as int, now properly cast to long.
        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
        __ z_lgr(srcLimit, srcOff);  // srcLimit temporarily holds the return value.
        __ z_agr(srcLimit, srcBufLen);
      }

      // Integral #blocks to digest?
      // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA1_dataBlk, or else we are in big trouble.
      // The safety net in the KLMD case below guards against that.
      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);
      __ z_brc(Assembler::bcondNotAllZero, useKLMD);

      // Process all full blocks.
      __ kimd(srcBuff);

      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
    } else {  // Process one data block only.
      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA1_dataBlk);  // #srcBuff bytes to process
      __ kimd(srcBuff);
      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA1_dataBlk, srcOff);  // Offset of first unprocessed byte in buffer. No 32 to 64 bit extension needed.
    }

    __ bind(rtn);
    __ z_br(Z_R14);

    if (multiBlock) {
      __ bind(useKLMD);

#if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE: The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA-1 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
#endif
    }

    return __ addr_at(start_off);
  }

  // Compute SHA-256 function.
  address generate_SHA256_stub(bool multiBlock, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    const Register srcBuff        = Z_ARG1;
    const Register SHAState       = Z_ARG2;  // Only on entry. Reused soon thereafter.
    const Register SHAState_local = Z_R1;
    const Register SHAState_save  = Z_ARG3;
    const Register srcOff         = Z_ARG3;
    const Register srcLimit       = Z_ARG4;
    const Register srcBufLen      = Z_ARG2;  // Destroys state address, must be copied before.
    Label useKLMD, rtn;

    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA256);  // function code
    __ z_lgr(SHAState_local, SHAState);  // SHAState == parameter block

    if (multiBlock) {  // Process everything from offset to limit.
      // The following description is valid if we get a raw (unmodified) source data buffer,
      // spanning the range [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description in
      // to inform the reader of what must be happening, hidden in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_dataBlk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, which spares
      // us copying the SHA state back and forth. Otherwise, we copy the SHA state
      // to the stack, execute a KLMD instruction on it and copy the result back to the
      // caller's SHA state location.

      // Total #srcBuff blocks to process.
      if (VM_Version::has_DistinctOpnds()) {
        __ z_srk(srcBufLen, srcLimit, srcOff);                          // exact difference
        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
        __ z_ark(srcLimit, srcOff, srcBufLen);  // srcLimit temporarily holds the return value.
        __ z_llgfr(srcBufLen, srcBufLen);       // Cast to 64-bit.
      } else {
        __ z_lgfr(srcBufLen, srcLimit);  // exact difference
        __ z_sgfr(srcBufLen, srcOff);
        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
        __ z_lgr(srcLimit, srcOff);  // srcLimit temporarily holds the return value.
        __ z_agr(srcLimit, srcBufLen);
      }

      // Integral #blocks to digest?
      // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA256_dataBlk, or else we are in big trouble.
      // The safety net in the KLMD case below guards against that.
      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);
      __ z_brc(Assembler::bcondNotAllZero, useKLMD);

      // Process all full blocks.
      __ kimd(srcBuff);

      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
    } else {  // Process one data block only.
      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA256_dataBlk);  // #srcBuff bytes to process
      __ kimd(srcBuff);
      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA256_dataBlk, srcOff);  // Offset of first unprocessed byte in buffer.
    }

    __ bind(rtn);
    __ z_br(Z_R14);

    if (multiBlock) {
      __ bind(useKLMD);
#if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE: The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA256 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
#endif
    }

    return __ addr_at(start_off);
  }

  // Compute SHA-512 function.
  address generate_SHA512_stub(bool multiBlock, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    const Register srcBuff        = Z_ARG1;
    const Register SHAState       = Z_ARG2;  // Only on entry. Reused soon thereafter.
    const Register SHAState_local = Z_R1;
    const Register SHAState_save  = Z_ARG3;
    const Register srcOff         = Z_ARG3;
    const Register srcLimit       = Z_ARG4;
    const Register srcBufLen      = Z_ARG2;  // Destroys state address, must be copied before.
    Label useKLMD, rtn;

    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA512);  // function code
    __ z_lgr(SHAState_local, SHAState);  // SHAState == parameter block

    if (multiBlock) {  // Process everything from offset to limit.
      // The following description is valid if we get a raw (unmodified) source data buffer,
      // spanning the range [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description in
      // to inform the reader of what must be happening, hidden in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_dataBlk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, which spares
      // us copying the SHA state back and forth. Otherwise, we copy the SHA state
      // to the stack, execute a KLMD instruction on it and copy the result back to the
      // caller's SHA state location.

      // Total #srcBuff blocks to process.
      if (VM_Version::has_DistinctOpnds()) {
        __ z_srk(srcBufLen, srcLimit, srcOff);                          // exact difference
        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
        __ z_ark(srcLimit, srcOff, srcBufLen);  // srcLimit temporarily holds the return value.
        __ z_llgfr(srcBufLen, srcBufLen);       // Cast to 64-bit.
      } else {
        __ z_lgfr(srcBufLen, srcLimit);  // exact difference
        __ z_sgfr(srcBufLen, srcOff);
        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
        __ z_lgr(srcLimit, srcOff);  // srcLimit temporarily holds the return value.
        __ z_agr(srcLimit, srcBufLen);
      }

      // Integral #blocks to digest?
      // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA512_dataBlk, or else we are in big trouble.
      // The safety net in the KLMD case below guards against that.
      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);
      __ z_brc(Assembler::bcondNotAllZero, useKLMD);

      // Process all full blocks.
      __ kimd(srcBuff);

      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
    } else {  // Process one data block only.
      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA512_dataBlk);  // #srcBuff bytes to process
      __ kimd(srcBuff);
      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA512_dataBlk, srcOff);  // Offset of first unprocessed byte in buffer.
    }

    __ bind(rtn);
    __ z_br(Z_R14);

    if (multiBlock) {
      __ bind(useKLMD);
#if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE: The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA512 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
#endif
    }

    return __ addr_at(start_off);
  }


  /**
   *  Arguments:
   *
   *  Inputs:
   *   Z_ARG1 - int   crc
   *   Z_ARG2 - byte* buf
   *   Z_ARG3 - int   length (of buffer)
   *
   *  Result:
   *   Z_RET  - int   crc result
   **/
  // Compute CRC function (generic, for all polynomials).
  void generate_CRC_updateBytes(const char* name, Register table, bool invertCRC) {

    // arguments to kernel_crc32:
    Register crc     = Z_ARG1;  // Current checksum, preset by caller or result from previous call, int.
    Register data    = Z_ARG2;  // source byte array
    Register dataLen = Z_ARG3;  // #bytes to process, int
    // Register table = Z_ARG4;  // crc table address. Preloaded and passed in by caller.
    const Register t0 = Z_R10;  // work reg for kernel* emitters
    const Register t1 = Z_R11;  // work reg for kernel* emitters
    const Register t2 = Z_R12;  // work reg for kernel* emitters
    const Register t3 = Z_R13;  // work reg for kernel* emitters

    assert_different_registers(crc, data, dataLen, table);

    // dataLen is passed as an int; zero-extend it to the long required by the C calling convention.
    // crc is used as an int.
    __ z_llgfr(dataLen, dataLen);

    __ resize_frame(-(6*8), Z_R0, true);  // Resize frame to provide spill space for the work registers.
    __ z_stmg(Z_R10, Z_R13, 1*8, Z_SP);   // Spill regs Z_R10..Z_R13 to make them available as work registers.
    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, invertCRC);
    __ z_lmg(Z_R10, Z_R13, 1*8, Z_SP);    // Restore regs Z_R10..Z_R13 from stack.
    __ resize_frame(+(6*8), Z_R0, true);  // Restore original frame size.

    __ z_llgfr(Z_RET, crc);  // Updated crc is function result. No copying required, just zero upper 32 bits.
    __ z_br(Z_R14);          // Result already in Z_RET == Z_ARG1.
  }


  // Compute CRC32 function.
  address generate_CRC32_updateBytes(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    assert(UseCRC32Intrinsics, "should not generate this stub (%s) with CRC32 intrinsics disabled", name);

    BLOCK_COMMENT("CRC32_updateBytes {");
    Register table = Z_ARG4;  // crc32 table address.
    StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);

    generate_CRC_updateBytes(name, table, true);
    BLOCK_COMMENT("} CRC32_updateBytes");

    return __ addr_at(start_off);
  }


  // Compute CRC32C function.
  address generate_CRC32C_updateBytes(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    assert(UseCRC32CIntrinsics, "should not generate this stub (%s) with CRC32C intrinsics disabled", name);

    BLOCK_COMMENT("CRC32C_updateBytes {");
    Register table = Z_ARG4;  // crc32c table address.
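    // Note the invertCRC difference between the two stubs: CRC32 (above)
    // passes true, making kernel_crc32_1word complement the crc on entry and
    // exit since java.util.zip.CRC32 keeps the non-inverted value, while
    // CRC32C passes false - presumably because the Java-side CRC32C code
    // already maintains the running crc in inverted form.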
    StubRoutines::zarch::generate_load_crc32c_table_addr(_masm, table);

    generate_CRC_updateBytes(name, table, false);
    BLOCK_COMMENT("} CRC32C_updateBytes");

    return __ addr_at(start_off);
  }


  // Arguments:
  //   Z_ARG1    - x address
  //   Z_ARG2    - x length
  //   Z_ARG3    - y address
  //   Z_ARG4    - y length
  //   Z_ARG5    - z address
  //   160[Z_SP] - z length
  address generate_multiplyToLen() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");

    address start = __ pc();

    const Register x    = Z_ARG1;
    const Register xlen = Z_ARG2;
    const Register y    = Z_ARG3;
    const Register ylen = Z_ARG4;
    const Register z    = Z_ARG5;
    // zlen is passed on the stack:
    // Address zlen(Z_SP, _z_abi(remaining_cargs));

    // The following registers will be saved on the stack by multiply_to_len().
    const Register tmp1 = Z_tmp_1;
    const Register tmp2 = Z_tmp_2;
    const Register tmp3 = Z_tmp_3;
    const Register tmp4 = Z_tmp_4;
    const Register tmp5 = Z_R9;

    BLOCK_COMMENT("Entry:");

    __ z_llgfr(xlen, xlen);
    __ z_llgfr(ylen, ylen);

    __ multiply_to_len(x, xlen, y, ylen, z, tmp1, tmp2, tmp3, tmp4, tmp5);

    __ z_br(Z_R14);  // Return to caller.

    return start;
  }

  void generate_initial() {
    // Generates all stubs and initializes the entry points.

    // Entry points that exist on all platforms.
    // Note: This is code that could be shared among different
    //       platforms - however the benefit seems to be smaller than the
    //       disadvantage of having a much more complicated generator
    //       structure. See also comment in stubRoutines.hpp.
    StubRoutines::_forward_exception_entry = generate_forward_exception();

    StubRoutines::_call_stub_entry       = generate_call_stub(StubRoutines::_call_stub_return_address);
    StubRoutines::_catch_exception_entry = generate_catch_exception();

    // Build this early so it's available for the interpreter.
    StubRoutines::_throw_StackOverflowError_entry =
      generate_throw_exception("StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
    StubRoutines::_throw_delayed_StackOverflowError_entry =
      generate_throw_exception("delayed StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), false);

    //----------------------------------------------------------------------
    // Entry points that are platform specific.

    if (UseCRC32Intrinsics) {
      StubRoutines::_crc_table_adr    = (address)StubRoutines::zarch::_crc_table;
      StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes");
    }

    if (UseCRC32CIntrinsics) {
      StubRoutines::_crc32c_table_addr = (address)StubRoutines::zarch::_crc32c_table;
      StubRoutines::_updateBytesCRC32C = generate_CRC32C_updateBytes("CRC32C_updateBytes");
    }

    // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
    StubRoutines::zarch::_trot_table_addr = (address)StubRoutines::zarch::_trot_table;
  }


  void generate_all() {
    // Generates all stubs and initializes the entry points.

    StubRoutines::zarch::_partial_subtype_check = generate_partial_subtype_check();

    // These entry points require SharedInfo::stack0 to be set up in non-core builds.
    StubRoutines::_throw_AbstractMethodError_entry          = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
    StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);
    StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);

    // Support for verify_oop (must happen after universe_init).
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();

    // Arraycopy stubs used by compilers.
    generate_arraycopy_stubs();

    // safefetch stubs
    generate_safefetch("SafeFetch32", sizeof(int),      &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN",  sizeof(intptr_t), &StubRoutines::_safefetchN_entry,  &StubRoutines::_safefetchN_fault_pc,  &StubRoutines::_safefetchN_continuation_pc);

    // Generate AES intrinsics code.
    if (UseAESIntrinsics) {
      StubRoutines::_aescrypt_encryptBlock = generate_AES_encryptBlock("AES_encryptBlock");
      StubRoutines::_aescrypt_decryptBlock = generate_AES_decryptBlock("AES_decryptBlock");
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_AES_encrypt("AES_encryptBlock_chaining");
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_AES_decrypt("AES_decryptBlock_chaining");
    }

    // Generate SHA1/SHA256/SHA512 intrinsics code.
    if (UseSHA1Intrinsics) {
      StubRoutines::_sha1_implCompress   = generate_SHA1_stub(false, "SHA1_singleBlock");
      StubRoutines::_sha1_implCompressMB = generate_SHA1_stub(true,  "SHA1_multiBlock");
    }
    if (UseSHA256Intrinsics) {
      StubRoutines::_sha256_implCompress   = generate_SHA256_stub(false, "SHA256_singleBlock");
      StubRoutines::_sha256_implCompressMB = generate_SHA256_stub(true,  "SHA256_multiBlock");
    }
    if (UseSHA512Intrinsics) {
      StubRoutines::_sha512_implCompress   = generate_SHA512_stub(false, "SHA512_singleBlock");
      StubRoutines::_sha512_implCompressMB = generate_SHA512_stub(true,  "SHA512_multiBlock");
    }

#ifdef COMPILER2
    if (UseMultiplyToLenIntrinsic) {
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
    }
    if (UseMontgomeryMultiplyIntrinsic) {
      StubRoutines::_montgomeryMultiply
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
    }
    if (UseMontgomerySquareIntrinsic) {
      StubRoutines::_montgomerySquare
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
    }
#endif
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    // Replace the standard masm with a special one:
    _masm = new MacroAssembler(code);

    _stub_count = !all ? 0x100 : 0x200;
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }

 private:
  int _stub_count;
  void stub_prolog(StubCodeDesc* cdesc) {
#ifdef ASSERT
    // Put extra information in the stub code, to make it more readable.
    // Write the high part of the address.
    // [RGV] Check if there is a dependency on the size of this prolog.
    __ emit_32((intptr_t)cdesc >> 32);
    __ emit_32((intptr_t)cdesc);
    __ emit_32(++_stub_count);
#endif
    align(true);
  }

  void align(bool at_header = false) {
    // z/Architecture cache line size is 256 bytes.
    // There is no obvious benefit in aligning stub
    // code to cache lines. Use CodeEntryAlignment instead.
    const unsigned int icache_line_size      = CodeEntryAlignment;
    const unsigned int icache_half_line_size = MIN2<unsigned int>(32, CodeEntryAlignment);

    if (at_header) {
      while ((intptr_t)(__ pc()) % icache_line_size != 0) {
        __ emit_16(0);
      }
    } else {
      while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
        __ z_nop();
      }
    }
  }

};

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}
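
// Editorial appendix (illustrative only, compiled out): the branch ladder in
// generate_load_AES_fCode implements the mapping sketched below. keylen is the
// expanded key length in 4-byte words, as read from the key array (44/52/60
// words for AES-128/192/256 per the table in the AES call interface comment);
// the function code is the cipher constant plus the cipher/decipher mode
// modifier, exactly as selected by the z_cghi(keylen, 52) / brh / bre / brl
// sequence above. The helper name below is hypothetical.
#if 0
static int aes_function_code(int keylen_in_words, bool is_decipher) {
  int mode = is_decipher ? VM_Version::CipherMode::decipher
                         : VM_Version::CipherMode::cipher;
  if (keylen_in_words >  52) return VM_Version::Cipher::_AES256 + mode;  // 60 words
  if (keylen_in_words == 52) return VM_Version::Cipher::_AES192 + mode;
  return VM_Version::Cipher::_AES128 + mode;                             // 44 words
}
#endif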