/*
 * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "registerSaver_s390.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "nativeInst_s390.hpp"
#include "oops/instanceOop.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#ifdef PRODUCT
#define __ _masm->
#else
#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
#endif

#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")

// -----------------------------------------------------------------------
// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

  //----------------------------------------------------------------------
  // Call stubs are used to call Java from C.

  //
  // Arguments:
  //
  //   R2        - call wrapper address     : address
  //   R3        - result                   : intptr_t*
  //   R4        - result type              : BasicType
  //   R5        - method                   : method
  //   R6        - frame mgr entry point    : address
  //   [SP+160]  - parameter block          : intptr_t*
  //   [SP+172]  - parameter count in words : int
  //   [SP+176]  - thread                   : Thread*
  //
  address generate_call_stub(address& return_address) {
    // Set up a new C frame, copy Java arguments, call frame manager
    // or native_entry, and process result.
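
    // For orientation: the generated stub is reached through the
    // StubRoutines::call_stub() function pointer (see StubRoutines::CallStub
    // in stubRoutines.hpp). The caller side looks roughly like this
    // (cf. JavaCalls::call_helper(); sketch only, names abbreviated):
    //
    //   StubRoutines::call_stub()((address)&link, result_val_address, result_type,
    //                             method, entry_point, args->parameters(),
    //                             args->size_of_parameters(), CHECK);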

    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    Register r_arg_call_wrapper_addr = Z_ARG1;
    Register r_arg_result_addr       = Z_ARG2;
    Register r_arg_result_type       = Z_ARG3;
    Register r_arg_method            = Z_ARG4;
    Register r_arg_entry             = Z_ARG5;

    // offsets to fp
    #define d_arg_thread 176
    #define d_arg_argument_addr 160
    #define d_arg_argument_count 168+4

    Register r_entryframe_fp         = Z_tmp_1;
    Register r_top_of_arguments_addr = Z_ARG4;
    Register r_new_arg_entry         = Z_R14;

    // macros for frame offsets
    #define call_wrapper_address_offset \
              _z_entry_frame_locals_neg(call_wrapper_address)
    #define result_address_offset \
              _z_entry_frame_locals_neg(result_address)
    #define result_type_offset \
              _z_entry_frame_locals_neg(result_type)
    #define arguments_tos_address_offset \
              _z_entry_frame_locals_neg(arguments_tos_address)

    {
      //
      // STACK on entry to call_stub:
      //
      //     F1      [C_FRAME]
      //             ...
      //

      Register r_argument_addr          = Z_tmp_3;
      Register r_argumentcopy_addr      = Z_tmp_4;
      Register r_argument_size_in_bytes = Z_ARG5;
      Register r_frame_size             = Z_R1;

      Label arguments_copied;

      // Save non-volatile registers to ABI of caller frame.
      BLOCK_COMMENT("save registers, push frame {");
      __ z_stmg(Z_R6, Z_R14, 16, Z_SP);
      __ z_std(Z_F8, 96, Z_SP);
      __ z_std(Z_F9, 104, Z_SP);
      __ z_std(Z_F10, 112, Z_SP);
      __ z_std(Z_F11, 120, Z_SP);
      __ z_std(Z_F12, 128, Z_SP);
      __ z_std(Z_F13, 136, Z_SP);
      __ z_std(Z_F14, 144, Z_SP);
      __ z_std(Z_F15, 152, Z_SP);

      //
      // Push ENTRY_FRAME including arguments:
      //
      //     F0      [TOP_IJAVA_FRAME_ABI]
      //             [outgoing Java arguments]
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //

      // Calculate new frame size and push frame.
      #define abi_plus_locals_size \
                (frame::z_top_ijava_frame_abi_size + frame::z_entry_frame_locals_size)
      if (abi_plus_locals_size % BytesPerWord == 0) {
        // Preload constant part of frame size.
        __ load_const_optimized(r_frame_size, -abi_plus_locals_size/BytesPerWord);
        // Keep copy of our frame pointer (caller's SP).
        __ z_lgr(r_entryframe_fp, Z_SP);
        // Add space required by arguments to frame size.
        __ z_slgf(r_frame_size, d_arg_argument_count, Z_R0, Z_SP);
        // Move Z_ARG5 early, it will be used as a local.
        __ z_lgr(r_new_arg_entry, r_arg_entry);
        // Convert frame size from words to bytes.
        __ z_sllg(r_frame_size, r_frame_size, LogBytesPerWord);
        __ push_frame(r_frame_size, r_entryframe_fp,
                      false/*don't copy SP*/, true /*frame size sign inverted*/);
      } else {
        guarantee(false, "frame sizes should be multiples of word size (BytesPerWord)");
      }
      BLOCK_COMMENT("} save, push");

      // Load argument registers for call.
      BLOCK_COMMENT("prepare/copy arguments {");
      __ z_lgr(Z_method, r_arg_method);
      __ z_lg(Z_thread, d_arg_thread, r_entryframe_fp);

      // Calculate top_of_arguments_addr which will be tos (not prepushed) later.
      // Simply use SP + frame::z_top_ijava_frame_abi_size.
      __ add2reg(r_top_of_arguments_addr,
                 frame::z_top_ijava_frame_abi_size - BytesPerWord, Z_SP);

      // Initialize call_stub locals (step 1).
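      // The z_stmg below stores the consecutive register range Z_ARG1..Z_ARG4
      // with one store-multiple instruction. That is only valid if the four
      // entry frame locals form one contiguous block of doublewords, which
      // the compile-time condition below verifies.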
      if ((call_wrapper_address_offset + BytesPerWord == result_address_offset) &&
          (result_address_offset + BytesPerWord == result_type_offset)          &&
          (result_type_offset + BytesPerWord == arguments_tos_address_offset)) {

        __ z_stmg(r_arg_call_wrapper_addr, r_top_of_arguments_addr,
                  call_wrapper_address_offset, r_entryframe_fp);
      } else {
        __ z_stg(r_arg_call_wrapper_addr,
                 call_wrapper_address_offset, r_entryframe_fp);
        __ z_stg(r_arg_result_addr,
                 result_address_offset, r_entryframe_fp);
        __ z_stg(r_arg_result_type,
                 result_type_offset, r_entryframe_fp);
        __ z_stg(r_top_of_arguments_addr,
                 arguments_tos_address_offset, r_entryframe_fp);
      }

      // Copy Java arguments.

      // Any arguments to copy?
      __ load_and_test_int2long(Z_R1, Address(r_entryframe_fp, d_arg_argument_count));
      __ z_bre(arguments_copied);

      // Prepare loop and copy arguments in reverse order.
      {
        // Calculate argument size in bytes.
        __ z_sllg(r_argument_size_in_bytes, Z_R1, LogBytesPerWord);

        // Get addr of first incoming Java argument.
        __ z_lg(r_argument_addr, d_arg_argument_addr, r_entryframe_fp);

        // Let r_argumentcopy_addr point to last outgoing Java argument.
        __ add2reg(r_argumentcopy_addr, BytesPerWord, r_top_of_arguments_addr); // = Z_SP+160 effectively.

        // Let r_argument_addr point to last incoming Java argument.
        __ add2reg_with_index(r_argument_addr, -BytesPerWord,
                              r_argument_size_in_bytes, r_argument_addr);

        // Now loop while Z_R1 > 0 and copy arguments.
        {
          Label next_argument;
          __ bind(next_argument);
          // Mem-mem move.
          __ z_mvc(0, BytesPerWord-1, r_argumentcopy_addr, 0, r_argument_addr);
          __ add2reg(r_argument_addr, -BytesPerWord);
          __ add2reg(r_argumentcopy_addr, BytesPerWord);
          __ z_brct(Z_R1, next_argument);
        }
      }   // End of argument copy loop.

      __ bind(arguments_copied);
    }
    BLOCK_COMMENT("} arguments");

    BLOCK_COMMENT("call {");
    {
      // Call frame manager or native entry.

      //
      // Register state on entry to frame manager / native entry:
      //
      //   Z_ARG1 = r_top_of_arguments_addr - intptr_t *sender tos (prepushed)
      //            Lesp = (SP) + copied_arguments_offset - 8
      //   Z_method - method
      //   Z_thread - JavaThread*
      //

      // Here, the usual SP is the initial_caller_sp.
      __ z_lgr(Z_R10, Z_SP);

      // Z_esp points to the slot below the last argument.
      __ z_lgr(Z_esp, r_top_of_arguments_addr);

      //
      // Stack on entry to frame manager / native entry:
      //
      //     F0      [TOP_IJAVA_FRAME_ABI]
      //             [outgoing Java arguments]
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //

      // Do a light-weight C-call here, r_new_arg_entry holds the address
      // of the interpreter entry point (frame manager or native entry)
      // and save runtime-value of return_pc in return_address
      // (call by reference argument).
      return_address = __ call_stub(r_new_arg_entry);
    }
    BLOCK_COMMENT("} call");

    {
      BLOCK_COMMENT("restore registers {");
      // Returned from frame manager or native entry.
      // Now pop frame, process result, and return to caller.

      //
      // Stack on exit from frame manager / native entry:
      //
      //     F0      [ABI]
      //             ...
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //
      // Just pop the topmost frame ...
      //

      Label ret_is_object;
      Label ret_is_long;
      Label ret_is_float;
      Label ret_is_double;

      // Restore frame pointer.
      __ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP);
      // Pop frame. Done here to minimize stalls.
      __ pop_frame();

      // Reload some volatile registers which we've spilled before the call
      // to frame manager / native entry.
      // Access all locals via frame pointer, because we know nothing about
      // the topmost frame's size.
      __ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp);
      __ z_lg(r_arg_result_type, result_type_offset, r_entryframe_fp);

      // Restore non-volatiles.
      __ z_lmg(Z_R6, Z_R14, 16, Z_SP);
      __ z_ld(Z_F8, 96, Z_SP);
      __ z_ld(Z_F9, 104, Z_SP);
      __ z_ld(Z_F10, 112, Z_SP);
      __ z_ld(Z_F11, 120, Z_SP);
      __ z_ld(Z_F12, 128, Z_SP);
      __ z_ld(Z_F13, 136, Z_SP);
      __ z_ld(Z_F14, 144, Z_SP);
      __ z_ld(Z_F15, 152, Z_SP);
      BLOCK_COMMENT("} restore");

      //
      // Stack on exit from call_stub:
      //
      //     0       [C_FRAME]
      //             ...
      //
      // No call_stub frames left.
      //

      // All non-volatiles have been restored at this point!!

      //------------------------------------------------------------------------
      // The following code makes some assumptions on the T_<type> enum values.
      // The enum is defined in globalDefinitions.hpp.
      // The validity of the assumptions is tested as far as possible.
      //   The assigned values should not be shuffled
      //   T_BOOLEAN == 4    - lowest used enum value
      //   T_NARROWOOP == 16 - largest used enum value
      //------------------------------------------------------------------------
      BLOCK_COMMENT("process result {");
      Label firstHandler;
      int   handlerLen = 8;
#ifdef ASSERT
      char  assertMsg[] = "check BasicType definition in globalDefinitions.hpp";
      __ z_chi(r_arg_result_type, T_BOOLEAN);
      __ asm_assert_low(assertMsg, 0x0234);
      __ z_chi(r_arg_result_type, T_NARROWOOP);
      __ asm_assert_high(assertMsg, 0x0235);
#endif
      __ add2reg(r_arg_result_type, -T_BOOLEAN);          // Remove offset.
      __ z_larl(Z_R1, firstHandler);                      // location of first handler
      __ z_sllg(r_arg_result_type, r_arg_result_type, 3); // Each handler is 8 bytes long.
      __ z_bc(MacroAssembler::bcondAlways, 0, r_arg_result_type, Z_R1);

      __ align(handlerLen);
      __ bind(firstHandler);
      // T_BOOLEAN:
      guarantee(T_BOOLEAN == 4, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_CHAR:
      guarantee(T_CHAR == T_BOOLEAN+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_FLOAT:
      guarantee(T_FLOAT == T_CHAR+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_ste(Z_FRET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_DOUBLE:
      guarantee(T_DOUBLE == T_FLOAT+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_std(Z_FRET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_BYTE:
      guarantee(T_BYTE == T_DOUBLE+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_SHORT:
      guarantee(T_SHORT == T_BYTE+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_INT:
      guarantee(T_INT == T_SHORT+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_LONG:
      guarantee(T_LONG == T_INT+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_stg(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_OBJECT:
      guarantee(T_OBJECT == T_LONG+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_stg(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_ARRAY:
      guarantee(T_ARRAY == T_OBJECT+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_stg(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_VOID:
      guarantee(T_VOID == T_ARRAY+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_stg(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_ADDRESS:
      guarantee(T_ADDRESS == T_VOID+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_stg(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_NARROWOOP:
      guarantee(T_NARROWOOP == T_ADDRESS+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      BLOCK_COMMENT("} process result");
    }
    return start;
  }

  // Return point for a Java call if there's an exception thrown in
  // Java code. The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");

    address start = __ pc();

    //
    // Registers alive
    //
    //   Z_thread
    //   Z_ARG1 - address of pending exception
    //   Z_ARG2 - return address in call stub
    //

    const Register exception_file = Z_R0;
    const Register exception_line = Z_R1;

    __ load_const_optimized(exception_file, (void*)__FILE__);
    __ load_const_optimized(exception_line, (void*)__LINE__);

    __ z_stg(Z_ARG1, thread_(pending_exception));
    // Store into `char *'.
    __ z_stg(exception_file, thread_(exception_file));
    // Store into `int'.
    __ z_st(exception_line, thread_(exception_line));

    // Complete return to VM.
    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");

    // Continue in call stub.
    __ z_br(Z_ARG2);

    return start;
  }

  // Continuation point for runtime calls returning with a pending
  // exception. The pending exception check happened in the runtime
  // or native call stub. The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Read:
  //   Z_R14: pc the runtime library callee wants to return to.
  //   Since the exception occurred in the callee, the return pc
  //   from the point of view of Java is the exception pc.
  //
  // Invalidate:
  //   Volatile registers (except below).
  //
  // Update:
  //   Z_ARG1: exception
  //   (Z_R14 is unchanged and is live out).
  //
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    #define pending_exception_offset in_bytes(Thread::pending_exception_offset())
#ifdef ASSERT
    // Get pending exception oop.
    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

    // Make sure that this code is only executed if there is a pending exception.
    {
      Label L;
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_brne(L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }

    __ verify_oop(Z_ARG1, "StubRoutines::forward exception: not an oop");
#endif

    __ z_lgr(Z_ARG2, Z_R14); // Copy exception pc into Z_ARG2.
    __ save_return_pc();
    __ push_frame_abi160(0);
    // Find exception handler.
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
                    Z_thread,
                    Z_ARG2);
    // Copy handler's address.
    __ z_lgr(Z_R1, Z_RET);
    __ pop_frame();
    __ restore_return_pc();

    // Set up the arguments for the exception handler:
    // - Z_ARG1: exception oop
    // - Z_ARG2: exception pc

    // Load pending exception oop.
    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

    // The exception pc is the return address in the caller,
    // must load it into Z_ARG2
    __ z_lgr(Z_ARG2, Z_R14);

#ifdef ASSERT
    // Make sure exception is set.
    { Label L;
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_brne(L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // Clear the pending exception.
    __ clear_mem(Address(Z_thread, pending_exception_offset), sizeof(void *));
    // Jump to exception handler
    __ z_br(Z_R1 /*handler address*/);

    return start;

    #undef pending_exception_offset
  }

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Only callee-saved registers are preserved (through the
  // normal RegisterMap handling). If the compiler
  // needs all registers to be preserved between the fault point and
  // the exception handler then it must assume responsibility for that
  // in AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller saved registers were assumed volatile in the compiler.

  // Note that we generate only this stub into a RuntimeStub, because
  // it needs to be properly traversed and ignored during GC, so we
  // change the meaning of the "__" macro within this method.

  // Note: the routine set_pc_not_at_call_for_caller in
  // SharedRuntime.cpp requires that this code be generated into a
  // RuntimeStub.
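  //
  // For illustration, a typical use from stub initialization elsewhere in
  // this file (sketch; the exact entries and flags vary per exception type):
  //
  //   StubRoutines::_throw_AbstractMethodError_entry =
  //     generate_throw_exception("AbstractMethodError throw_exception",
  //                              CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),
  //                              false);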
#undef __
#define __ masm->

  address generate_throw_exception(const char* name, address runtime_entry,
                                   bool restore_saved_exception_pc,
                                   Register arg1 = noreg, Register arg2 = noreg) {
    assert_different_registers(arg1, Z_R0_scratch);  // would be destroyed by push_frame()
    assert_different_registers(arg2, Z_R0_scratch);  // would be destroyed by push_frame()

    int insts_size = 256;
    int locs_size  = 0;
    CodeBuffer      code(name, insts_size, locs_size);
    MacroAssembler* masm = new MacroAssembler(&code);
    int framesize_in_bytes;
    address start = __ pc();

    __ save_return_pc();
    framesize_in_bytes = __ push_frame_abi160(0);

    address frame_complete_pc = __ pc();
    if (restore_saved_exception_pc) {
      __ unimplemented("StubGenerator::throw_exception", 74);
    }

    // Note that we always have a runtime stub frame on the top of stack at this point.
    __ get_PC(Z_R1);
    __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);

    // Do the call.
    BLOCK_COMMENT("call runtime_entry");
    __ call_VM_leaf(runtime_entry, Z_thread, arg1, arg2);

    __ reset_last_Java_frame();

#ifdef ASSERT
    // Make sure that this code is only executed if there is a pending exception.
    { Label L;
      __ z_lg(Z_R0,
              in_bytes(Thread::pending_exception_offset()),
              Z_thread);
      __ z_ltgr(Z_R0, Z_R0);
      __ z_brne(L);
      __ stop("StubRoutines::throw_exception: no pending exception");
      __ bind(L);
    }
#endif

    __ pop_frame();
    __ restore_return_pc();

    __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
    __ z_br(Z_R1);

    RuntimeStub* stub =
      RuntimeStub::new_runtime_stub(name, &code,
                                    frame_complete_pc - start,
                                    framesize_in_bytes/wordSize,
                                    NULL /*oop_maps*/, false);

    return stub->entry_point();
  }

#undef __
#ifdef PRODUCT
#define __ _masm->
#else
#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
#endif

  // Support for uint StubRoutine::zarch::partial_subtype_check(Klass
  // sub, Klass super);
  //
  // Arguments:
  //   ret  : Z_RET,  returned
  //   sub  : Z_ARG2, argument, not changed
  //   super: Z_ARG3, argument, not changed
  //
  //   raddr: Z_R14, blown by call
  //
  address generate_partial_subtype_check() {
    StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    Label miss;

    address start = __ pc();

    const Register Rsubklass   = Z_ARG2; // subklass
    const Register Rsuperklass = Z_ARG3; // superklass

    // No args, but tmp registers that are killed.
    const Register Rlength     = Z_ARG4; // cache array length
    const Register Rarray_ptr  = Z_ARG5; // Current value from cache array.

    if (UseCompressedOops) {
      assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub");
    }

    // Always take the slow path (see SPARC).
    __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass,
                                     Rarray_ptr, Rlength, NULL, &miss);

    // Match falls through here.
    __ clear_reg(Z_RET);               // Zero indicates a match. Set EQ flag in CC.
    __ z_br(Z_R14);

    __ BIND(miss);
    __ load_const_optimized(Z_RET, 1); // One indicates a miss.
    __ z_ltgr(Z_RET, Z_RET);           // Set NE flag in CR.
    __ z_br(Z_R14);

    return start;
  }

  // Return address of code to be called from code generated by
  // MacroAssembler::verify_oop.
  //
  // Don't generate, rather use C++ code.
  address generate_verify_oop_subroutine() {
    // Don't generate a StubCodeMark, because no code is generated!
    // Generating the mark triggers notifying the oprofile jvmti agent
    // about the dynamic code generation, but the stub without
    // code (code_size == 0) confuses opjitconv
    // StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");

    address start = 0;
    return start;
  }

  // Generate pre-write barrier for array.
  //
  // Input:
  //    addr  - register containing starting address
  //    count - register containing element count
  //
  // The input registers are overwritten.
  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {

    BarrierSet* const bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1BarrierSet:
        // With G1, don't generate the call if we statically know that the target is uninitialized.
        if (!dest_uninitialized) {
          // Is marking active?
          Label filtered;
          assert_different_registers(addr,  Z_R0_scratch); // would be destroyed by push_frame()
          assert_different_registers(count, Z_R0_scratch); // would be destroyed by push_frame()
          Register Rtmp1 = Z_R0_scratch;
          const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() +
                                             SATBMarkQueue::byte_offset_of_active());
          if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
            __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
          } else {
            guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
            __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
          }
          __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.

          // __ push_frame_abi160(0);  // implicitly done in save_live_registers()
          (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), addr, count);
          (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
          // __ pop_frame();  // implicitly done in restore_live_registers()

          __ bind(filtered);
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();
    }
  }

  // Generate post-write barrier for array.
  //
  // Input:
  //    addr  - register containing starting address
  //    count - register containing element count
  //
  // The input registers are overwritten.
  void gen_write_ref_array_post_barrier(Register addr, Register count, bool branchToEnd) {
    BarrierSet* const bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1BarrierSet:
        {
          if (branchToEnd) {
            assert_different_registers(addr,  Z_R0_scratch); // would be destroyed by push_frame()
            assert_different_registers(count, Z_R0_scratch); // would be destroyed by push_frame()
            // __ push_frame_abi160(0);  // implicitly done in save_live_registers()
            (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
            __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
            (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
            // __ pop_frame();  // implicitly done in restore_live_registers()
          } else {
            // Tail call: call c and return to stub caller.
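            // Because z_br branches without setting the link register, Z_R14
            // still holds the stub caller's return address, so the C callee's
            // normal epilogue returns straight to the stub caller.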
            address entry_point = CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
            __ lgr_if_needed(Z_ARG1, addr);
            __ lgr_if_needed(Z_ARG2, count);
            __ load_const(Z_R1, entry_point);
            __ z_br(Z_R1); // Branch without linking, callee will return to stub caller.
          }
        }
        break;
      case BarrierSet::CardTableModRef:
        // These cases formerly known as
        //   void array_store_check(Register addr, Register count, bool branchToEnd).
        {
          NearLabel doXC, done;
          CardTableModRefBS* ctbs = barrier_set_cast<CardTableModRefBS>(bs);
          CardTable* ct = ctbs->card_table();
          assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
          assert_different_registers(Z_R0, Z_R1, addr, count);

          // Nothing to do if count <= 0.
          if (branchToEnd) {
            __ compare64_and_branch(count, (intptr_t) 0, Assembler::bcondNotHigh, done);
          } else {
            __ z_ltgr(count, count);
            __ z_bcr(Assembler::bcondNotPositive, Z_R14);
          }

          // Note: We can't combine the shifts. We could lose a carry
          //       from calculating the array end address.
          // count = (count-1)*BytesPerHeapOop + addr
          // Count holds addr of last oop in array then.
          __ z_sllg(count, count, LogBytesPerHeapOop);
          __ add2reg_with_index(count, -BytesPerHeapOop, count, addr);

          // Get base address of card table.
          __ load_const_optimized(Z_R1, (address)ct->byte_map_base());

          // count = (count>>shift) - (addr>>shift)
          __ z_srlg(addr,  addr,  CardTable::card_shift);
          __ z_srlg(count, count, CardTable::card_shift);

          // Prefetch first elements of card table for update.
          if (VM_Version::has_Prefetch()) {
            __ z_pfd(0x02, 0, addr, Z_R1);
          }

          // Special case: clear just one byte.
          __ clear_reg(Z_R0, true, false); // Used for doOneByte.
          __ z_sgr(count, addr);           // Count = n-1 now, CC used for brc below.
          __ z_stc(Z_R0, 0, addr, Z_R1);   // Must preserve CC from z_sgr.
          if (branchToEnd) {
            __ z_brz(done);
          } else {
            __ z_bcr(Assembler::bcondZero, Z_R14);
          }

          __ z_cghi(count, 255);
          __ z_brnh(doXC);

          // MVCLE: clear a long area.
          // Start addr of card table range = base + addr.
          // # bytes in    card table range = (count + 1)
          __ add2reg_with_index(Z_R0, 0, Z_R1, addr);
          __ add2reg(Z_R1, 1, count);

          // dirty hack:
          //   There are just two callers. Both pass
          //   count in Z_ARG3 = Z_R4
          //   addr  in Z_ARG2 = Z_R3
          //   ==> use Z_ARG2 as src len reg = 0
          //           Z_ARG1 as src addr (ignored)
          assert(count == Z_ARG3, "count: unexpected register number");
          assert(addr  == Z_ARG2, "addr:  unexpected register number");
          __ clear_reg(Z_ARG2, true, false);

          __ MacroAssembler::move_long_ext(Z_R0, Z_ARG1, 0);

          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_bcr(Assembler::bcondAlways, Z_R14);
          }

          // XC: clear a short area.
          Label XC_template; // Instr template, never exec directly!
          __ bind(XC_template);
          __ z_xc(0, 0, addr, 0, addr);

          __ bind(doXC);
          // start addr of card table range = base + addr
          // end   addr of card table range = base + addr + count
          __ add2reg_with_index(addr, 0, Z_R1, addr);

          if (VM_Version::has_ExecuteExtensions()) {
            __ z_exrl(count, XC_template); // Execute XC with var. len.
          } else {
            __ z_larl(Z_R1, XC_template);
            __ z_ex(count, 0, Z_R0, Z_R1); // Execute XC with var. len.
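            // EX executes the XC template with the template's length field
            // (bits 8-15) modified by the low byte of the count register;
            // that yields a variable-length clear from one fixed instruction.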
          }
          if (!branchToEnd) {
            __ z_br(Z_R14);
          }

          __ bind(done);
        }
        break;
      case BarrierSet::ModRef:
        if (!branchToEnd) { __ z_br(Z_R14); }
        break;
      default:
        ShouldNotReachHere();
    }
  }


  // This is to test that the count register contains a positive int value.
  // Required because C2 does not respect int to long conversion for stub calls.
  void assert_positive_int(Register count) {
#ifdef ASSERT
    __ z_srag(Z_R0, count, 31); // Just leave the sign (must be zero) in Z_R0.
    __ asm_assert_eq("missing zero extend", 0xAFFE);
#endif
  }

  // Generate overlap test for array copy stubs.
  // If no actual overlap is detected, control is transferred to the
  // "normal" copy stub (entry address passed in disjoint_copy_target).
  // Otherwise, execution continues with the code generated by the
  // caller of array_overlap_test.
  //
  // Input:
  //   Z_ARG1 - from
  //   Z_ARG2 - to
  //   Z_ARG3 - element count
  void array_overlap_test(address disjoint_copy_target, int log2_elem_size) {
    __ MacroAssembler::compare_and_branch_optimized(Z_ARG2, Z_ARG1, Assembler::bcondNotHigh,
                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);

    Register index = Z_ARG3;
    if (log2_elem_size > 0) {
      __ z_sllg(Z_R1, Z_ARG3, log2_elem_size); // byte count
      index = Z_R1;
    }
    __ add2reg_with_index(Z_R1, 0, index, Z_ARG1); // First byte after "from" range.

    __ MacroAssembler::compare_and_branch_optimized(Z_R1, Z_ARG2, Assembler::bcondNotHigh,
                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);

    // Destructive overlap: let caller generate code for that.
  }

  // Generate stub for disjoint array copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //   from:  Z_ARG1
  //   to:    Z_ARG2
  //   count: Z_ARG3 treated as signed
  void generate_disjoint_copy(bool aligned, int element_size,
                              bool branchToEnd,
                              bool restoreArgs) {
    // This is the zarch specific stub generator for general array copy tasks.
    // It has the following prereqs and features:
    //
    // - No destructive overlap allowed (else unpredictable results).
    // - Destructive overlap does not exist if the leftmost byte of the target
    //   does not coincide with any of the source bytes (except the leftmost).
    //
    // Register usage upon entry:
    //   Z_ARG1 == Z_R2 :  address of source array
    //   Z_ARG2 == Z_R3 :  address of target array
    //   Z_ARG3 == Z_R4 :  length of operands (# of elements on entry)
    //
    // Register usage within the generator:
    // - Z_R0 and Z_R1 are KILLed by the stub routine (target addr/len).
    //   Used as pair register operand in complex moves, scratch registers anyway.
    // - Z_R5 is KILLed by the stub routine (source register pair addr/len) (even/odd reg).
    //   Same as R0/R1, but no scratch register.
    // - Z_ARG1, Z_ARG2, Z_ARG3 are USEd but preserved by the stub routine,
    //   but they might get temporarily overwritten.

    Register save_reg = Z_ARG4;   // (= Z_R5), holds original target operand address for restore.

    {
      Register llen_reg  = Z_R1;  // Holds left operand len (odd reg).
      Register laddr_reg = Z_R0;  // Holds left operand addr (even reg), overlaps with data_reg.
      Register rlen_reg  = Z_R5;  // Holds right operand len (odd reg), overlaps with save_reg.
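      // Note: MVCLE takes each operand as an even/odd register pair
      // (even reg: address, odd reg: length), which is why the register
      // assignments in this block are fixed.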
      Register raddr_reg = Z_R4;  // Holds right operand addr (even reg), overlaps with len_reg.

      Register data_reg = Z_R0;   // Holds copied data chunk in alignment process and copy loop.
      Register len_reg  = Z_ARG3; // Holds operand len (#elements at entry, #bytes shortly after).
      Register dst_reg  = Z_ARG2; // Holds left (target) operand addr.
      Register src_reg  = Z_ARG1; // Holds right (source) operand addr.

      Label     doMVCLOOP, doMVCLOOPcount, doMVCLOOPiterate;
      Label     doMVCUnrolled;
      NearLabel doMVC, doMVCgeneral, done;
      Label     MVC_template;
      address   pcMVCblock_b, pcMVCblock_e;

      bool      usedMVCLE       = true;
      bool      usedMVCLOOP     = true;
      bool      usedMVCUnrolled = false;
      bool      usedMVC         = false;
      bool      usedMVCgeneral  = false;

      int       stride;
      Register  stride_reg;
      Register  ix_reg;

      assert((element_size<=256) && (256%element_size == 0), "element size must be <= 256, power of 2");
      unsigned int log2_size = exact_log2(element_size);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("ARRAYCOPY DISJOINT byte  {"); break;
        case 2:  BLOCK_COMMENT("ARRAYCOPY DISJOINT short {"); break;
        case 4:  BLOCK_COMMENT("ARRAYCOPY DISJOINT int   {"); break;
        case 8:  BLOCK_COMMENT("ARRAYCOPY DISJOINT long  {"); break;
        default: BLOCK_COMMENT("ARRAYCOPY DISJOINT       {"); break;
      }

      assert_positive_int(len_reg);

      BLOCK_COMMENT("preparation {");

      // No copying if len <= 0.
      if (branchToEnd) {
        __ compare64_and_branch(len_reg, (intptr_t) 0, Assembler::bcondNotHigh, done);
      } else {
        if (VM_Version::has_CompareBranch()) {
          __ z_cgib(len_reg, 0, Assembler::bcondNotHigh, 0, Z_R14);
        } else {
          __ z_ltgr(len_reg, len_reg);
          __ z_bcr(Assembler::bcondNotPositive, Z_R14);
        }
      }

      // Prefetch just one cache line. Speculative opt for short arrays.
      // Do not use Z_R1 in prefetch. Is undefined here.
      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
      }

      BLOCK_COMMENT("} preparation");

      // Save args only if really needed.
      // Keep len test local to branch. Is generated only once.

      BLOCK_COMMENT("mode selection {");

      // Special handling for arrays with only a few elements.
      // Nothing fancy: just an executed MVC.
      if (log2_size > 0) {
        __ z_sllg(Z_R1, len_reg, log2_size); // Remember #bytes in Z_R1.
      }
      if (element_size != 8) {
        __ z_cghi(len_reg, 256/element_size);
        __ z_brnh(doMVC);
        usedMVC = true;
      }
      if (element_size == 8) { // Long and oop arrays are always aligned.
        __ z_cghi(len_reg, 256/element_size);
        __ z_brnh(doMVCUnrolled);
        usedMVCUnrolled = true;
      }

      // Prefetch another cache line. We, for sure, have more than one line to copy.
      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 256, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 256, Z_R0, dst_reg); // Store access.
      }

      if (restoreArgs) {
        // Remember entry value of ARG2 to restore all arguments later from that knowledge.
        __ z_lgr(save_reg, dst_reg);
      }

      __ z_cghi(len_reg, 4096/element_size);
      if (log2_size == 0) {
        __ z_lgr(Z_R1, len_reg); // Init Z_R1 with #bytes
      }
      __ z_brnh(doMVCLOOP);

      // Fall through to MVCLE case.

      BLOCK_COMMENT("} mode selection");

      // MVCLE: for long arrays
      //   DW aligned: Best performance for sizes > 4kBytes.
      //   unaligned:  Least complex for sizes > 256 bytes.
      if (usedMVCLE) {
        BLOCK_COMMENT("mode MVCLE {");

        // Setup registers for mvcle.
        // __ z_lgr(llen_reg, len_reg);   // r1 <- r4  #bytes already in Z_R1, aka llen_reg.
        __ z_lgr(laddr_reg, dst_reg);     // r0 <- r3
        __ z_lgr(raddr_reg, src_reg);     // r4 <- r2
        __ z_lgr(rlen_reg, llen_reg);     // r5 <- r1

        __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb0);    // special: bypass cache
        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb8); // special: Hold data in cache.
        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0);

        if (restoreArgs) {
          // MVCLE updates the source (Z_R4,Z_R5) and target (Z_R0,Z_R1) register pairs.
          // Dst_reg (Z_ARG2) and src_reg (Z_ARG1) are left untouched. No restore required.
          // Len_reg (Z_ARG3) is destroyed and must be restored.
          __ z_slgr(laddr_reg, dst_reg);             // copied #bytes
          if (log2_size > 0) {
            __ z_srag(Z_ARG3, laddr_reg, log2_size); // Convert back to #elements.
          } else {
            __ z_lgr(Z_ARG3, laddr_reg);
          }
        }
        if (branchToEnd) {
          __ z_bru(done);
        } else {
          __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVCLE");
      }
      // No fallthru possible here.

      // MVCUnrolled: for short, aligned arrays.

      if (usedMVCUnrolled) {
        BLOCK_COMMENT("mode MVC unrolled {");
        stride = 8;

        // Generate unrolled MVC instructions.
        for (int ii = 32; ii > 1; ii--) {
          __ z_mvc(0, ii * stride-1, dst_reg, 0, src_reg); // ii*8 byte copy
          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }
        }

        pcMVCblock_b = __ pc();
        __ z_mvc(0, 1 * stride-1, dst_reg, 0, src_reg); // 8 byte copy
        if (branchToEnd) {
          __ z_bru(done);
        } else {
          __ z_br(Z_R14);
        }

        pcMVCblock_e = __ pc();
        Label MVC_ListEnd;
        __ bind(MVC_ListEnd);

        // This is an absolute fast path:
        // - Array len in bytes must be not greater than 256.
        // - Array len in bytes must be an integer mult of DW
        //   to save expensive handling of trailing bytes.
        // - Argument restore is not done,
        //   i.e. previous code must not alter arguments (this code doesn't either).

        __ bind(doMVCUnrolled);

        // Avoid mul, prefer shift where possible.
        // Combine shift right (for #DW) with shift left (for block size).
        // Set CC for zero test below (asm_assert).
        // Note: #bytes comes in Z_R1, #DW in len_reg.
        unsigned int MVCblocksize    = pcMVCblock_e - pcMVCblock_b;
        unsigned int logMVCblocksize = 0xffffffffU; // Pacify compiler ("used uninitialized" warning).

        if (log2_size > 0) { // Len was scaled into Z_R1.
          switch (MVCblocksize) {

            case  8: logMVCblocksize = 3;
                     __ z_ltgr(Z_R0, Z_R1); // #bytes is index
                     break;                 // reasonable size, use shift

            case 16: logMVCblocksize = 4;
                     __ z_slag(Z_R0, Z_R1, logMVCblocksize-log2_size);
                     break;                 // reasonable size, use shift

            default: logMVCblocksize = 0;
                     __ z_ltgr(Z_R0, len_reg); // #DW for mul
                     break;                 // all other sizes: use mul
          }
        } else {
          guarantee(log2_size, "doMVCUnrolled: only for DW entities");
        }

        // This test (and branch) is redundant. Previous code makes sure that
        //  - element count > 0
        //  - element size == 8.
        // Thus, len reg should never be zero here. We insert an asm_assert() here,
        // just to double-check and to be on the safe side.
        __ asm_assert(false, "zero len cannot occur", 99);

        __ z_larl(Z_R1, MVC_ListEnd); // Get addr of last instr block.
        // Avoid mul, prefer shift where possible.
        if (logMVCblocksize == 0) {
          __ z_mghi(Z_R0, MVCblocksize);
        }
        __ z_slgr(Z_R1, Z_R0);
        __ z_br(Z_R1);
        BLOCK_COMMENT("} mode MVC unrolled");
      }
      // No fallthru possible here.

      // MVC execute template
      // Must always generate. Usage may be switched on below.
      // There is no suitable place after here to put the template.
      __ bind(MVC_template);
      __ z_mvc(0,0,dst_reg,0,src_reg); // Instr template, never exec directly!


      // MVC Loop: for medium-sized arrays

      // Only for DW aligned arrays (src and dst).
      // #bytes to copy must be at least 256!!!
      // Non-aligned cases handled separately.
      stride     = 256;
      stride_reg = Z_R1;   // Holds #bytes when control arrives here.
      ix_reg     = Z_ARG3; // Alias for len_reg.


      if (usedMVCLOOP) {
        BLOCK_COMMENT("mode MVC loop {");
        __ bind(doMVCLOOP);

        __ z_lcgr(ix_reg, Z_R1);      // Ix runs from -(n-2)*stride to 1*stride (inclusive).
        __ z_llill(stride_reg, stride);
        __ add2reg(ix_reg, 2*stride); // Thus: increment ix by 2*stride.

        __ bind(doMVCLOOPiterate);
        __ z_mvc(0, stride-1, dst_reg, 0, src_reg);
        __ add2reg(dst_reg, stride);
        __ add2reg(src_reg, stride);
        __ bind(doMVCLOOPcount);
        __ z_brxlg(ix_reg, stride_reg, doMVCLOOPiterate);

        // Don't use add2reg() here, since we must set the condition code!
        __ z_aghi(ix_reg, -2*stride); // Compensate incr from above: zero diff means "all copied".

        if (restoreArgs) {
          __ z_lcgr(Z_R1, ix_reg);    // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
          __ z_brnz(doMVCgeneral);    // We're not done yet, ix_reg is not zero.

          // ARG1, ARG2, and ARG3 were altered by the code above, so restore them building on save_reg.
          __ z_slgr(dst_reg, save_reg);              // copied #bytes
          __ z_slgr(src_reg, dst_reg);               // = ARG1 (now restored)
          if (log2_size) {
            __ z_srag(Z_ARG3, dst_reg, log2_size);   // Convert back to #elements to restore ARG3.
          } else {
            __ z_lgr(Z_ARG3, dst_reg);
          }
          __ z_lgr(Z_ARG2, save_reg);                // ARG2 now restored.

          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }

        } else {
          if (branchToEnd) {
            __ z_brz(done);                          // CC set by aghi instr.
          } else {
            __ z_bcr(Assembler::bcondZero, Z_R14);   // We're all done if zero.
          }

          __ z_lcgr(Z_R1, ix_reg);   // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
          // __ z_bru(doMVCgeneral); // fallthru
        }
        usedMVCgeneral = true;
        BLOCK_COMMENT("} mode MVC loop");
      }
      // Fallthru to doMVCgeneral

      // MVCgeneral: for short, unaligned arrays, after other copy operations

      // Somewhat expensive due to use of EX instruction, but simple.
      if (usedMVCgeneral) {
        BLOCK_COMMENT("mode MVC general {");
        __ bind(doMVCgeneral);

        __ add2reg(len_reg, -1, Z_R1);      // Get #bytes-1 for EXECUTE.
        if (VM_Version::has_ExecuteExtensions()) {
          __ z_exrl(len_reg, MVC_template); // Execute MVC with variable length.
        } else {
          __ z_larl(Z_R1, MVC_template);    // Get addr of instr template.
          __ z_ex(len_reg, 0, Z_R0, Z_R1);  // Execute MVC with variable length.
        }                                   // penalty: 9 ticks

        if (restoreArgs) {
          // ARG1, ARG2, and ARG3 were altered by code executed before, so restore them building on save_reg.
          __ z_slgr(dst_reg, save_reg);     // Copied #bytes without the "doMVCgeneral" chunk.
          __ z_slgr(src_reg, dst_reg);      // = ARG1 (now restored), was not advanced for "doMVCgeneral" chunk.
          __ add2reg_with_index(dst_reg, 1, len_reg, dst_reg); // Len of executed MVC was not accounted for, yet.
          if (log2_size) {
            __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3.
          } else {
            __ z_lgr(Z_ARG3, dst_reg);
          }
          __ z_lgr(Z_ARG2, save_reg);       // ARG2 now restored.
        }

        if (usedMVC) {
          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }
        } else {
          if (!branchToEnd) __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVC general");
      }
      // Fallthru possible if following block not generated.

      // MVC: for short, unaligned arrays

      // Somewhat expensive due to use of EX instruction, but simple. penalty: 9 ticks.
      // Differs from doMVCgeneral in reconstruction of ARG2, ARG3, and ARG4.
      if (usedMVC) {
        BLOCK_COMMENT("mode MVC {");
        __ bind(doMVC);

        // get #bytes-1 for EXECUTE
        if (log2_size) {
          __ add2reg(Z_R1, -1);            // Length was scaled into Z_R1.
        } else {
          __ add2reg(Z_R1, -1, len_reg);   // Length was not scaled.
        }

        if (VM_Version::has_ExecuteExtensions()) {
          __ z_exrl(Z_R1, MVC_template);   // Execute MVC with variable length.
        } else {
          __ z_lgr(Z_R0, Z_R5);            // Save ARG4, may be unnecessary.
          __ z_larl(Z_R5, MVC_template);   // Get addr of instr template.
          __ z_ex(Z_R1, 0, Z_R0, Z_R5);    // Execute MVC with variable length.
          __ z_lgr(Z_R5, Z_R0);            // Restore ARG4, may be unnecessary.
        }

        if (!branchToEnd) {
          __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVC");
      }

      __ bind(done);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT byte "); break;
        case 2:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT short"); break;
        case 4:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT int  "); break;
        case 8:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT long "); break;
        default: BLOCK_COMMENT("} ARRAYCOPY DISJOINT      "); break;
      }
    }
  }

  // Generate stub for conjoint array copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //   from:  Z_ARG1
  //   to:    Z_ARG2
  //   count: Z_ARG3 treated as signed
  void generate_conjoint_copy(bool aligned, int element_size, bool branchToEnd) {

    // This is the zarch specific stub generator for general array copy tasks.
    // It has the following prereqs and features:
    //
    // - Destructive overlap exists and is handled by reverse copy.
    // - Destructive overlap exists if the leftmost byte of the target
    //   does coincide with any of the source bytes (except the leftmost).
    // - Z_R0 and Z_R1 are KILLed by the stub routine (data and stride)
    // - Z_ARG1 and Z_ARG2 are USEd but preserved by the stub routine.
    // - Z_ARG3 is USED but preserved by the stub routine.
    // - Z_ARG4 is used as index register and is thus KILLed.
    //
    {
      Register stride_reg = Z_R1;   // Stride & compare value in loop (negative element_size).
      Register data_reg   = Z_R0;   // Holds value of currently processed element.
      Register ix_reg     = Z_ARG4; // Holds byte index of currently processed element.
      Register len_reg    = Z_ARG3; // Holds length (in #elements) of arrays.
      Register dst_reg    = Z_ARG2; // Holds left  operand addr.
      Register src_reg    = Z_ARG1; // Holds right operand addr.

      assert(256%element_size == 0, "Element size must be power of 2.");
      assert(element_size     <= 8, "Can't handle more than DW units.");

      switch (element_size) {
        case 1:  BLOCK_COMMENT("ARRAYCOPY CONJOINT byte  {"); break;
        case 2:  BLOCK_COMMENT("ARRAYCOPY CONJOINT short {"); break;
        case 4:  BLOCK_COMMENT("ARRAYCOPY CONJOINT int   {"); break;
        case 8:  BLOCK_COMMENT("ARRAYCOPY CONJOINT long  {"); break;
        default: BLOCK_COMMENT("ARRAYCOPY CONJOINT       {"); break;
      }

      assert_positive_int(len_reg);

      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
      }

      unsigned int log2_size = exact_log2(element_size);
      if (log2_size) {
        __ z_sllg(ix_reg, len_reg, log2_size);
      } else {
        __ z_lgr(ix_reg, len_reg);
      }

      // Optimize reverse copy loop.
      // Main loop copies DW units which may be unaligned. Unaligned access adds some penalty ticks.
      // Unaligned DW access (neither fetch nor store) is DW-atomic, but should be alignment-atomic.
      // Preceding the main loop, some bytes are copied to obtain a DW-multiple remaining length.

      Label countLoop1;
      Label copyLoop1;
      Label skipBY;
      Label skipHW;
      int   stride = -8;

      __ load_const_optimized(stride_reg, stride); // Prepare for DW copy loop.

      if (element_size == 8)    // Nothing to do here.
        __ z_bru(countLoop1);
      else {                    // Do not generate dead code.
        __ z_tmll(ix_reg, 7);   // Check the "odd" bits.
        __ z_bre(countLoop1);   // There are none, very good!
      }

      if (log2_size == 0) {     // Handle leftover Byte.
        __ z_tmll(ix_reg, 1);
        __ z_bre(skipBY);
        __ z_lb(data_reg,   -1, ix_reg, src_reg);
        __ z_stcy(data_reg, -1, ix_reg, dst_reg);
        __ add2reg(ix_reg, -1); // Decrement delayed to avoid AGI.
        __ bind(skipBY);
        // fallthru
      }
      if (log2_size <= 1) {     // Handle leftover HW.
        __ z_tmll(ix_reg, 2);
        __ z_bre(skipHW);
        __ z_lhy(data_reg,  -2, ix_reg, src_reg);
        __ z_sthy(data_reg, -2, ix_reg, dst_reg);
        __ add2reg(ix_reg, -2); // Decrement delayed to avoid AGI.
        __ bind(skipHW);
        __ z_tmll(ix_reg, 4);
        __ z_bre(countLoop1);
        // fallthru
      }
      if (log2_size <= 2) {     // There are just 4 bytes (left) that need to be copied.
        __ z_ly(data_reg,  -4, ix_reg, src_reg);
        __ z_sty(data_reg, -4, ix_reg, dst_reg);
        __ add2reg(ix_reg, -4); // Decrement delayed to avoid AGI.
        __ z_bru(countLoop1);
      }

      // Control can never get to here. Never! Never ever!
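      // (z_illtrap emits a deliberately invalid instruction, so reaching this
      // spot would raise an operation exception rather than copy garbage.)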
      __ z_illtrap(0x99);

      __ bind(copyLoop1);
      __ z_lg(data_reg,  0, ix_reg, src_reg);
      __ z_stg(data_reg, 0, ix_reg, dst_reg);
      __ bind(countLoop1);
      __ z_brxhg(ix_reg, stride_reg, copyLoop1);

      if (!branchToEnd)
        __ z_br(Z_R14);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT byte "); break;
        case 2:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT short"); break;
        case 4:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT int  "); break;
        case 8:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT long "); break;
        default: BLOCK_COMMENT("} ARRAYCOPY CONJOINT      "); break;
      }
    }
  }

  // Generate stub for disjoint byte copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  address generate_disjoint_byte_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);

    // This is the zarch specific stub generator for byte array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 1, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_short_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for short array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 2, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_int_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for int array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 4, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_long_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for long array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 8, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for oop array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features.
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    unsigned int size      = UseCompressedOops ? 4 : 8;
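
    // Oop copies use branchToEnd=true and restoreArgs=true: the copy code must
    // fall through instead of returning via Z_R14, and must leave Z_ARG2/Z_ARG3
    // intact, because the post barrier below still needs addr and count.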

    gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);

    generate_disjoint_copy(aligned, size, true, true);

    gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);

    return __ addr_at(start_off);
  }


  address generate_conjoint_byte_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping byte array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jbyte_disjoint_arraycopy()
                                       : StubRoutines::jbyte_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 0); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 1, false);

    return __ addr_at(start_off);
  }


  address generate_conjoint_short_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping short array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jshort_disjoint_arraycopy()
                                       : StubRoutines::jshort_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 1); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 2, false);

    return __ addr_at(start_off);
  }

  address generate_conjoint_int_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping int array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:

    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jint_disjoint_arraycopy()
                                       : StubRoutines::jint_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 2); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 4, false);

    return __ addr_at(start_off);
  }

  address generate_conjoint_long_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping long array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:

    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jlong_disjoint_arraycopy()
                                       : StubRoutines::jlong_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 3); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 8, false);

    return __ addr_at(start_off);
  }

  address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping oop array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features.
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    unsigned int size  = UseCompressedOops ? 4 : 8;
    unsigned int shift = UseCompressedOops ? 2 : 3;

    address nooverlap_target = aligned ? StubRoutines::arrayof_oop_disjoint_arraycopy(dest_uninitialized)
                                       : StubRoutines::oop_disjoint_arraycopy(dest_uninitialized);

    // Branch to disjoint_copy (if applicable) before pre_barrier to avoid double pre_barrier.
    array_overlap_test(nooverlap_target, shift);  // Branch away to nooverlap_target if disjoint.

    gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);

    generate_conjoint_copy(aligned, size, true);  // Must preserve ARG2, ARG3.

    gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);

    return __ addr_at(start_off);
  }


  void generate_arraycopy_stubs() {

    // Note: the disjoint stubs must be generated first, as some of
    // the conjoint stubs use them.
    StubRoutines::_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy (false, "jbyte_disjoint_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
    StubRoutines::_jint_disjoint_arraycopy       = generate_disjoint_int_copy  (false, "jint_disjoint_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy      = generate_disjoint_long_copy (false, "jlong_disjoint_arraycopy");
    StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_oop_copy  (false, "oop_disjoint_arraycopy", false);
    StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy  (false, "oop_disjoint_arraycopy_uninit", true);

    StubRoutines::_arrayof_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy (true, "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jint_disjoint_arraycopy       = generate_disjoint_int_copy  (true, "arrayof_jint_disjoint_arraycopy");
    StubRoutines::_arrayof_jlong_disjoint_arraycopy      = generate_disjoint_long_copy (true, "arrayof_jlong_disjoint_arraycopy");
    StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy  (true, "arrayof_oop_disjoint_arraycopy", false);
    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy  (true, "arrayof_oop_disjoint_arraycopy_uninit", true);

    StubRoutines::_jbyte_arraycopy      = generate_conjoint_byte_copy (false, "jbyte_arraycopy");
    StubRoutines::_jshort_arraycopy     = generate_conjoint_short_copy(false, "jshort_arraycopy");
    StubRoutines::_jint_arraycopy       = generate_conjoint_int_copy  (false, "jint_arraycopy");
    StubRoutines::_jlong_arraycopy      = generate_conjoint_long_copy (false, "jlong_arraycopy");
    StubRoutines::_oop_arraycopy        = generate_conjoint_oop_copy  (false, "oop_arraycopy", false);
    StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy  (false, "oop_arraycopy_uninit", true);

    StubRoutines::_arrayof_jbyte_arraycopy      = generate_conjoint_byte_copy (true, "arrayof_jbyte_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy     = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
    StubRoutines::_arrayof_jint_arraycopy       = generate_conjoint_int_copy  (true, "arrayof_jint_arraycopy");
    StubRoutines::_arrayof_jlong_arraycopy      = generate_conjoint_long_copy (true, "arrayof_jlong_arraycopy");
    StubRoutines::_arrayof_oop_arraycopy        = generate_conjoint_oop_copy  (true, "arrayof_oop_arraycopy", false);
    StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy  (true, "arrayof_oop_arraycopy_uninit", true);
  }

  void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {

    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   Z_ARG1 = adr
    //   Z_ARG2 = errValue
    //
    // result:
    //   Z_RET  = *adr or errValue

    StubCodeMark mark(this, "StubRoutines", name);

    // Entry point.
    // Load *adr into Z_ARG2, may fault.
    *entry = *fault_pc = __ pc();
    switch (size) {
      case 4:
        // Sign extended int32_t.
        __ z_lgf(Z_ARG2, 0, Z_ARG1);
        break;
      case 8:
        // int64_t
        __ z_lg(Z_ARG2, 0, Z_ARG1);
        break;
      default:
        ShouldNotReachHere();
    }

    // Return errValue or *adr.
    *continuation_pc = __ pc();
    __ z_lgr(Z_RET, Z_ARG2);
    __ z_br(Z_R14);
  }

  // Call interface for AES_encryptBlock, AES_decryptBlock stubs.
  //
  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
  //   Z_ARG2 - destination data block. Ptr to leftmost byte to be stored.
  //            For in-place encryption/decryption, ARG1 and ARG2 can point
  //            to the same piece of storage.
  //   Z_ARG3 - Crypto key address (expanded key). The first n bits of
  //            the expanded key constitute the original AES-<n> key (see below).
  //
  //   Z_RET  - return value. First unprocessed byte offset in src buffer.
  //
  // Some remarks:
  //   The crypto key, as passed from the caller to these encryption stubs,
  //   is a so-called expanded key. It is derived from the original key
  //   by the Rijndael key schedule, see http://en.wikipedia.org/wiki/Rijndael_key_schedule
  //   With the expanded key, the cipher/decipher task is decomposed into
  //   multiple, less complex steps, called rounds. Sun SPARC and Intel
  //   processors apparently implement support for those less complex steps.
  //   z/Architecture provides instructions for full cipher/decipher complexity.
  //   Therefore, we need the original key here, not the expanded key.
  //   Luckily, the first n bits of an AES-<n> expanded key are formed
  //   by the original key itself. That takes us out of trouble. :-)
  //   The key length (in bytes) relation is as follows:
  //     original    expanded   rounds   key bit   keylen
  //     key bytes   key bytes           length    in words
  //        16          176       11       128       44
  //        24          208       13       192       52
  //        32          240       15       256       60
  //
  // The crypto instructions used in the AES* stubs have some specific register requirements.
  //   Z_R0 holds the crypto function code. Please refer to the KM/KMC instruction
  //        description in the "z/Architecture Principles of Operation" manual for details.
  //   Z_R1 holds the parameter block address. The parameter block contains the cryptographic key
  //        (KM instruction) or the chaining value followed by the key (KMC instruction).
  //   dst  must designate an even-numbered register, holding the address of the output message.
  //   src  must designate an even/odd register pair, holding the address/length of the original message.

  // Helper function which generates code to
  //  - load the function code in register fCode (== Z_R0).
  //  - load the data block length (depends on cipher function) into register srclen.
  //  - is_decipher switches between cipher and decipher function codes.
  void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) {

    BLOCK_COMMENT("Set fCode {"); {
      Label fCode_set;
      int mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
      bool identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk)
                                   && (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
      // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256.
      __ z_cghi(keylen, 52);  // Check only once at the beginning. keylen and fCode may share the same register.

      __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
      if (!identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
      }
      __ z_brl(fCode_set);  // keyLen <  52: AES128

      __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode);
      if (!identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES192_dataBlk);
      }
      __ z_bre(fCode_set);  // keyLen == 52: AES192

      __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
      if (!identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk);
      }
      // __ z_brh(fCode_set); // keyLen >  52: AES256 // fallthru

      __ bind(fCode_set);
      if (identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
      }
    }
    BLOCK_COMMENT("} Set fCode");
  }

  // Push a parameter block for the cipher/decipher instruction on the stack.
  // Layout of the additional stack space allocated for AES_cipherBlockChaining:
  //
  //   |        |
  //   +--------+ <-- SP before expansion
  //   |        |
  //   :        :  alignment loss, 0..(AES_parmBlk_align-8) bytes
  //   |        |
  //   +--------+
  //   |        |
  //   :        :  space for parameter block, size VM_Version::Cipher::_AES*_parmBlk_C
  //   |        |
  //   +--------+ <-- parmBlk, octoword-aligned, start of parameter block
  //   |        |
  //   :        :  additional stack space for spills etc., size AES_parmBlk_addspace, DW @ Z_SP not usable!!!
  //   |        |
  //   +--------+ <-- Z_SP after expansion

  void generate_push_Block(int dataBlk_len, int parmBlk_len, int crypto_fCode,
                           Register parmBlk, Register keylen, Register fCode, Register cv, Register key) {
    const int AES_parmBlk_align    = 32;  // octoword alignment
    const int AES_parmBlk_addspace = 24;  // Must be sufficiently large to hold all spilled registers
                                          // (currently 2) PLUS 1 DW for the frame pointer.

    const int cv_len     = dataBlk_len;
    const int key_len    = parmBlk_len - cv_len;
    // This length must be known at JIT compile time. Only then are we able to recalc the SP before resize.
    // We buy this knowledge by wasting some (up to AES_parmBlk_align) bytes of stack space.
    const int resize_len = cv_len + key_len + AES_parmBlk_align + AES_parmBlk_addspace;

    // Use parmBlk as temp reg here to hold the frame pointer.
    __ resize_frame(-resize_len, parmBlk, true);

    // Calculate parmBlk address from updated (resized) SP.
    __ add2reg(parmBlk, resize_len - (cv_len + key_len), Z_SP);
    __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.

    // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace+8, parmBlk).
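
    // A worked example of the sizing above (an illustration only; the actual
    // _AES*_parmBlk_C values are defined in vm_version_s390): for an AES-128
    // KMC parameter block of 32 bytes (16 bytes cv + 16 bytes key),
    //   resize_len = 16 + 16 + 32 + 24 = 88 bytes.
    // parmBlk = align_down(Z_SP + 88 - 32, 32) then lies at Z_SP+32 or above,
    // so the two spill slots written below (parmBlk-8, parmBlk-16) stay clear
    // of the unusable DW at Z_SP.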
    __ z_stg(keylen, -8, parmBlk);  // Spill keylen for later use.

    // Calculate (SP before resize) from updated SP.
    __ add2reg(keylen, resize_len, Z_SP);  // keylen holds prev SP for now.
    __ z_stg(keylen, -16, parmBlk);        // Spill prev SP for easy revert.

    __ z_mvc(0, cv_len-1, parmBlk, 0, cv);        // Copy cv.
    __ z_mvc(cv_len, key_len-1, parmBlk, 0, key); // Copy key.
    __ z_lghi(fCode, crypto_fCode);
  }

  // NOTE:
  // Before returning, the stub has to copy the chaining value from
  // the parmBlk, where it was updated by the crypto instruction, back
  // to the chaining value array, whose address was passed in the cv argument.
  // As all the available registers are used and modified by KMC, we need to save
  // the key length across the KMC instruction. We do so by spilling it to the stack,
  // just preceding the parmBlk (at (parmBlk - 8)).
  void generate_push_parmBlk(Register keylen, Register fCode, Register parmBlk, Register key, Register cv, bool is_decipher) {
    int mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
    Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;

    BLOCK_COMMENT("push parmBlk {");
    if (VM_Version::has_Crypto_AES()   ) { __ z_cghi(keylen, 52); }
    if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); }  // keyLen <  52: AES128
    if (VM_Version::has_Crypto_AES192()) { __ z_bre(parmBlk_192); }  // keyLen == 52: AES192
    if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); }  // keyLen >  52: AES256

    // Security net: requested AES function not available on this CPU.
    // NOTE:
    // As of now (March 2015), this safety net is not required. JCE policy files limit the
    // cryptographic strength of the keys used to 128 bit. If we have AES hardware support
    // at all, we have at least AES-128.
    __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAES as remedy.", 0);

    if (VM_Version::has_Crypto_AES256()) {
      __ bind(parmBlk_256);
      generate_push_Block(VM_Version::Cipher::_AES256_dataBlk,
                          VM_Version::Cipher::_AES256_parmBlk_C,
                          VM_Version::Cipher::_AES256 + mode,
                          parmBlk, keylen, fCode, cv, key);
      if (VM_Version::has_Crypto_AES128() || VM_Version::has_Crypto_AES192()) {
        __ z_bru(parmBlk_set);  // Fallthru otherwise.
      }
    }

    if (VM_Version::has_Crypto_AES192()) {
      __ bind(parmBlk_192);
      generate_push_Block(VM_Version::Cipher::_AES192_dataBlk,
                          VM_Version::Cipher::_AES192_parmBlk_C,
                          VM_Version::Cipher::_AES192 + mode,
                          parmBlk, keylen, fCode, cv, key);
      if (VM_Version::has_Crypto_AES128()) {
        __ z_bru(parmBlk_set);  // Fallthru otherwise.
      }
    }

    if (VM_Version::has_Crypto_AES128()) {
      __ bind(parmBlk_128);
      generate_push_Block(VM_Version::Cipher::_AES128_dataBlk,
                          VM_Version::Cipher::_AES128_parmBlk_C,
                          VM_Version::Cipher::_AES128 + mode,
                          parmBlk, keylen, fCode, cv, key);
      // Fallthru
    }

    __ bind(parmBlk_set);
    BLOCK_COMMENT("} push parmBlk");
  }

  // Pop a parameter block from the stack. The chaining value portion of the parameter block
  // is copied back to the cv array as it is needed for subsequent cipher steps.
  // The keylen value as well as the original SP (before resizing) were pushed to the stack
  // when pushing the parameter block.
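  // For reference, the spill slots written by generate_push_Block
  // (offsets relative to parmBlk):
  //   parmBlk -  8 : keylen (expanded key array length, in 4-byte words)
  //   parmBlk - 16 : Z_SP value from before the frame resize (restored below)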
  void generate_pop_parmBlk(Register keylen, Register parmBlk, Register key, Register cv) {

    BLOCK_COMMENT("pop parmBlk {");
    bool identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) &&
                                 (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
    if (identical_dataBlk_len) {
      int cv_len = VM_Version::Cipher::_AES128_dataBlk;
      __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
    } else {
      int cv_len;
      Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
      __ z_lg(keylen, -8, parmBlk);  // Restore keylen.
      __ z_cghi(keylen, 52);
      if (VM_Version::has_Crypto_AES256()) __ z_brh(parmBlk_256);  // keyLen >  52: AES256
      if (VM_Version::has_Crypto_AES192()) __ z_bre(parmBlk_192);  // keyLen == 52: AES192
      // if (VM_Version::has_Crypto_AES128()) __ z_brl(parmBlk_128); // keyLen < 52: AES128 // fallthru

      // Security net: there is none here. Had we needed one, we would have
      // fallen into it already when pushing the parameter block.
      if (VM_Version::has_Crypto_AES128()) {
        __ bind(parmBlk_128);
        cv_len = VM_Version::Cipher::_AES128_dataBlk;
        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
        if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
          __ z_bru(parmBlk_set);
        }
      }

      if (VM_Version::has_Crypto_AES192()) {
        __ bind(parmBlk_192);
        cv_len = VM_Version::Cipher::_AES192_dataBlk;
        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
        if (VM_Version::has_Crypto_AES256()) {
          __ z_bru(parmBlk_set);
        }
      }

      if (VM_Version::has_Crypto_AES256()) {
        __ bind(parmBlk_256);
        cv_len = VM_Version::Cipher::_AES256_dataBlk;
        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
        // __ z_bru(parmBlk_set); // fallthru
      }
      __ bind(parmBlk_set);
    }
    __ z_lg(Z_SP, -16, parmBlk);  // Revert the frame resize. Z_SP was saved by generate_push_Block.
    BLOCK_COMMENT("} pop parmBlk");
  }

  // Compute AES encrypt/decrypt function.
  void generate_AES_cipherBlock(bool is_decipher) {
    // Incoming arguments.
    Register       from    = Z_ARG1;  // source byte array
    Register       to      = Z_ARG2;  // destination byte array
    Register       key     = Z_ARG3;  // expanded key array

    const Register keylen  = Z_R0;    // Temporarily (until fCode is set) holds the expanded key array length.

    // Register definitions as required by KM instruction.
    const Register fCode   = Z_R0;    // crypto function code
    const Register parmBlk = Z_R1;    // parameter block address (points to crypto key)
    const Register src     = Z_ARG1;  // Must be even reg (KM requirement).
    const Register srclen  = Z_ARG2;  // Must be odd reg and pair with src. Overwrites destination address.
    const Register dst     = Z_ARG3;  // Must be even reg (KM requirement). Overwrites expanded key address.

    // Read key length of expanded key (in 4-byte words).
    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    // Copy arguments to registers as required by crypto instruction.
    __ z_lgr(parmBlk, key);      // crypto key (in T_INT array).
    __ lgr_if_needed(src, from); // Copy src address. Will not emit, src/from are identical.
    __ z_lgr(dst, to);           // Copy dst address, even register required.

    // Construct function code into fCode(Z_R0), data block length into srclen(Z_ARG2).
    generate_load_AES_fCode(keylen, fCode, srclen, is_decipher);

    __ km(dst, src);  // Cipher the message.

    __ z_br(Z_R14);
  }

  // Compute AES encrypt function.
  address generate_AES_encryptBlock(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    generate_AES_cipherBlock(false);

    return __ addr_at(start_off);
  }

  // Compute AES decrypt function.
  address generate_AES_decryptBlock(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    generate_AES_cipherBlock(true);

    return __ addr_at(start_off);
  }

  // These stubs receive the addresses of the cryptographic key and of the chaining value as two separate
  // arguments (registers "key" and "cv", respectively). The KMC instruction, on the other hand, requires
  // chaining value and key to be, in this sequence, adjacent in storage. Thus, we need to allocate some
  // thread-local working storage. Using heap memory incurs all the hassles of allocating/freeing.
  // Stack space, by contrast, is deallocated automatically when we return from the stub to the caller.
  // *** WARNING ***
  // Please note that we do not formally allocate stack space, nor do we
  // update the stack pointer. Therefore, no function calls are allowed
  // and nobody else must use the stack range where the parameter block
  // is located.
  // We align the parameter block to the next available octoword.
  //
  // Compute chained AES encrypt/decrypt function.
  void generate_AES_cipherBlockChaining(bool is_decipher) {

    Register       from    = Z_ARG1;  // source byte array (clear text)
    Register       to      = Z_ARG2;  // destination byte array (ciphered)
    Register       key     = Z_ARG3;  // expanded key array
    Register       cv      = Z_ARG4;  // chaining value
    const Register msglen  = Z_ARG5;  // Total length of the msg to be encrypted. Value must be
                                      // returned in Z_RET upon completion of this stub. 32-bit integer.

    const Register keylen  = Z_R0;    // Expanded key length, as read from key array. Temp only.
    const Register fCode   = Z_R0;    // crypto function code
    const Register parmBlk = Z_R1;    // parameter block address (points to crypto key)
    const Register src     = Z_ARG1;  // is Z_R2
    const Register srclen  = Z_ARG2;  // Overwrites destination address.
    const Register dst     = Z_ARG3;  // Overwrites key address.

    // Read key length of expanded key (in 4-byte words).
    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
    // Construct function code in fCode (Z_R0).
    generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, is_decipher);

    // Prepare other registers for instruction.
    __ lgr_if_needed(src, from);  // Copy src address. Will not emit, src/from are identical.
    __ z_lgr(dst, to);
    __ z_llgfr(srclen, msglen);   // msglen is passed as int; zero-extend it to the long the instruction requires.

    __ kmc(dst, src);  // Cipher the message.
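
    // Note: KM/KMC process a CPU-determined number of bytes at a time and report
    // partial completion via condition code 3, in which case the instruction must
    // be re-executed. The km()/kmc() emitters are assumed to wrap the instruction
    // in the required retry loop (see the MacroAssembler implementation).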

    generate_pop_parmBlk(keylen, parmBlk, key, cv);

    __ z_llgfr(Z_RET, msglen);  // Return msglen, zero-extended to 64 bit, as the number of bytes processed.
    __ z_br(Z_R14);
  }

  // Compute chained AES encrypt function.
  address generate_cipherBlockChaining_AES_encrypt(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    generate_AES_cipherBlockChaining(false);

    return __ addr_at(start_off);
  }

  // Compute chained AES decrypt function.
  address generate_cipherBlockChaining_AES_decrypt(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    generate_AES_cipherBlockChaining(true);

    return __ addr_at(start_off);
  }


  // Call interface for all SHA* stubs.
  //
  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
  //   Z_ARG2 - current SHA state. Ptr to state area. This area serves as
  //            parameter block as required by the crypto instruction.
  //   Z_ARG3 - current byte offset in source data block.
  //   Z_ARG4 - last byte offset in source data block.
  //            (Z_ARG4 - Z_ARG3) gives the #bytes remaining to be processed.
  //
  //   Z_RET  - return value. First unprocessed byte offset in src buffer.
  //
  // A few notes on the call interface:
  //  - All stubs, whether they are single-block or multi-block, are assumed to
  //    digest data whose length is an integer multiple of the data block length.
  //    All data blocks are digested using the intermediate message digest (KIMD)
  //    instruction. Special end processing, as done by the KLMD instruction,
  //    seems to be emulated by the calling code.
  //
  //  - Z_ARG1 addresses the first byte of source data. The offset (Z_ARG3) is
  //    already accounted for.
  //
  //  - The current SHA state (the intermediate message digest value) is contained
  //    in an area addressed by Z_ARG2. The area size depends on the SHA variant
  //    and is accessible via the enum VM_Version::MsgDigest::_SHA<n>_parmBlk_I.
  //
  //  - The single-block stub is expected to digest exactly one data block, starting
  //    at the address passed in Z_ARG1.
  //
  //  - The multi-block stub is expected to digest all data blocks which start in
  //    the offset interval [srcOff(Z_ARG3), srcLimit(Z_ARG4)). The exact difference
  //    (srcLimit - srcOff), rounded up to the next multiple of the data block length,
  //    gives the number of bytes (and thus blocks) to digest. It must be assumed that
  //    the calling code provides for a large enough source data buffer.
  //
  // Compute SHA-1 function.
  address generate_SHA1_stub(bool multiBlock, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    const Register srcBuff        = Z_ARG1;  // Points to first block to process (offset already added).
    const Register SHAState       = Z_ARG2;  // Only on entry. Reused soon thereafter for kimd register pairs.
    const Register srcOff         = Z_ARG3;  // int
    const Register srcLimit       = Z_ARG4;  // Only passed in multiBlock case. int

    const Register SHAState_local = Z_R1;
    const Register SHAState_save  = Z_ARG3;
    const Register srcBufLen      = Z_ARG2;  // Destroys state address, must be copied before.
    Label useKLMD, rtn;

    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA1);  // function code
    __ z_lgr(SHAState_local, SHAState);  // SHAState == parameter block

    if (multiBlock) {  // Process everything from offset to limit.

      // The following description is valid if we get a raw (unprocessed) source data buffer,
      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description in
      // place to inform the reader of what must be happening, hidden in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, saving us
      // from copying the SHA state back and forth. Otherwise, we copy the SHA state
      // to the stack, execute a KLMD instruction on it and copy the result back to the
      // caller's SHA state location.

      // Total #srcBuff blocks to process.
      if (VM_Version::has_DistinctOpnds()) {
        __ z_srk(srcBufLen, srcLimit, srcOff);                         // exact difference
        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);   // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
        __ z_ark(srcLimit, srcOff, srcBufLen);  // srcLimit temporarily holds return value.
        __ z_llgfr(srcBufLen, srcBufLen);       // Cast to 64-bit.
      } else {
        __ z_lgfr(srcBufLen, srcLimit);         // Exact difference. srcLimit passed as int.
        __ z_sgfr(srcBufLen, srcOff);           // srcOff passed as int, now properly cast to long.
        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
        __ z_lgr(srcLimit, srcOff);             // srcLimit temporarily holds return value.
        __ z_agr(srcLimit, srcBufLen);
      }

      // Integral #blocks to digest?
      // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA1_dataBlk, or else we are in big trouble.
      // We insert an asm_assert into the KLMD case to guard against that.
      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);
      __ z_brc(Assembler::bcondNotAllZero, useKLMD);

      // Process all full blocks.
      __ kimd(srcBuff);

      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
    } else {  // Process one data block only.
      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA1_dataBlk);  // #srcBuff bytes to process
      __ kimd(srcBuff);
      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA1_dataBlk, srcOff);  // Offset of first unprocessed byte in buffer. No 32 to 64 bit extension needed.
    }

    __ bind(rtn);
    __ z_br(Z_R14);

    if (multiBlock) {
      __ bind(useKLMD);

#if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE: The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA1 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
#endif
    }

    return __ addr_at(start_off);
  }

  // Compute SHA-256 function.
  address generate_SHA256_stub(bool multiBlock, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    const Register srcBuff        = Z_ARG1;
    const Register SHAState       = Z_ARG2;  // Only on entry. Reused soon thereafter.
    const Register SHAState_local = Z_R1;
    const Register SHAState_save  = Z_ARG3;
    const Register srcOff         = Z_ARG3;
    const Register srcLimit       = Z_ARG4;
    const Register srcBufLen      = Z_ARG2;  // Destroys state address, must be copied before.
    Label useKLMD, rtn;

    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA256);  // function code
    __ z_lgr(SHAState_local, SHAState);  // SHAState == parameter block

    if (multiBlock) {  // Process everything from offset to limit.
      // The following description is valid if we get a raw (unprocessed) source data buffer,
      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description in
      // place to inform the reader of what must be happening, hidden in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, saving us
      // from copying the SHA state back and forth. Otherwise, we copy the SHA state
      // to the stack, execute a KLMD instruction on it and copy the result back to the
      // caller's SHA state location.

      // Total #srcBuff blocks to process.
      if (VM_Version::has_DistinctOpnds()) {
        __ z_srk(srcBufLen, srcLimit, srcOff);                           // exact difference
        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);   // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
        __ z_ark(srcLimit, srcOff, srcBufLen);  // srcLimit temporarily holds return value.
        __ z_llgfr(srcBufLen, srcBufLen);       // Cast to 64-bit.
      } else {
        __ z_lgfr(srcBufLen, srcLimit);         // exact difference
        __ z_sgfr(srcBufLen, srcOff);
        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
        __ z_lgr(srcLimit, srcOff);             // srcLimit temporarily holds return value.
        __ z_agr(srcLimit, srcBufLen);
      }

      // Integral #blocks to digest?
      // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA256_dataBlk, or else we are in big trouble.
      // We insert an asm_assert into the KLMD case to guard against that.
      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);
      __ z_brc(Assembler::bcondNotAllZero, useKLMD);

      // Process all full blocks.
      __ kimd(srcBuff);

      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
    } else {  // Process one data block only.
      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA256_dataBlk);  // #srcBuff bytes to process
      __ kimd(srcBuff);
      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA256_dataBlk, srcOff);  // Offset of first unprocessed byte in buffer.
    }

    __ bind(rtn);
    __ z_br(Z_R14);

    if (multiBlock) {
      __ bind(useKLMD);
#if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE:
      // The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA256 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
#endif
    }

    return __ addr_at(start_off);
  }

  // Compute SHA-512 function.
  address generate_SHA512_stub(bool multiBlock, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    const Register srcBuff        = Z_ARG1;
    const Register SHAState       = Z_ARG2;  // Only on entry. Reused soon thereafter.
    const Register SHAState_local = Z_R1;
    const Register SHAState_save  = Z_ARG3;
    const Register srcOff         = Z_ARG3;
    const Register srcLimit       = Z_ARG4;
    const Register srcBufLen      = Z_ARG2;  // Destroys state address, must be copied before.
    Label useKLMD, rtn;

    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA512);  // function code
    __ z_lgr(SHAState_local, SHAState);  // SHAState == parameter block

    if (multiBlock) {  // Process everything from offset to limit.
      // The following description is valid if we get a raw (unprocessed) source data buffer,
      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description in
      // place to inform the reader of what must be happening, hidden in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, saving us
      // from copying the SHA state back and forth. Otherwise, we copy the SHA state
      // to the stack, execute a KLMD instruction on it and copy the result back to the
      // caller's SHA state location.

      // Total #srcBuff blocks to process.
      if (VM_Version::has_DistinctOpnds()) {
        __ z_srk(srcBufLen, srcLimit, srcOff);                           // exact difference
        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);   // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
        __ z_ark(srcLimit, srcOff, srcBufLen);  // srcLimit temporarily holds return value.
        __ z_llgfr(srcBufLen, srcBufLen);       // Cast to 64-bit.
      } else {
        __ z_lgfr(srcBufLen, srcLimit);         // exact difference
        __ z_sgfr(srcBufLen, srcOff);
        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
        __ z_lgr(srcLimit, srcOff);             // srcLimit temporarily holds return value.
        __ z_agr(srcLimit, srcBufLen);
      }

      // Integral #blocks to digest?
      // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA512_dataBlk, or else we are in big trouble.
      // We insert an asm_assert into the KLMD case to guard against that.
      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);
      __ z_brc(Assembler::bcondNotAllZero, useKLMD);

      // Process all full blocks.
      __ kimd(srcBuff);

      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
    } else {  // Process one data block only.
      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA512_dataBlk);  // #srcBuff bytes to process
      __ kimd(srcBuff);
      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA512_dataBlk, srcOff);  // Offset of first unprocessed byte in buffer.
    }

    __ bind(rtn);
    __ z_br(Z_R14);

    if (multiBlock) {
      __ bind(useKLMD);
#if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE:
      // The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA512 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
#endif
    }

    return __ addr_at(start_off);
  }


  /**
   *  Arguments:
   *
   *  Inputs:
   *   Z_ARG1    - int   crc
   *   Z_ARG2    - byte* buf
   *   Z_ARG3    - int   length (of buffer)
   *
   *  Result:
   *   Z_RET     - int   crc result
   **/
  // Compute CRC function (generic, for all polynomials).
  void generate_CRC_updateBytes(const char* name, Register table, bool invertCRC) {

    // Arguments to kernel_crc32:
    Register       crc     = Z_ARG1;  // Current checksum, preset by caller or result from previous call, int.
    Register       data    = Z_ARG2;  // source byte array
    Register       dataLen = Z_ARG3;  // #bytes to process, int
    // Register    table   = Z_ARG4;  // crc table address. Preloaded and passed in by caller.
    const Register t0      = Z_R10;   // work reg for kernel* emitters
    const Register t1      = Z_R11;   // work reg for kernel* emitters
    const Register t2      = Z_R12;   // work reg for kernel* emitters
    const Register t3      = Z_R13;   // work reg for kernel* emitters

    assert_different_registers(crc, data, dataLen, table);

    // We pass these values as ints, not as longs as required by C calling convention.
    // Crc used as int.
    __ z_llgfr(dataLen, dataLen);

    __ resize_frame(-(6*8), Z_R0, true);  // Resize frame to provide add'l space for register spills.
    __ z_stmg(Z_R10, Z_R13, 1*8, Z_SP);   // Spill regs Z_R10..Z_R13 to make them available as work registers.
    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, invertCRC);
    __ z_lmg(Z_R10, Z_R13, 1*8, Z_SP);    // Restore regs Z_R10..Z_R13 from stack.
    __ resize_frame(+(6*8), Z_R0, true);  // Revert the frame resize from above.

    __ z_llgfr(Z_RET, crc);  // Updated crc is function result. No copying required, just zero upper 32 bits.
    __ z_br(Z_R14);          // Result already in Z_RET == Z_ARG1.
  }


  // Compute CRC32 function.
  address generate_CRC32_updateBytes(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    assert(UseCRC32Intrinsics, "should not generate this stub (%s) with CRC32 intrinsics disabled", name);

    BLOCK_COMMENT("CRC32_updateBytes {");
    Register table = Z_ARG4;  // crc32 table address.
    StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);

    generate_CRC_updateBytes(name, table, true);
    BLOCK_COMMENT("} CRC32_updateBytes");

    return __ addr_at(start_off);
  }


  // Compute CRC32C function.
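  // Same code shape as CRC32 above. The two differences: the CRC32C table
  // (Castagnoli polynomial) is loaded instead, and invertCRC is false, so the
  // kernel is expected to skip the pre/post inversion of the crc value.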
  address generate_CRC32C_updateBytes(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    assert(UseCRC32CIntrinsics, "should not generate this stub (%s) with CRC32C intrinsics disabled", name);

    BLOCK_COMMENT("CRC32C_updateBytes {");
    Register table = Z_ARG4;  // crc32c table address.
    StubRoutines::zarch::generate_load_crc32c_table_addr(_masm, table);

    generate_CRC_updateBytes(name, table, false);
    BLOCK_COMMENT("} CRC32C_updateBytes");

    return __ addr_at(start_off);
  }


  // Arguments:
  //   Z_ARG1    - x address
  //   Z_ARG2    - x length
  //   Z_ARG3    - y address
  //   Z_ARG4    - y length
  //   Z_ARG5    - z address
  //   160[Z_SP] - z length
  address generate_multiplyToLen() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");

    address start = __ pc();

    const Register x    = Z_ARG1;
    const Register xlen = Z_ARG2;
    const Register y    = Z_ARG3;
    const Register ylen = Z_ARG4;
    const Register z    = Z_ARG5;
    // zlen is passed on the stack:
    // Address zlen(Z_SP, _z_abi(remaining_cargs));

    // These registers will be saved on the stack in multiply_to_len().
    const Register tmp1 = Z_tmp_1;
    const Register tmp2 = Z_tmp_2;
    const Register tmp3 = Z_tmp_3;
    const Register tmp4 = Z_tmp_4;
    const Register tmp5 = Z_R9;

    BLOCK_COMMENT("Entry:");

    __ z_llgfr(xlen, xlen);
    __ z_llgfr(ylen, ylen);

    __ multiply_to_len(x, xlen, y, ylen, z, tmp1, tmp2, tmp3, tmp4, tmp5);

    __ z_br(Z_R14);  // Return to caller.

    return start;
  }

  void generate_initial() {
    // Generates the initial stubs and initializes the entry points.

    // Entry points that exist in all platforms.
    // Note: This is code that could be shared among different
    // platforms - however the benefit seems to be smaller than the
    // disadvantage of having a much more complicated generator
    // structure. See also comment in stubRoutines.hpp.
    StubRoutines::_forward_exception_entry = generate_forward_exception();

    StubRoutines::_call_stub_entry         = generate_call_stub(StubRoutines::_call_stub_return_address);
    StubRoutines::_catch_exception_entry   = generate_catch_exception();

    // Build this early so it's available for the interpreter.
    StubRoutines::_throw_StackOverflowError_entry =
      generate_throw_exception("StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
    StubRoutines::_throw_delayed_StackOverflowError_entry =
      generate_throw_exception("delayed StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), false);

    //----------------------------------------------------------------------
    // Entry points that are platform specific.
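    // The table addresses are published via StubRoutines so that the
    // interpreter and the compiled-code intrinsics can load them.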

    if (UseCRC32Intrinsics) {
      StubRoutines::_crc_table_adr     = (address)StubRoutines::zarch::_crc_table;
      StubRoutines::_updateBytesCRC32  = generate_CRC32_updateBytes("CRC32_updateBytes");
    }

    if (UseCRC32CIntrinsics) {
      StubRoutines::_crc32c_table_addr = (address)StubRoutines::zarch::_crc32c_table;
      StubRoutines::_updateBytesCRC32C = generate_CRC32C_updateBytes("CRC32C_updateBytes");
    }

    // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
    StubRoutines::zarch::_trot_table_addr = (address)StubRoutines::zarch::_trot_table;
  }


  void generate_all() {
    // Generates all stubs and initializes the entry points.

    StubRoutines::zarch::_partial_subtype_check = generate_partial_subtype_check();

    // These entry points require SharedInfo::stack0 to be set up in non-core builds.
    StubRoutines::_throw_AbstractMethodError_entry          = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),           false);
    StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
    StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call),  false);

    // Support for verify_oop (must happen after universe_init).
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();

    // Arraycopy stubs used by compilers.
    generate_arraycopy_stubs();

    // safefetch stubs
    generate_safefetch("SafeFetch32", sizeof(int),      &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN",  sizeof(intptr_t), &StubRoutines::_safefetchN_entry,  &StubRoutines::_safefetchN_fault_pc,  &StubRoutines::_safefetchN_continuation_pc);

    // Generate AES intrinsics code.
    if (UseAESIntrinsics) {
      StubRoutines::_aescrypt_encryptBlock = generate_AES_encryptBlock("AES_encryptBlock");
      StubRoutines::_aescrypt_decryptBlock = generate_AES_decryptBlock("AES_decryptBlock");
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_AES_encrypt("AES_encryptBlock_chaining");
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_AES_decrypt("AES_decryptBlock_chaining");
    }

    // Generate SHA1/SHA256/SHA512 intrinsics code.
    if (UseSHA1Intrinsics) {
      StubRoutines::_sha1_implCompress   = generate_SHA1_stub(false, "SHA1_singleBlock");
      StubRoutines::_sha1_implCompressMB = generate_SHA1_stub(true,  "SHA1_multiBlock");
    }
    if (UseSHA256Intrinsics) {
      StubRoutines::_sha256_implCompress   = generate_SHA256_stub(false, "SHA256_singleBlock");
      StubRoutines::_sha256_implCompressMB = generate_SHA256_stub(true,  "SHA256_multiBlock");
    }
    if (UseSHA512Intrinsics) {
      StubRoutines::_sha512_implCompress   = generate_SHA512_stub(false, "SHA512_singleBlock");
      StubRoutines::_sha512_implCompressMB = generate_SHA512_stub(true,  "SHA512_multiBlock");
    }

#ifdef COMPILER2
    if (UseMultiplyToLenIntrinsic) {
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
    }
    if (UseMontgomeryMultiplyIntrinsic) {
      StubRoutines::_montgomeryMultiply
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
    }
    if (UseMontgomerySquareIntrinsic) {
      StubRoutines::_montgomerySquare
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
    }
#endif
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    // Replace the standard masm with a special one:
    _masm = new MacroAssembler(code);

    _stub_count = !all ? 0x100 : 0x200;
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }

 private:
  int _stub_count;
  void stub_prolog(StubCodeDesc* cdesc) {
#ifdef ASSERT
    // Put extra information in the stub code, to make it more readable.
    // Write the high part of the address.
    // [RGV] Check if there is a dependency on the size of this prolog.
    __ emit_32((intptr_t)cdesc >> 32);
    __ emit_32((intptr_t)cdesc);
    __ emit_32(++_stub_count);
#endif
    align(true);
  }

  void align(bool at_header = false) {
    // z/Architecture cache line size is 256 bytes.
    // There is no obvious benefit in aligning stub
    // code to cache lines. Use CodeEntryAlignment instead.
    const unsigned int icache_line_size      = CodeEntryAlignment;
    const unsigned int icache_half_line_size = MIN2<unsigned int>(32, CodeEntryAlignment);

    if (at_header) {
      while ((intptr_t)(__ pc()) % icache_line_size != 0) {
        __ emit_16(0);
      }
    } else {
      while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
        __ z_nop();
      }
    }
  }

};

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}