/*
 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, Red Hat Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_aarch64.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/top.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

#ifdef BUILTIN_SIM
#include "../../../../../../simulator/simulator.hpp"
#endif

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#undef __
#define __ _masm->
#define TIMES_OOP Address::sxtw(exact_log2(UseCompressedOops ? 4 : 8))
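
// TIMES_OOP scales an array index by the heap oop size: 4 bytes with
// compressed oops, 8 without, i.e. a sign-extend-and-shift by 2 or 3.
// A minimal standalone sketch of the same computation (hypothetical
// helper for illustration only, not used by the stubs):
//
//   static inline size_t oop_element_offset(size_t index) {
//     return index << (UseCompressedOops ? 2 : 3); // index * 4 or index * 8
//   }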

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(counter) ((void)0)
#else
  void inc_counter_np_(int& counter) {
    __ lea(rscratch2, ExternalAddress((address)&counter));
    __ ldrw(rscratch1, Address(rscratch2));
    __ addw(rscratch1, rscratch1, 1);
    __ strw(rscratch1, Address(rscratch2));
  }
#define inc_counter_np(counter) \
  BLOCK_COMMENT("inc_counter " #counter); \
  inc_counter_np_(counter);
#endif

  // Call stubs are used to call Java from C
  //
  // Arguments:
  //    c_rarg0:   call wrapper address                   address
  //    c_rarg1:   result                                 address
  //    c_rarg2:   result type                            BasicType
  //    c_rarg3:   method                                 Method*
  //    c_rarg4:   (interpreter) entry point              address
  //    c_rarg5:   parameters                             intptr_t*
  //    c_rarg6:   parameter size (in words)              int
  //    c_rarg7:   thread                                 Thread*
  //
  // There is no return from the stub itself as any Java result
  // is written to result
  //
  // we save r30 (lr) as the return PC at the base of the frame and
  // link r29 (fp) below it as the frame pointer installing sp (r31)
  // into fp.
  //
  // we save r0-r7, which accounts for all the c arguments.
  //
  // TODO: strictly do we need to save them all? they are treated as
  // volatile by C so could we omit saving the ones we are going to
  // place in global registers (thread? method?) or those we only use
  // during setup of the Java call?
  //
  // we don't need to save r8 which C uses as an indirect result location
  // return register.
  //
  // we don't need to save r9-r15 which both C and Java treat as
  // volatile
  //
  // we don't need to save r16-r18 because Java does not use them
  //
  // we save r19-r28 which Java uses as scratch registers and C
  // expects to be callee-save
  //
  // we don't save any FP registers since only v8-v15 are callee-save
  // (strictly only the f and d components) and Java uses them as
  // callee-save. v0-v7 are arg registers and C treats v16-v31 as
  // volatile (as does Java?)
  //
  // so the stub frame looks like this when we enter Java code
  //
  //     [ return_from_Java     ] <--- sp
  //     [ argument word n      ]
  //      ...
  // -27 [ argument word 1      ]
  // -26 [ saved d15            ] <--- sp_after_call
  // -25 [ saved d14            ]
  // -24 [ saved d13            ]
  // -23 [ saved d12            ]
  // -22 [ saved d11            ]
  // -21 [ saved d10            ]
  // -20 [ saved d9             ]
  // -19 [ saved d8             ]
  // -18 [ saved r28            ]
  // -17 [ saved r27            ]
  // -16 [ saved r26            ]
  // -15 [ saved r25            ]
  // -14 [ saved r24            ]
  // -13 [ saved r23            ]
  // -12 [ saved r22            ]
  // -11 [ saved r21            ]
  // -10 [ saved r20            ]
  //  -9 [ saved r19            ]
  //  -8 [ call wrapper   (r0)  ]
  //  -7 [ result         (r1)  ]
  //  -6 [ result type    (r2)  ]
  //  -5 [ method         (r3)  ]
  //  -4 [ entry point    (r4)  ]
  //  -3 [ parameters     (r5)  ]
  //  -2 [ parameter size (r6)  ]
  //  -1 [ thread         (r7)  ]
  //   0 [ saved fp       (r29) ] <--- fp == saved sp (r31)
  //   1 [ saved lr       (r30) ]

  // Call stub stack layout word offsets from fp
  enum call_stub_layout {
    sp_after_call_off  = -26,

    d15_off            = -26,
    d14_off            = -25,
    d13_off            = -24,
    d12_off            = -23,
    d11_off            = -22,
    d10_off            = -21,
    d9_off             = -20,
    d8_off             = -19,

    r28_off            = -18,
    r27_off            = -17,
    r26_off            = -16,
    r25_off            = -15,
    r24_off            = -14,
    r23_off            = -13,
    r22_off            = -12,
    r21_off            = -11,
    r20_off            = -10,
    r19_off            =  -9,
    call_wrapper_off   =  -8,
    result_off         =  -7,
    result_type_off    =  -6,
    method_off         =  -5,
    entry_point_off    =  -4,
    parameters_off     =  -3,
    parameter_size_off =  -2,
    thread_off         =  -1,
    fp_off             =   0,
    retaddr_off        =   1,
  };
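
  // Worked example of the layout above (a sketch, assuming wordSize == 8):
  // the thread argument is spilled at fp + thread_off * wordSize == fp - 8,
  // and the register save area ends at fp + sp_after_call_off * wordSize
  // == fp - 208, which is exactly where sp points once the frame is set up
  // (see the sp_after_call Address below).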

  address generate_call_stub(address& return_address) {
    assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 &&
           (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
           "adjust this code");

    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    const Address sp_after_call(rfp, sp_after_call_off * wordSize);

    const Address call_wrapper  (rfp, call_wrapper_off   * wordSize);
    const Address result        (rfp, result_off         * wordSize);
    const Address result_type   (rfp, result_type_off    * wordSize);
    const Address method        (rfp, method_off         * wordSize);
    const Address entry_point   (rfp, entry_point_off    * wordSize);
    const Address parameters    (rfp, parameters_off     * wordSize);
    const Address parameter_size(rfp, parameter_size_off * wordSize);

    const Address thread        (rfp, thread_off         * wordSize);

    const Address d15_save      (rfp, d15_off * wordSize);
    const Address d14_save      (rfp, d14_off * wordSize);
    const Address d13_save      (rfp, d13_off * wordSize);
    const Address d12_save      (rfp, d12_off * wordSize);
    const Address d11_save      (rfp, d11_off * wordSize);
    const Address d10_save      (rfp, d10_off * wordSize);
    const Address d9_save       (rfp, d9_off  * wordSize);
    const Address d8_save       (rfp, d8_off  * wordSize);

    const Address r28_save      (rfp, r28_off * wordSize);
    const Address r27_save      (rfp, r27_off * wordSize);
    const Address r26_save      (rfp, r26_off * wordSize);
    const Address r25_save      (rfp, r25_off * wordSize);
    const Address r24_save      (rfp, r24_off * wordSize);
    const Address r23_save      (rfp, r23_off * wordSize);
    const Address r22_save      (rfp, r22_off * wordSize);
    const Address r21_save      (rfp, r21_off * wordSize);
    const Address r20_save      (rfp, r20_off * wordSize);
    const Address r19_save      (rfp, r19_off * wordSize);

    // stub code

    // we need a C prolog to bootstrap the x86 caller into the sim
    __ c_stub_prolog(8, 0, MacroAssembler::ret_type_void);

    address aarch64_entry = __ pc();

#ifdef BUILTIN_SIM
    // Save sender's SP for stack traces.
    __ mov(rscratch1, sp);
    __ str(rscratch1, Address(__ pre(sp, -2 * wordSize)));
#endif
    // set up frame and move sp to end of save area
    __ enter();
    __ sub(sp, rfp, -sp_after_call_off * wordSize);

    // save register parameters and Java scratch/global registers
    // n.b. we save thread even though it gets installed in
    // rthread because we want to sanity check rthread later
    __ str(c_rarg7,  thread);
    __ strw(c_rarg6, parameter_size);
    __ str(c_rarg5,  parameters);
    __ str(c_rarg4,  entry_point);
    __ str(c_rarg3,  method);
    __ str(c_rarg2,  result_type);
    __ str(c_rarg1,  result);
    __ str(c_rarg0,  call_wrapper);
    __ str(r19,      r19_save);
    __ str(r20,      r20_save);
    __ str(r21,      r21_save);
    __ str(r22,      r22_save);
    __ str(r23,      r23_save);
    __ str(r24,      r24_save);
    __ str(r25,      r25_save);
    __ str(r26,      r26_save);
    __ str(r27,      r27_save);
    __ str(r28,      r28_save);

    __ strd(v8,  d8_save);
    __ strd(v9,  d9_save);
    __ strd(v10, d10_save);
    __ strd(v11, d11_save);
    __ strd(v12, d12_save);
    __ strd(v13, d13_save);
    __ strd(v14, d14_save);
    __ strd(v15, d15_save);

    // install Java thread in global register now we have saved
    // whatever value it held
    __ mov(rthread, c_rarg7);
    // And method
    __ mov(rmethod, c_rarg3);

    // set up the heapbase register
    __ reinit_heapbase();

#ifdef ASSERT
    // make sure we have no pending exceptions
    {
      Label L;
      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
      __ cmp(rscratch1, (unsigned)NULL_WORD);
      __ br(Assembler::EQ, L);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ BIND(L);
    }
#endif
    // pass parameters if any
    __ mov(esp, sp);
    __ sub(rscratch1, sp, c_rarg6, ext::uxtw, LogBytesPerWord); // Move SP out of the way
    __ andr(sp, rscratch1, -2 * wordSize);

    BLOCK_COMMENT("pass parameters if any");
    Label parameters_done;
    // parameter count is still in c_rarg6
    // and parameter pointer identifying param 1 is in c_rarg5
    __ cbzw(c_rarg6, parameters_done);

    address loop = __ pc();
    __ ldr(rscratch1, Address(__ post(c_rarg5, wordSize)));
    __ subsw(c_rarg6, c_rarg6, 1);
    __ push(rscratch1);
    __ br(Assembler::GT, loop);

    __ BIND(parameters_done);

    // call Java entry -- passing Method* and current sp
    //      rmethod: Method*
    //      r13: sender sp
    BLOCK_COMMENT("call Java function");
    __ mov(r13, sp);
    __ blr(c_rarg4);

    // tell the simulator we have returned to the stub

    // we do this here because the notify will already have been done
    // if we get to the next instruction via an exception
    //
    // n.b. adding this instruction here affects the calculation of
    // whether or not a routine returns to the call stub (used when
    // doing stack walks) since the normal test is to check the return
    // pc against the address saved below. so we may need to allow for
    // this extra instruction in the check.

    if (NotifySimulator) {
      __ notify(Assembler::method_reentry);
    }
    // save current address for use by exception handling code

    return_address = __ pc();
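
    // The type dispatch emitted below is equivalent to this sketch
    // (plain C++, illustrative only -- the stub works on registers,
    // not a switch):
    //
    //   switch (result_type) {
    //     case T_OBJECT:                     // stored like T_LONG
    //     case T_LONG:   *(jlong*)result   = r0; break;
    //     case T_FLOAT:  *(jfloat*)result  = v0; break;
    //     case T_DOUBLE: *(jdouble*)result = v0; break;
    //     default:       *(jint*)result    = (jint)r0; break; // T_INT and smaller
    //   }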

    // store result depending on type (everything that is not
    // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    // n.b. this assumes Java returns an integral result in r0
    // and a floating result in j_farg0
    __ ldr(j_rarg2, result);
    Label is_long, is_float, is_double, exit;
    __ ldr(j_rarg1, result_type);
    __ cmp(j_rarg1, T_OBJECT);
    __ br(Assembler::EQ, is_long);
    __ cmp(j_rarg1, T_LONG);
    __ br(Assembler::EQ, is_long);
    __ cmp(j_rarg1, T_FLOAT);
    __ br(Assembler::EQ, is_float);
    __ cmp(j_rarg1, T_DOUBLE);
    __ br(Assembler::EQ, is_double);

    // handle T_INT case
    __ strw(r0, Address(j_rarg2));

    __ BIND(exit);

    // pop parameters
    __ sub(esp, rfp, -sp_after_call_off * wordSize);

#ifdef ASSERT
    // verify that threads correspond
    {
      Label L, S;
      __ ldr(rscratch1, thread);
      __ cmp(rthread, rscratch1);
      __ br(Assembler::NE, S);
      __ get_thread(rscratch1);
      __ cmp(rthread, rscratch1);
      __ br(Assembler::EQ, L);
      __ BIND(S);
      __ stop("StubRoutines::call_stub: threads must correspond");
      __ BIND(L);
    }
#endif

    // restore callee-save registers
    __ ldrd(v15, d15_save);
    __ ldrd(v14, d14_save);
    __ ldrd(v13, d13_save);
    __ ldrd(v12, d12_save);
    __ ldrd(v11, d11_save);
    __ ldrd(v10, d10_save);
    __ ldrd(v9,  d9_save);
    __ ldrd(v8,  d8_save);

    __ ldr(r28, r28_save);
    __ ldr(r27, r27_save);
    __ ldr(r26, r26_save);
    __ ldr(r25, r25_save);
    __ ldr(r24, r24_save);
    __ ldr(r23, r23_save);
    __ ldr(r22, r22_save);
    __ ldr(r21, r21_save);
    __ ldr(r20, r20_save);
    __ ldr(r19, r19_save);
    __ ldr(c_rarg0,  call_wrapper);
    __ ldr(c_rarg1,  result);
    __ ldrw(c_rarg2, result_type);
    __ ldr(c_rarg3,  method);
    __ ldr(c_rarg4,  entry_point);
    __ ldr(c_rarg5,  parameters);
    __ ldr(c_rarg6,  parameter_size);
    __ ldr(c_rarg7,  thread);

#ifndef PRODUCT
    // tell the simulator we are about to end Java execution
    if (NotifySimulator) {
      __ notify(Assembler::method_exit);
    }
#endif
    // leave frame and return to caller
    __ leave();
    __ ret(lr);

    // handle return types different from T_INT

    __ BIND(is_long);
    __ str(r0, Address(j_rarg2, 0));
    __ br(Assembler::AL, exit);

    __ BIND(is_float);
    __ strs(j_farg0, Address(j_rarg2, 0));
    __ br(Assembler::AL, exit);

    __ BIND(is_double);
    __ strd(j_farg0, Address(j_rarg2, 0));
    __ br(Assembler::AL, exit);

    return start;
  }

  // Return point for a Java call if there's an exception thrown in
  // Java code.  The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case
  // of an exception crossing an activation frame boundary, that is
  // not the case if the callee is compiled code => need to set up the
  // rsp.
  //
  // r0: exception oop

  // NOTE: this is used as a target from the signal handler so it
  // needs an x86 prolog which returns into the current simulator
  // executing the generated catch_exception code. so the prolog
  // needs to install rax in a sim register and adjust the sim's
  // restart pc to enter the generated code at the start position
  // then return from native to simulated execution.
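
  // Sketch of the C++-side contract (illustrative; simplified from the
  // JavaCalls machinery, which is not in this file): after the call stub
  // returns, the caller tests the thread for the exception this stub
  // deposits:
  //
  //   stub(wrapper, &result, type, method, entry, params, size, thread);
  //   if (thread->has_pending_exception()) {
  //     // propagate or handle; result is not valid
  //   }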

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    address start = __ pc();

    // same as in generate_call_stub():
    const Address sp_after_call(rfp, sp_after_call_off * wordSize);
    const Address thread       (rfp, thread_off        * wordSize);

#ifdef ASSERT
    // verify that threads correspond
    {
      Label L, S;
      __ ldr(rscratch1, thread);
      __ cmp(rthread, rscratch1);
      __ br(Assembler::NE, S);
      __ get_thread(rscratch1);
      __ cmp(rthread, rscratch1);
      __ br(Assembler::EQ, L);
      __ bind(S);
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L);
    }
#endif

    // set pending exception
    __ verify_oop(r0);

    __ str(r0, Address(rthread, Thread::pending_exception_offset()));
    __ mov(rscratch1, (address)__FILE__);
    __ str(rscratch1, Address(rthread, Thread::exception_file_offset()));
    __ movw(rscratch1, (int)__LINE__);
    __ strw(rscratch1, Address(rthread, Thread::exception_line_offset()));

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL,
           "_call_stub_return_address must have been generated before");
    __ b(StubRoutines::_call_stub_return_address);

    return start;
  }

  // Continuation point for runtime calls returning with a pending
  // exception.  The pending exception check happened in the runtime
  // or native call stub.  The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  // r0: exception
  // r3: throwing pc
  //
  // NOTE: At entry of this stub, exception-pc must be in LR !!

  // NOTE: this is always used as a jump target within generated code
  // so it just needs to be generated code with no x86 prolog

  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward exception");
    address start = __ pc();

    // Upon entry, LR points to the return address returning into
    // Java (interpreted or compiled) code; i.e., the return address
    // becomes the throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack
    // but the exception handler will reset the stack pointer ->
    // ignore them.  A potential result in registers can be ignored as
    // well.

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
    {
      Label L;
      __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
      __ cbnz(rscratch1, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into r19

    // call the VM to find the handler address associated with the
    // caller address. pass thread in r0 and caller pc (ret address)
    // in r1. n.b. the caller pc is in lr, unlike x86 where it is on
    // the stack.
    __ mov(c_rarg1, lr);
    // lr will be trashed by the VM call so we move it to r19
    // (callee-saved) because we also need to pass it to the handler
    // returned by this call.
    __ mov(r19, lr);
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address,
                         SharedRuntime::exception_handler_for_return_address),
                    rthread, c_rarg1);
    // we should not really care that lr is no longer the callee
    // address.  we saved the value the handler needs in r19 so we can
    // just copy it to r3.  however, the C2 handler will push its own
    // frame and then call into the VM, and the VM code asserts that
    // the PC for the frame above the handler belongs to a compiled
    // Java method.  So, we restore lr here to satisfy that assert.
    __ mov(lr, r19);
    // setup r0 & r3 & clear pending exception
    __ mov(r3, r19);
    __ mov(r19, r0);
    __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
    __ str(zr, Address(rthread, Thread::pending_exception_offset()));

#ifdef ASSERT
    // make sure exception is set
    {
      Label L;
      __ cbnz(r0, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif

    // continue at exception handler
    // r0: exception
    // r3: throwing pc
    // r19: exception handler
    __ verify_oop(r0);
    __ br(r19);

    return start;
  }

  // Non-destructive plausibility checks for oops
  //
  // Arguments:
  //    r0: oop to verify
  //    rscratch1: error message
  //
  // Stack after saving c_rarg3:
  //    [tos + 0]: saved c_rarg3
  //    [tos + 1]: saved c_rarg2
  //    [tos + 2]: saved lr
  //    [tos + 3]: saved rscratch2
  //    [tos + 4]: saved r0
  //    [tos + 5]: saved rscratch1
  address generate_verify_oop() {

    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();

    Label exit, error;

    // save c_rarg2 and c_rarg3
    __ stp(c_rarg3, c_rarg2, Address(__ pre(sp, -16)));

    // __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
    __ lea(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
    __ ldr(c_rarg3, Address(c_rarg2));
    __ add(c_rarg3, c_rarg3, 1);
    __ str(c_rarg3, Address(c_rarg2));

    // object is in r0
    // make sure object is 'reasonable'
    __ cbz(r0, exit); // if obj is NULL it is OK

    // Check if the oop is in the right area of memory
    __ mov(c_rarg3, (intptr_t) Universe::verify_oop_mask());
    __ andr(c_rarg2, r0, c_rarg3);
    __ mov(c_rarg3, (intptr_t) Universe::verify_oop_bits());

    // Compare c_rarg2 and c_rarg3.  We don't use a compare
    // instruction here because the flags register is live.
    __ eor(c_rarg2, c_rarg2, c_rarg3);
    __ cbnz(c_rarg2, error);
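
    // The two instructions above implement the plausibility predicate
    // (sketch):
    //
    //   is_plausible = ((uintptr_t)obj & Universe::verify_oop_mask())
    //                    == Universe::verify_oop_bits();
    //
    // eor/cbnz is used instead of cmp/br because the flags register
    // must be preserved by this stub.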

    // make sure klass is 'reasonable', i.e. non-NULL
    __ load_klass(r0, r0); // get klass
    __ cbz(r0, error);     // if klass is NULL it is broken

    // return if everything seems ok
    __ bind(exit);

    __ ldp(c_rarg3, c_rarg2, Address(__ post(sp, 16)));
    __ ret(lr);

    // handle errors
    __ bind(error);
    __ ldp(c_rarg3, c_rarg2, Address(__ post(sp, 16)));

    __ push(RegSet::range(r0, r29), sp);
    // debug(char* msg, int64_t pc, int64_t regs[])
    __ mov(c_rarg0, rscratch1); // pass address of error message
    __ mov(c_rarg1, lr);        // pass return address
    __ mov(c_rarg2, sp);        // pass address of regs on stack
#ifndef PRODUCT
    assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
    BLOCK_COMMENT("call MacroAssembler::debug");
    __ mov(rscratch1, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
    __ blrt(rscratch1, 3, 0, 1);

    return start;
  }

  void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) { __ b(L_no_overlap); }

  // Generate code for an array write pre barrier
  //
  //     addr    - starting address
  //     count   - element count
  //     tmp     - scratch register
  //
  //     Destroy no registers!
  //
  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
    case BarrierSet::G1SATBCTLogging:
      // With G1, don't generate the call if we statically know that the target is uninitialized
      if (!dest_uninitialized) {
        __ push(RegSet::range(r0, r29), sp); // integer registers except lr & sp
        if (count == c_rarg0) {
          if (addr == c_rarg1) {
            // exactly backwards!!  swap the two registers through the stack
            __ stp(c_rarg0, c_rarg1, __ pre(sp, -2 * wordSize));
            __ ldp(c_rarg1, c_rarg0, __ post(sp, 2 * wordSize));
          } else {
            __ mov(c_rarg1, count);
            __ mov(c_rarg0, addr);
          }
        } else {
          __ mov(c_rarg0, addr);
          __ mov(c_rarg1, count);
        }
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
        __ pop(RegSet::range(r0, r29), sp); // integer registers except lr & sp
      }
      break;
    case BarrierSet::CardTableModRef:
    case BarrierSet::CardTableExtension:
    case BarrierSet::ModRef:
      break;
    default:
      ShouldNotReachHere();

    }
  }
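
  // The G1 case above is just argument marshalling: it arranges
  // (addr, count) into (c_rarg0, c_rarg1) without clobbering either,
  // then calls the shared pre-barrier entry.  Sketch of the callee's
  // contract (simplified, illustrative only):
  //
  //   static_write_ref_array_pre(oop* dst, size_t count):
  //     remember the current value of each dst[i] for SATB marking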

  //
  // Generate code for an array write post barrier
  //
  //  Input:
  //     start    - register containing starting address of destination array
  //     end      - register containing ending address of destination array
  //     scratch  - scratch register
  //
  //  The input registers are overwritten.
  //  The ending address is inclusive.
  void gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
    assert_different_registers(start, end, scratch);
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
    case BarrierSet::G1SATBCTLogging:

      {
        __ push(RegSet::range(r0, r29), sp); // integer registers except lr & sp
        // must compute element count unless barrier set interface is changed (other platforms supply count)
        assert_different_registers(start, end, scratch);
        __ lea(scratch, Address(end, BytesPerHeapOop));
        __ sub(scratch, scratch, start);              // subtract start to get #bytes
        __ lsr(scratch, scratch, LogBytesPerHeapOop); // convert to element count
        __ mov(c_rarg0, start);
        __ mov(c_rarg1, scratch);
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
        __ pop(RegSet::range(r0, r29), sp); // integer registers except lr & sp
      }
      break;
    case BarrierSet::CardTableModRef:
    case BarrierSet::CardTableExtension:
      {
        CardTableModRefBS* ct = (CardTableModRefBS*)bs;
        assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

        Label L_loop;

        __ lsr(start, start, CardTableModRefBS::card_shift);
        __ lsr(end, end, CardTableModRefBS::card_shift);
        __ sub(end, end, start); // number of bytes to copy

        const Register count = end; // 'end' register contains bytes count now
        __ mov(scratch, (address)ct->byte_map_base);
        __ add(start, start, scratch);
        __ BIND(L_loop);
        __ strb(zr, Address(start, count));
        __ subs(count, count, 1);
        __ br(Assembler::HS, L_loop);
      }
      break;
    default:
      ShouldNotReachHere();

    }
  }

  typedef enum {
    copy_forwards = 1,
    copy_backwards = -1
  } copy_direction;

  // Bulk copy of blocks of 8 words.
  //
  // count is a count of words.
  //
  // Precondition: count >= 2
  //
  // Postconditions:
  //
  // The least significant bit of count contains the remaining count
  // of words to copy.  The rest of count is trash.
  //
  // s and d are adjusted to point to the remaining words to copy
  //
  void generate_copy_longs(Label &start, Register s, Register d, Register count,
                           copy_direction direction) {
    int unit = wordSize * direction;

    int offset;
    const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6,
      t4 = r7, t5 = r10, t6 = r11, t7 = r12;

    assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7);
    assert_different_registers(s, d, count, rscratch1);

    Label again, large, small;
    __ align(6);
    __ bind(start);
    __ cmp(count, 8);
    __ br(Assembler::LO, small);
    if (direction == copy_forwards) {
      __ sub(s, s, 2 * wordSize);
      __ sub(d, d, 2 * wordSize);
    }
    __ subs(count, count, 16);
    __ br(Assembler::GE, large);

    // 8 <= count < 16 words.  Copy 8.
    __ ldp(t0, t1, Address(s, 2 * unit));
    __ ldp(t2, t3, Address(s, 4 * unit));
    __ ldp(t4, t5, Address(s, 6 * unit));
    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));

    __ stp(t0, t1, Address(d, 2 * unit));
    __ stp(t2, t3, Address(d, 4 * unit));
    __ stp(t4, t5, Address(d, 6 * unit));
    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));

    if (direction == copy_forwards) {
      __ add(s, s, 2 * wordSize);
      __ add(d, d, 2 * wordSize);
    }

    {
      Label L1, L2;
      __ bind(small);
      __ tbz(count, exact_log2(4), L1);
      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
      __ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
      __ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
      __ bind(L1);

      __ tbz(count, 1, L2);
      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
      __ bind(L2);
    }

    __ ret(lr);

    __ align(6);
    __ bind(large);

    // Fill 8 registers
    __ ldp(t0, t1, Address(s, 2 * unit));
    __ ldp(t2, t3, Address(s, 4 * unit));
    __ ldp(t4, t5, Address(s, 6 * unit));
    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));

    __ bind(again);

    if (direction == copy_forwards && PrefetchCopyIntervalInBytes > 0)
      __ prfm(Address(s, PrefetchCopyIntervalInBytes), PLDL1KEEP);

    __ stp(t0, t1, Address(d, 2 * unit));
    __ ldp(t0, t1, Address(s, 2 * unit));
    __ stp(t2, t3, Address(d, 4 * unit));
    __ ldp(t2, t3, Address(s, 4 * unit));
    __ stp(t4, t5, Address(d, 6 * unit));
    __ ldp(t4, t5, Address(s, 6 * unit));
    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));

    __ subs(count, count, 8);
    __ br(Assembler::HS, again);

    // Drain
    __ stp(t0, t1, Address(d, 2 * unit));
    __ stp(t2, t3, Address(d, 4 * unit));
    __ stp(t4, t5, Address(d, 6 * unit));
    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));

    if (direction == copy_forwards) {
      __ add(s, s, 2 * wordSize);
      __ add(d, d, 2 * wordSize);
    }

    {
      Label L1, L2;
      __ tbz(count, exact_log2(4), L1);
      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
      __ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
      __ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
      __ bind(L1);

      __ tbz(count, 1, L2);
      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
      __ bind(L2);
    }

    __ ret(lr);
  }
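
  // The "again" loop above is software-pipelined: each iteration stores
  // the eight words loaded on the previous iteration while loading the
  // next eight, so loads and stores overlap.  A scalar sketch of the
  // forward case (illustrative C, not the generated code):
  //
  //   load t[0..7] from s; s += 8 words;
  //   while ((count -= 8) >= 0) {        // HS == unsigned >=
  //     store t[0..7] to d; load t[0..7] from s;
  //     s += 8 words; d += 8 words;
  //   }
  //   store t[0..7] to d;                // drain the pipeline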

  // Small copy: less than 16 bytes.
  //
  // NB: Ignores all of the bits of count which represent more than 15
  // bytes, so a caller doesn't have to mask them.

  void copy_memory_small(Register s, Register d, Register count, Register tmp, int step) {
    bool is_backwards = step < 0;
    size_t granularity = uabs(step);
    int direction = is_backwards ? -1 : 1;
    int unit = wordSize * direction;

    Label Lpair, Lword, Lint, Lshort, Lbyte;

    assert(granularity
           && granularity <= sizeof (jlong), "Impossible granularity in copy_memory_small");

    const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6;

    // ??? I don't know if this bit-test-and-branch is the right thing
    // to do.  It does a lot of jumping, resulting in several
    // mispredicted branches.  It might make more sense to do this
    // with something like Duff's device with a single computed branch.

    __ tbz(count, 3 - exact_log2(granularity), Lword);
    __ ldr(tmp, Address(__ adjust(s, unit, is_backwards)));
    __ str(tmp, Address(__ adjust(d, unit, is_backwards)));
    __ bind(Lword);

    if (granularity <= sizeof (jint)) {
      __ tbz(count, 2 - exact_log2(granularity), Lint);
      __ ldrw(tmp, Address(__ adjust(s, sizeof (jint) * direction, is_backwards)));
      __ strw(tmp, Address(__ adjust(d, sizeof (jint) * direction, is_backwards)));
      __ bind(Lint);
    }

    if (granularity <= sizeof (jshort)) {
      __ tbz(count, 1 - exact_log2(granularity), Lshort);
      __ ldrh(tmp, Address(__ adjust(s, sizeof (jshort) * direction, is_backwards)));
      __ strh(tmp, Address(__ adjust(d, sizeof (jshort) * direction, is_backwards)));
      __ bind(Lshort);
    }

    if (granularity <= sizeof (jbyte)) {
      __ tbz(count, 0, Lbyte);
      __ ldrb(tmp, Address(__ adjust(s, sizeof (jbyte) * direction, is_backwards)));
      __ strb(tmp, Address(__ adjust(d, sizeof (jbyte) * direction, is_backwards)));
      __ bind(Lbyte);
    }
  }

  Label copy_f, copy_b;

  // All-singing all-dancing memory copy.
  //
  // Copy count units of memory from s to d.  The size of a unit is
  // step, which can be positive or negative depending on the direction
  // of copy.  If is_aligned is false, we align the source address.
  //

  void copy_memory(bool is_aligned, Register s, Register d,
                   Register count, Register tmp, int step) {
    copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
    bool is_backwards = step < 0;
    int granularity = uabs(step);
    const Register t0 = r3, t1 = r4;

    if (is_backwards) {
      __ lea(s, Address(s, count, Address::uxtw(exact_log2(-step))));
      __ lea(d, Address(d, count, Address::uxtw(exact_log2(-step))));
    }

    Label done, tail;

    __ cmp(count, 16/granularity);
    __ br(Assembler::LO, tail);

    // Now we've got the small case out of the way we can align the
    // source address on a 2-word boundary.

    Label aligned;

    if (is_aligned) {
      // We may have to adjust by 1 word to get s 2-word-aligned.
      __ tbz(s, exact_log2(wordSize), aligned);
      __ ldr(tmp, Address(__ adjust(s, direction * wordSize, is_backwards)));
      __ str(tmp, Address(__ adjust(d, direction * wordSize, is_backwards)));
      __ sub(count, count, wordSize/granularity);
    } else {
      if (is_backwards) {
        __ andr(rscratch2, s, 2 * wordSize - 1);
      } else {
        __ neg(rscratch2, s);
        __ andr(rscratch2, rscratch2, 2 * wordSize - 1);
      }
      // rscratch2 is the byte adjustment needed to align s.
      __ cbz(rscratch2, aligned);
      __ lsr(rscratch2, rscratch2, exact_log2(granularity));
      __ sub(count, count, rscratch2);
#if 0
      // ?? This code is only correct for a disjoint copy.  It may or
      // may not make sense to use it in that case.

      // Copy the first pair; s and d may not be aligned.
      __ ldp(t0, t1, Address(s, is_backwards ? -2 * wordSize : 0));
      __ stp(t0, t1, Address(d, is_backwards ? -2 * wordSize : 0));

      // Align s and d, adjust count
      if (is_backwards) {
        __ sub(s, s, rscratch2);
        __ sub(d, d, rscratch2);
      } else {
        __ add(s, s, rscratch2);
        __ add(d, d, rscratch2);
      }
#else
      copy_memory_small(s, d, rscratch2, rscratch1, step);
#endif
    }

    __ cmp(count, 16/granularity);
    __ br(Assembler::LT, tail);
    __ bind(aligned);

    // s is now 2-word-aligned.

    // We have a count of units and some trailing bytes.  Adjust the
    // count and do a bulk copy of words.
    __ lsr(rscratch2, count, exact_log2(wordSize/granularity));
    if (direction == copy_forwards)
      __ bl(copy_f);
    else
      __ bl(copy_b);

    // And the tail.

    __ bind(tail);
    copy_memory_small(s, d, count, tmp, step);
  }


  void clobber_registers() {
#ifdef ASSERT
    __ mov(rscratch1, (uint64_t)0xdeadbeef);
    __ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
    for (Register r = r3; r <= r18; r++)
      if (r != rscratch1) __ mov(r, rscratch1);
#endif
  }

  // Scan over array at a for count oops, verifying each one.
  // Preserves a and count, clobbers rscratch1 and rscratch2.
  void verify_oop_array(size_t size, Register a, Register count, Register temp) {
    Label loop, end;
    __ mov(rscratch1, a);
    __ mov(rscratch2, zr);
    __ bind(loop);
    __ cmp(rscratch2, count);
    __ br(Assembler::HS, end);
    if (size == (size_t)wordSize) {
      __ ldr(temp, Address(a, rscratch2, Address::uxtw(exact_log2(size))));
      __ verify_oop(temp);
    } else {
      // load the narrow oop into temp (callers pass r16) and decode it
      // in place
      __ ldrw(temp, Address(a, rscratch2, Address::uxtw(exact_log2(size))));
      __ decode_heap_oop(temp); // calls verify_oop
    }
    __ add(rscratch2, rscratch2, size);
    __ b(loop);
    __ bind(end);
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  // Side Effects:
  //   disjoint_int_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_int_oop_copy().
  //
  address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry,
                                 const char *name, bool dest_uninitialized = false) {
    Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }
    __ enter();
    if (is_oop) {
      __ push(RegSet::of(d, count), sp);
      // no registers are destroyed by this call
      gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
    }
    copy_memory(aligned, s, d, count, rscratch1, size);
    if (is_oop) {
      __ pop(RegSet::of(d, count), sp);
      if (VerifyOops)
        verify_oop_array(size, d, count, r16);
      __ sub(count, count, 1); // make an inclusive end pointer
      __ lea(count, Address(d, count, Address::uxtw(exact_log2(size))));
      gen_write_ref_array_post_barrier(d, count, rscratch1);
    }
    __ leave();
    __ ret(lr);
#ifdef BUILTIN_SIM
    {
      AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
      sim->notifyCompile(const_cast<char*>(name), start);
    }
#endif
    return start;
  }
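
  // For oop arrays the raw copy above is bracketed by GC barriers.
  // Pseudocode sketch (illustrative only):
  //
  //   if (is_oop) pre_barrier(d, count);   // G1 SATB; skipped when
  //                                        // dest_uninitialized
  //   copy_memory(s, d, count);
  //   if (is_oop) post_barrier(d, d + count * size - 1);  // dirty cards;
  //                                        // note the inclusive end pointer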

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
                                 address *entry, const char *name,
                                 bool dest_uninitialized = false) {
    Register s = c_rarg0, d = c_rarg1, count = c_rarg2;

    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    __ cmp(d, s);
    __ br(Assembler::LS, nooverlap_target);

    __ enter();
    if (is_oop) {
      __ push(RegSet::of(d, count), sp);
      // no registers are destroyed by this call
      gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
    }
    copy_memory(aligned, s, d, count, rscratch1, -size);
    if (is_oop) {
      __ pop(RegSet::of(d, count), sp);
      if (VerifyOops)
        verify_oop_array(size, d, count, r16);
      __ sub(count, count, 1); // make an inclusive end pointer
      __ lea(count, Address(d, count, Address::uxtw(exact_log2(size))));
      gen_write_ref_array_post_barrier(d, count, rscratch1);
    }
    __ leave();
    __ ret(lr);
#ifdef BUILTIN_SIM
    {
      AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
      sim->notifyCompile(const_cast<char*>(name), start);
    }
#endif
    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it.  The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  // Side Effects:
  //   disjoint_byte_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_byte_copy().
  //
  address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
    const bool not_oop = false;
    return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name);
  }
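
  // A conjoint copy only has to run backwards when the regions overlap
  // with d above s; generate_conjoint_copy therefore tail-branches to
  // the disjoint (forward) stub whenever that is safe.  Sketch:
  //
  //   if (d <= s)  goto nooverlap_target;   // forward copy is safe
  //   copy backwards, highest address first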

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it.  The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
                                      address* entry, const char *name) {
    const bool not_oop = false;
    return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name);
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
  // let the hardware handle it.  The two or four words within dwords
  // or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  // Side Effects:
  //   disjoint_short_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_short_copy().
  //
  address generate_disjoint_short_copy(bool aligned,
                                       address* entry, const char *name) {
    const bool not_oop = false;
    return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name);
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
  // let the hardware handle it.  The two or four words within dwords
  // or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
                                       address *entry, const char *name) {
    const bool not_oop = false;
    return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name);
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  // Side Effects:
  //   disjoint_int_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_int_oop_copy().
  //
  address generate_disjoint_int_copy(bool aligned, address *entry,
                                     const char *name, bool dest_uninitialized = false) {
    const bool not_oop = false;
    return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name);
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
                                     address *entry, const char *name,
                                     bool dest_uninitialized = false) {
    const bool not_oop = false;
    return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name);
  }


  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as size_t, can be zero
  //
  // Side Effects:
  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
  //
  address generate_disjoint_long_copy(bool aligned, address *entry,
                                      const char *name, bool dest_uninitialized = false) {
    const bool not_oop = false;
    return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name);
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as size_t, can be zero
  //
  address generate_conjoint_long_copy(bool aligned,
                                      address nooverlap_target, address *entry,
                                      const char *name, bool dest_uninitialized = false) {
    const bool not_oop = false;
    return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name);
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as size_t, can be zero
  //
  // Side Effects:
  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
  //
  address generate_disjoint_oop_copy(bool aligned, address *entry,
                                     const char *name, bool dest_uninitialized = false) {
    const bool is_oop = true;
    const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
    return generate_disjoint_copy(size, aligned, is_oop, entry, name);
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as size_t, can be zero
  //
  address generate_conjoint_oop_copy(bool aligned,
                                     address nooverlap_target, address *entry,
                                     const char *name, bool dest_uninitialized = false) {
    const bool is_oop = true;
    const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
    return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, name);
  }
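
  // The type check emitted below follows HotSpot's standard two-level
  // subtype test.  Sketch (simplified; see check_klass_subtype_fast_path
  // and check_klass_subtype_slow_path in the macro assembler for the
  // real thing):
  //
  //   if (sub_klass == super_klass) hit;
  //   if (*(sub_klass + super_check_offset) == super_klass) hit;  // fast path
  //   else linear-scan sub_klass's secondary supers array;        // slow path
  //   miss: fall through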

  // Helper for generating a dynamic type check.
  // Smashes rscratch1.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Label& L_success) {
    assert_different_registers(sub_klass, super_check_offset, super_klass);

    BLOCK_COMMENT("type_check:");

    Label L_miss;

    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL,
                                     super_check_offset);
    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);

    // Fall through on failure!
    __ BIND(L_miss);
  }

  //
  //  Generate checkcasting array copy stub
  //
  //  Input:
  //    c_rarg0   - source array address
  //    c_rarg1   - destination array address
  //    c_rarg2   - element count, treated as ssize_t, can be zero
  //    c_rarg3   - size_t ckoff (super_check_offset)
  //    c_rarg4   - oop ckval (super_klass)
  //
  //  Output:
  //    r0 ==  0  -  success
  //    r0 == -1^K - failure, where K is partial transfer count
  //
  address generate_checkcast_copy(const char *name, address *entry,
                                  bool dest_uninitialized = false) {

    Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;

    // Input registers (after setup_arg_regs)
    const Register from        = c_rarg0;   // source array address
    const Register to          = c_rarg1;   // destination array address
    const Register count       = c_rarg2;   // element count
    const Register ckoff       = c_rarg3;   // super_check_offset
    const Register ckval       = c_rarg4;   // super_klass

    // Registers used as temps (r18, r19, r20 are save-on-entry)
    const Register count_save  = r21;       // orig element count
    const Register start_to    = r20;       // destination array start address
    const Register copied_oop  = r18;       // actual oop copied
    const Register r19_klass   = r19;       // oop._klass

    //---------------------------------------------------------------
    // Assembler stub will be used for this call to arraycopy
    // if the two arrays are subtypes of Object[] but the
    // destination array type is not equal to or a supertype
    // of the source type.  Each element must be separately
    // checked.

    assert_different_registers(from, to, count, ckoff, ckval, start_to,
                               copied_oop, r19_klass, count_save);

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef ASSERT
    // caller guarantees that the arrays really are different
    // otherwise, we would have to make conjoint checks
    { Label L;
      array_overlap_test(L, TIMES_OOP);
      __ stop("checkcast_copy within a single array");
      __ bind(L);
    }
#endif //ASSERT

    // Caller of this entry point must set up the argument registers.
    if (entry != NULL) {
      *entry = __ pc();
      BLOCK_COMMENT("Entry:");
    }

    // Empty array:  Nothing to do.
    __ cbz(count, L_done);

    __ push(RegSet::of(r18, r19, r20, r21), sp);

#ifdef ASSERT
    BLOCK_COMMENT("assert consistent ckoff/ckval");
    // The ckoff and ckval must be mutually consistent,
    // even though caller generates both.
    { Label L;
      int sco_offset = in_bytes(Klass::super_check_offset_offset());
      __ ldrw(start_to, Address(ckval, sco_offset));
      __ cmpw(ckoff, start_to);
      __ br(Assembler::EQ, L);
      __ stop("super_check_offset inconsistent");
      __ bind(L);
    }
#endif //ASSERT

    // save the original count
    __ mov(count_save, count);

    // Copy from low to high addresses
    __ mov(start_to, to);       // Save destination array start address
    __ b(L_load_element);

    // ======== begin loop ========
    // (Loop is rotated; its entry is L_load_element.)
    // Loop control:
    //   for (; count != 0; count--) {
    //     copied_oop = load_heap_oop(from++);
    //     ... generate_type_check ...;
    //     store_heap_oop(to++, copied_oop);
    //   }
    __ align(OptoLoopAlignment);

    __ BIND(L_store_element);
    __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop);  // store the oop
    __ sub(count, count, 1);
    __ cbz(count, L_do_card_marks);

    // ======== loop entry is here ========
    __ BIND(L_load_element);
    __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8)); // load the oop
    __ cbz(copied_oop, L_store_element);

    __ load_klass(r19_klass, copied_oop); // query the object klass
    generate_type_check(r19_klass, ckoff, ckval, L_store_element);
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register count = remaining oops, count_orig = total oops.
    // Emit GC store barriers for the oops we have copied and report
    // their number to the caller.

    __ subs(count, count_save, count); // K = partially copied oop count
    __ eon(count, count, zr);          // report (-1^K) to caller
    __ br(Assembler::EQ, L_done_pop);

    __ BIND(L_do_card_marks);
    __ add(to, to, -heapOopSize);      // make an inclusive end pointer
    gen_write_ref_array_post_barrier(start_to, to, rscratch1);

    __ bind(L_done_pop);
    __ pop(RegSet::of(r18, r19, r20, r21), sp);
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);

    __ bind(L_done);
    __ mov(r0, count);
    __ leave();
    __ ret(lr);

    return start;
  }

  // Perform range checks on the proposed arraycopy.
  // Kills temp, but nothing else.
  // Also, clean the sign bits of src_pos and dst_pos.
  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
                              Register src_pos, // source position (c_rarg1)
                              Register dst,     // destination array oop (c_rarg2)
                              Register dst_pos, // destination position (c_rarg3)
                              Register length,
                              Register temp,
                              Label& L_failed) { Unimplemented(); }
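
  // Return convention of the checkcast stub, as a sketch: r0 == 0 means
  // every element was copied; otherwise r0 == ~K (i.e. -1 ^ K) where K
  // elements were copied before the type check failed, so a caller can
  // recover K as ~r0 and finish or re-throw accordingly.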

  // These stubs get called from some dumb test routine.
  // I'll write them properly when they're called from
  // something that's actually doing something.
  static void fake_arraycopy_stub(address src, address dst, int count) {
    assert(count == 0, "huh?");
  }


  void generate_arraycopy_stubs() {
    address entry;
    address entry_jbyte_arraycopy;
    address entry_jshort_arraycopy;
    address entry_jint_arraycopy;
    address entry_oop_arraycopy;
    address entry_jlong_arraycopy;
    address entry_checkcast_arraycopy;

    generate_copy_longs(copy_f, r0, r1, rscratch2, copy_forwards);
    generate_copy_longs(copy_b, r0, r1, rscratch2, copy_backwards);

    //*** jbyte
    // Always need aligned and unaligned versions
    StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry,
                                                                          "jbyte_disjoint_arraycopy");
    StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry,
                                                                 &entry_jbyte_arraycopy,
                                                                 "jbyte_arraycopy");
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
                                                                                  "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL,
                                                                         "arrayof_jbyte_arraycopy");

    //*** jshort
    // Always need aligned and unaligned versions
    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
                                                                            "jshort_disjoint_arraycopy");
    StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
                                                                   &entry_jshort_arraycopy,
                                                                   "jshort_arraycopy");
    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
                                                                                    "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
                                                                           "arrayof_jshort_arraycopy");

    //*** jint
    // Aligned versions
    StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
                                                                                "arrayof_jint_disjoint_arraycopy");
    StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
                                                                       "arrayof_jint_arraycopy");
    // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
    // entry_jint_arraycopy always points to the unaligned version
    StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
                                                                        "jint_disjoint_arraycopy");
    StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
                                                               &entry_jint_arraycopy,
                                                               "jint_arraycopy");

    //*** jlong
    // It is always aligned
    StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
                                                                                  "arrayof_jlong_disjoint_arraycopy");
    StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
                                                                         "arrayof_jlong_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
    StubRoutines::_jlong_arraycopy          = StubRoutines::_arrayof_jlong_arraycopy;
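
    // How these table entries get used, as a sketch (illustrative only;
    // the real dispatch lives in the compilers and SharedRuntime): an
    // arraycopy call site selects an entry by element type, alignment
    // and overlap, then calls it with (from, to, count), e.g.
    //
    //   typedef void (*copy_fn)(HeapWord* from, HeapWord* to, size_t count);
    //   ((copy_fn)StubRoutines::_jint_arraycopy)(from, to, count);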
    //*** oops
    {
      // With compressed oops we need unaligned versions; notice that
      // we overwrite entry_oop_arraycopy.
      bool aligned = !UseCompressedOops;

      StubRoutines::_arrayof_oop_disjoint_arraycopy
        = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy");
      StubRoutines::_arrayof_oop_arraycopy
        = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy");
      // Aligned versions without pre-barriers
      StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
        = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit",
                                     /*dest_uninitialized*/true);
      StubRoutines::_arrayof_oop_arraycopy_uninit
        = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit",
                                     /*dest_uninitialized*/true);
    }

    StubRoutines::_oop_disjoint_arraycopy        = StubRoutines::_arrayof_oop_disjoint_arraycopy;
    StubRoutines::_oop_arraycopy                 = StubRoutines::_arrayof_oop_arraycopy;
    StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
    StubRoutines::_oop_arraycopy_uninit          = StubRoutines::_arrayof_oop_arraycopy_uninit;

    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
                                                                        /*dest_uninitialized*/true);
  }

  void generate_math_stubs() { Unimplemented(); }

  // Arguments:
  //
  // Inputs:
  //   c_rarg0 - source byte array address
  //   c_rarg1 - destination byte array address
  //   c_rarg2 - K (key) in little endian int array
  //
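
  // (Editorial note: the keylen comparisons in the AES stubs below rely
  // on the FIPS-197 expanded-key sizes, 4*(rounds+1) 32-bit words:
  //
  //   AES-128: 10 rounds, 44-word key schedule
  //   AES-192: 12 rounds, 52-word key schedule
  //   AES-256: 14 rounds, 60-word key schedule
  //
  // so cmpw(keylen, 44) / cmpw(keylen, 52) decide how many of the
  // optional round-key pairs to apply before the final round.)
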
  address generate_aescrypt_encryptBlock() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");

    Label L_doLast;

    const Register from   = c_rarg0; // source array address
    const Register to     = c_rarg1; // destination array address
    const Register key    = c_rarg2; // key array address
    const Register keylen = rscratch1;

    address start = __ pc();
    __ enter();

    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    __ ld1(v0, __ T16B, from); // get 16 bytes of input

    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
    __ rev32(v1, __ T16B, v1);
    __ rev32(v2, __ T16B, v2);
    __ rev32(v3, __ T16B, v3);
    __ rev32(v4, __ T16B, v4);
    __ aese(v0, v1);
    __ aesmc(v0, v0);
    __ aese(v0, v2);
    __ aesmc(v0, v0);
    __ aese(v0, v3);
    __ aesmc(v0, v0);
    __ aese(v0, v4);
    __ aesmc(v0, v0);

    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
    __ rev32(v1, __ T16B, v1);
    __ rev32(v2, __ T16B, v2);
    __ rev32(v3, __ T16B, v3);
    __ rev32(v4, __ T16B, v4);
    __ aese(v0, v1);
    __ aesmc(v0, v0);
    __ aese(v0, v2);
    __ aesmc(v0, v0);
    __ aese(v0, v3);
    __ aesmc(v0, v0);
    __ aese(v0, v4);
    __ aesmc(v0, v0);

    __ ld1(v1, v2, __ T16B, __ post(key, 32));
    __ rev32(v1, __ T16B, v1);
    __ rev32(v2, __ T16B, v2);

    __ cmpw(keylen, 44);
    __ br(Assembler::EQ, L_doLast);

    __ aese(v0, v1);
    __ aesmc(v0, v0);
    __ aese(v0, v2);
    __ aesmc(v0, v0);

    __ ld1(v1, v2, __ T16B, __ post(key, 32));
    __ rev32(v1, __ T16B, v1);
    __ rev32(v2, __ T16B, v2);

    __ cmpw(keylen, 52);
    __ br(Assembler::EQ, L_doLast);

    __ aese(v0, v1);
    __ aesmc(v0, v0);
    __ aese(v0, v2);
    __ aesmc(v0, v0);

    __ ld1(v1, v2, __ T16B, __ post(key, 32));
    __ rev32(v1, __ T16B, v1);
    __ rev32(v2, __ T16B, v2);

    __ BIND(L_doLast);

    __ aese(v0, v1);
    __ aesmc(v0, v0);
    __ aese(v0, v2);

    __ ld1(v1, __ T16B, key);
    __ rev32(v1, __ T16B, v1);
    __ eor(v0, __ T16B, v0, v1);

    __ st1(v0, __ T16B, to);

    __ mov(r0, 0);

    __ leave();
    __ ret(lr);

    return start;
  }

  // Arguments:
  //
  // Inputs:
  //   c_rarg0 - source byte array address
  //   c_rarg1 - destination byte array address
  //   c_rarg2 - K (key) in little endian int array
  //
  address generate_aescrypt_decryptBlock() {
    assert(UseAES, "need AES instructions support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
    Label L_doLast;

    const Register from   = c_rarg0; // source array address
    const Register to     = c_rarg1; // destination array address
    const Register key    = c_rarg2; // key array address
    const Register keylen = rscratch1;

    address start = __ pc();
    __ enter(); // required for proper stackwalking of RuntimeStub frame

    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    __ ld1(v0, __ T16B, from); // get 16 bytes of input

    __ ld1(v5, __ T16B, __ post(key, 16));
    __ rev32(v5, __ T16B, v5);

    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
    __ rev32(v1, __ T16B, v1);
    __ rev32(v2, __ T16B, v2);
    __ rev32(v3, __ T16B, v3);
    __ rev32(v4, __ T16B, v4);
    __ aesd(v0, v1);
    __ aesimc(v0, v0);
    __ aesd(v0, v2);
    __ aesimc(v0, v0);
    __ aesd(v0, v3);
    __ aesimc(v0, v0);
    __ aesd(v0, v4);
    __ aesimc(v0, v0);

    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
    __ rev32(v1, __ T16B, v1);
    __ rev32(v2, __ T16B, v2);
    __ rev32(v3, __ T16B, v3);
    __ rev32(v4, __ T16B, v4);
    __ aesd(v0, v1);
    __ aesimc(v0, v0);
    __ aesd(v0, v2);
    __ aesimc(v0, v0);
    __ aesd(v0, v3);
    __ aesimc(v0, v0);
    __ aesd(v0, v4);
    __ aesimc(v0, v0);

    __ ld1(v1, v2, __ T16B, __ post(key, 32));
    __ rev32(v1, __ T16B, v1);
    __ rev32(v2, __ T16B, v2);

    __ cmpw(keylen, 44);
    __ br(Assembler::EQ, L_doLast);

    __ aesd(v0, v1);
    __ aesimc(v0, v0);
    __ aesd(v0, v2);
    __ aesimc(v0, v0);

    __ ld1(v1, v2, __ T16B, __ post(key, 32));
    __ rev32(v1, __ T16B, v1);
    __ rev32(v2, __ T16B, v2);

    __ cmpw(keylen, 52);
    __ br(Assembler::EQ, L_doLast);

    __ aesd(v0, v1);
    __ aesimc(v0, v0);
    __ aesd(v0, v2);
    __ aesimc(v0, v0);

    __ ld1(v1, v2, __ T16B, __ post(key, 32));
    __ rev32(v1, __ T16B, v1);
    __ rev32(v2, __ T16B, v2);

    __ BIND(L_doLast);

    __ aesd(v0, v1);
    __ aesimc(v0, v0);
    __ aesd(v0, v2);

    __ eor(v0, __ T16B, v0, v5);

    __ st1(v0, __ T16B, to);

    __ mov(r0, 0);

    __ leave();
    __ ret(lr);

    return start;
  }

  // Arguments:
  //
  // Inputs:
  //   c_rarg0 - source byte array address
  //   c_rarg1 - destination byte array address
  //   c_rarg2 - K (key) in little endian int array
  //   c_rarg3 - r vector byte array address
  //   c_rarg4 - input length
  //
  // Output:
  //   r0 - input length
  //
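
  // (Editorial note: a minimal sketch of the CBC encryption implemented
  // below, where E_K is the AES block cipher under the expanded key K:
  //
  //   r = IV;                          // loaded from rvec
  //   for (i = 0; i < nblocks; i++) {
  //     r = E_K(r ^ plaintext[i]);
  //     ciphertext[i] = r;
  //   }
  //   rvec = r;                        // so a later call continues the chain
  // )
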
  address generate_cipherBlockChaining_encryptAESCrypt() {
    assert(UseAES, "need AES instructions support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");

    Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;

    const Register from    = c_rarg0; // source array address
    const Register to      = c_rarg1; // destination array address
    const Register key     = c_rarg2; // key array address
    const Register rvec    = c_rarg3; // r byte array initialized from the init vector
                                      // and left holding the last encrypted block
    const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
    const Register keylen  = rscratch1;

    address start = __ pc();
    __ enter();

    __ mov(rscratch2, len_reg); // save input length for the return value
    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    __ ld1(v0, __ T16B, rvec);

    __ cmpw(keylen, 52);
    __ br(Assembler::CC, L_loadkeys_44);
    __ br(Assembler::EQ, L_loadkeys_52);

    __ ld1(v17, v18, __ T16B, __ post(key, 32));
    __ rev32(v17, __ T16B, v17);
    __ rev32(v18, __ T16B, v18);
    __ BIND(L_loadkeys_52);
    __ ld1(v19, v20, __ T16B, __ post(key, 32));
    __ rev32(v19, __ T16B, v19);
    __ rev32(v20, __ T16B, v20);
    __ BIND(L_loadkeys_44);
    __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64));
    __ rev32(v21, __ T16B, v21);
    __ rev32(v22, __ T16B, v22);
    __ rev32(v23, __ T16B, v23);
    __ rev32(v24, __ T16B, v24);
    __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64));
    __ rev32(v25, __ T16B, v25);
    __ rev32(v26, __ T16B, v26);
    __ rev32(v27, __ T16B, v27);
    __ rev32(v28, __ T16B, v28);
    __ ld1(v29, v30, v31, __ T16B, key);
    __ rev32(v29, __ T16B, v29);
    __ rev32(v30, __ T16B, v30);
    __ rev32(v31, __ T16B, v31);

    __ BIND(L_aes_loop);
    __ ld1(v1, __ T16B, __ post(from, 16));
    __ eor(v0, __ T16B, v0, v1);

    __ br(Assembler::CC, L_rounds_44);
    __ br(Assembler::EQ, L_rounds_52);

    __ aese(v0, v17); __ aesmc(v0, v0);
    __ aese(v0, v18); __ aesmc(v0, v0);
    __ BIND(L_rounds_52);
    __ aese(v0, v19); __ aesmc(v0, v0);
    __ aese(v0, v20); __ aesmc(v0, v0);
    __ BIND(L_rounds_44);
    __ aese(v0, v21); __ aesmc(v0, v0);
    __ aese(v0, v22); __ aesmc(v0, v0);
    __ aese(v0, v23); __ aesmc(v0, v0);
    __ aese(v0, v24); __ aesmc(v0, v0);
    __ aese(v0, v25); __ aesmc(v0, v0);
    __ aese(v0, v26); __ aesmc(v0, v0);
    __ aese(v0, v27); __ aesmc(v0, v0);
    __ aese(v0, v28); __ aesmc(v0, v0);
    __ aese(v0, v29); __ aesmc(v0, v0);
    __ aese(v0, v30);
    __ eor(v0, __ T16B, v0, v31);

    __ st1(v0, __ T16B, __ post(to, 16));
    __ sub(len_reg, len_reg, 16);
    __ cbnz(len_reg, L_aes_loop);

    __ st1(v0, __ T16B, rvec);

    __ mov(r0, rscratch2);

    __ leave();
    __ ret(lr);

    return start;
  }

  // Arguments:
  //
  // Inputs:
  //   c_rarg0 - source byte array address
  //   c_rarg1 - destination byte array address
  //   c_rarg2 - K (key) in little endian int array
  //   c_rarg3 - r vector byte array address
  //   c_rarg4 - input length
  //
  // Output:
  //   r0 - input length
  //
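
  // (Editorial note: the matching CBC decryption sketch, with D_K the
  // inverse cipher:
  //
  //   r = IV;                          // loaded from rvec
  //   for (i = 0; i < nblocks; i++) {
  //     t = ciphertext[i];             // saved aside (the v1/v2 moves
  //     plaintext[i] = D_K(t) ^ r;     //  below) because decryption
  //     r = t;                         //  destroys the loop's input block
  //   }
  //   rvec = r;
  // )
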
  address generate_cipherBlockChaining_decryptAESCrypt() {
    assert(UseAES, "need AES instructions support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");

    Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;

    const Register from    = c_rarg0; // source array address
    const Register to      = c_rarg1; // destination array address
    const Register key     = c_rarg2; // key array address
    const Register rvec    = c_rarg3; // r byte array initialized from the init vector
                                      // and left holding the last ciphertext block
    const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
    const Register keylen  = rscratch1;

    address start = __ pc();
    __ enter();

    __ mov(rscratch2, len_reg); // save input length for the return value
    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    __ ld1(v2, __ T16B, rvec);

    __ ld1(v31, __ T16B, __ post(key, 16));
    __ rev32(v31, __ T16B, v31);

    __ cmpw(keylen, 52);
    __ br(Assembler::CC, L_loadkeys_44);
    __ br(Assembler::EQ, L_loadkeys_52);

    __ ld1(v17, v18, __ T16B, __ post(key, 32));
    __ rev32(v17, __ T16B, v17);
    __ rev32(v18, __ T16B, v18);
    __ BIND(L_loadkeys_52);
    __ ld1(v19, v20, __ T16B, __ post(key, 32));
    __ rev32(v19, __ T16B, v19);
    __ rev32(v20, __ T16B, v20);
    __ BIND(L_loadkeys_44);
    __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64));
    __ rev32(v21, __ T16B, v21);
    __ rev32(v22, __ T16B, v22);
    __ rev32(v23, __ T16B, v23);
    __ rev32(v24, __ T16B, v24);
    __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64));
    __ rev32(v25, __ T16B, v25);
    __ rev32(v26, __ T16B, v26);
    __ rev32(v27, __ T16B, v27);
    __ rev32(v28, __ T16B, v28);
    __ ld1(v29, v30, __ T16B, key);
    __ rev32(v29, __ T16B, v29);
    __ rev32(v30, __ T16B, v30);

    __ BIND(L_aes_loop);
    __ ld1(v0, __ T16B, __ post(from, 16));
    __ orr(v1, __ T16B, v0, v0); // keep the ciphertext block for chaining

    __ br(Assembler::CC, L_rounds_44);
    __ br(Assembler::EQ, L_rounds_52);

    __ aesd(v0, v17); __ aesimc(v0, v0);
    __ aesd(v0, v18); __ aesimc(v0, v0);
    __ BIND(L_rounds_52);
    __ aesd(v0, v19); __ aesimc(v0, v0);
    __ aesd(v0, v20); __ aesimc(v0, v0);
    __ BIND(L_rounds_44);
    __ aesd(v0, v21); __ aesimc(v0, v0);
    __ aesd(v0, v22); __ aesimc(v0, v0);
    __ aesd(v0, v23); __ aesimc(v0, v0);
    __ aesd(v0, v24); __ aesimc(v0, v0);
    __ aesd(v0, v25); __ aesimc(v0, v0);
    __ aesd(v0, v26); __ aesimc(v0, v0);
    __ aesd(v0, v27); __ aesimc(v0, v0);
    __ aesd(v0, v28); __ aesimc(v0, v0);
    __ aesd(v0, v29); __ aesimc(v0, v0);
    __ aesd(v0, v30);
    __ eor(v0, __ T16B, v0, v31);
    __ eor(v0, __ T16B, v0, v2);

    __ st1(v0, __ T16B, __ post(to, 16));
    __ orr(v2, __ T16B, v1, v1); // saved ciphertext becomes the new chaining value

    __ sub(len_reg, len_reg, 16);
    __ cbnz(len_reg, L_aes_loop);

    __ st1(v2, __ T16B, rvec);

    __ mov(r0, rscratch2);

    __ leave();
    __ ret(lr);

    return start;
  }

  // Arguments:
  //
  // Inputs:
  //   c_rarg0 - byte[] source+offset
  //   c_rarg1 - int[]  SHA.state
  //   c_rarg2 - int    offset
  //   c_rarg3 - int    limit
  //
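
  // (Editorial note: contract sketch for the two variants generated
  // below, mirroring the Java intrinsic entry points; signatures are
  // paraphrased, not literal:
  //
  //   // multi_block == false: compress exactly one 64-byte block
  //   void sha1_implCompress(byte[] buf, int[] state);
  //   // multi_block == true: compress blocks while ofs <= limit,
  //   // post-incrementing buf, and return the updated ofs in r0
  //   int sha1_implCompressMB(byte[] buf, int[] state, int ofs, int limit);
  // )
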
  address generate_sha1_implCompress(bool multi_block, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Register buf   = c_rarg0;
    Register state = c_rarg1;
    Register ofs   = c_rarg2;
    Register limit = c_rarg3;

    Label keys;
    Label sha1_loop;

    // load the keys into v0..v3
    __ adr(rscratch1, keys);
    __ ld4r(v0, v1, v2, v3, __ T4S, Address(rscratch1));
    // load 5 words of state into v6, v7
    __ ldrq(v6, Address(state, 0));
    __ ldrs(v7, Address(state, 16));

    __ BIND(sha1_loop);
    // load 64 bytes of data into v16..v19
    __ ld1(v16, v17, v18, v19, __ T4S, multi_block ? __ post(buf, 64) : buf);
    __ rev32(v16, __ T16B, v16);
    __ rev32(v17, __ T16B, v17);
    __ rev32(v18, __ T16B, v18);
    __ rev32(v19, __ T16B, v19);

    // do the sha1
    __ addv(v4, __ T4S, v16, v0);
    __ orr(v20, __ T16B, v6, v6);

    FloatRegister d0 = v16;
    FloatRegister d1 = v17;
    FloatRegister d2 = v18;
    FloatRegister d3 = v19;

    for (int round = 0; round < 20; round++) {
      FloatRegister tmp1 = (round & 1) ? v4 : v5;
      FloatRegister tmp2 = (round & 1) ? v21 : v22;
      FloatRegister tmp3 = round ? ((round & 1) ? v22 : v21) : v7;
      FloatRegister tmp4 = (round & 1) ? v5 : v4;
      FloatRegister key = (round < 4) ? v0 : ((round < 9) ? v1 : ((round < 14) ? v2 : v3));

      if (round < 16) __ sha1su0(d0, __ T4S, d1, d2);
      if (round < 19) __ addv(tmp1, __ T4S, d1, key);
      __ sha1h(tmp2, __ T4S, v20);
      if (round < 5)
        __ sha1c(v20, __ T4S, tmp3, tmp4);
      else if (round < 10 || round >= 15)
        __ sha1p(v20, __ T4S, tmp3, tmp4);
      else
        __ sha1m(v20, __ T4S, tmp3, tmp4);
      if (round < 16) __ sha1su1(d0, __ T4S, d3);

      tmp1 = d0; d0 = d1; d1 = d2; d2 = d3; d3 = tmp1;
    }

    __ addv(v7, __ T2S, v7, v21);
    __ addv(v6, __ T4S, v6, v20);

    if (multi_block) {
      __ add(ofs, ofs, 64);
      __ cmp(ofs, limit);
      __ br(Assembler::LE, sha1_loop);
      __ mov(c_rarg0, ofs); // return ofs
    }

    __ strq(v6, Address(state, 0));
    __ strs(v7, Address(state, 16));

    __ ret(lr);

    __ bind(keys);
    __ emit_int32(0x5a827999);
    __ emit_int32(0x6ed9eba1);
    __ emit_int32(0x8f1bbcdc);
    __ emit_int32(0xca62c1d6);

    return start;
  }


  // Arguments:
  //
  // Inputs:
  //   c_rarg0 - byte[] source+offset
  //   c_rarg1 - int[]  SHA.state
  //   c_rarg2 - int    offset
  //   c_rarg3 - int    limit
  //
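
  // (Editorial note: the stub below keeps the message schedule in
  // v8..v11. Under AAPCS64 only the low halves d8..d15 of v8..v15 are
  // callee-saved, which is why the prologue/epilogue use stpd/ldpd
  // rather than full 128-bit saves. The round_consts table holds the 64
  // standard FIPS 180-4 SHA-256 K constants.)
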
  address generate_sha256_implCompress(bool multi_block, const char *name) {
    static const uint32_t round_consts[64] = {
      0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
      0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
      0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
      0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
      0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
      0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
      0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
      0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
      0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
      0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
      0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
      0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
      0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
      0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
      0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
      0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
    };
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Register buf   = c_rarg0;
    Register state = c_rarg1;
    Register ofs   = c_rarg2;
    Register limit = c_rarg3;

    Label sha256_loop;

    __ stpd(v8, v9, __ pre(sp, -32));
    __ stpd(v10, v11, Address(sp, 16));

    // dga == v0
    // dgb == v1
    // dg0 == v2
    // dg1 == v3
    // dg2 == v4
    // t0  == v6
    // t1  == v7

    // load 16 keys to v16..v31
    __ lea(rscratch1, ExternalAddress((address)round_consts));
    __ ld1(v16, v17, v18, v19, __ T4S, __ post(rscratch1, 64));
    __ ld1(v20, v21, v22, v23, __ T4S, __ post(rscratch1, 64));
    __ ld1(v24, v25, v26, v27, __ T4S, __ post(rscratch1, 64));
    __ ld1(v28, v29, v30, v31, __ T4S, rscratch1);

    // load 8 words (256 bits) state
    __ ldpq(v0, v1, state);

    __ BIND(sha256_loop);
    // load 64 bytes of data into v8..v11
    __ ld1(v8, v9, v10, v11, __ T4S, multi_block ? __ post(buf, 64) : buf);
    __ rev32(v8, __ T16B, v8);
    __ rev32(v9, __ T16B, v9);
    __ rev32(v10, __ T16B, v10);
    __ rev32(v11, __ T16B, v11);

    __ addv(v6, __ T4S, v8, v16);
    __ orr(v2, __ T16B, v0, v0);
    __ orr(v3, __ T16B, v1, v1);

    FloatRegister d0 = v8;
    FloatRegister d1 = v9;
    FloatRegister d2 = v10;
    FloatRegister d3 = v11;

    for (int round = 0; round < 16; round++) {
      FloatRegister tmp1 = (round & 1) ? v6 : v7;
      FloatRegister tmp2 = (round & 1) ? v7 : v6;
      FloatRegister tmp3 = (round & 1) ? v2 : v4;
      FloatRegister tmp4 = (round & 1) ? v4 : v2;

      if (round < 12) __ sha256su0(d0, __ T4S, d1);
      __ orr(v4, __ T16B, v2, v2);
      if (round < 15)
        __ addv(tmp1, __ T4S, d1, as_FloatRegister(round + 17));
      __ sha256h(v2, __ T4S, v3, tmp2);
      __ sha256h2(v3, __ T4S, v4, tmp2);
      if (round < 12) __ sha256su1(d0, __ T4S, d2, d3);

      tmp1 = d0; d0 = d1; d1 = d2; d2 = d3; d3 = tmp1;
    }

    __ addv(v0, __ T4S, v0, v2);
    __ addv(v1, __ T4S, v1, v3);

    if (multi_block) {
      __ add(ofs, ofs, 64);
      __ cmp(ofs, limit);
      __ br(Assembler::LE, sha256_loop);
      __ mov(c_rarg0, ofs); // return ofs
    }

    __ ldpd(v10, v11, Address(sp, 16));
    __ ldpd(v8, v9, __ post(sp, 32));

    __ stpq(v0, v1, state);

    __ ret(lr);

    return start;
  }
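
  // (Editorial note: a minimal usage sketch of the SafeFetch stubs
  // generated below -- the VM uses them to probe possibly-unmapped
  // memory; a fault at *fault_pc resumes at *continuation_pc with the
  // error value already in place:
  //
  //   int v = SafeFetch32(addr, -1);  // *addr, or -1 if addr faults
  //   if (v == -1) { /* unreadable, or genuinely contained -1 */ }
  // )
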
#ifndef BUILTIN_SIM
  // Safefetch stubs.
  void generate_safefetch(const char* name, int size, address* entry,
                          address* fault_pc, address* continuation_pc) {
    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   c_rarg0 = adr
    //   c_rarg1 = errValue
    //
    // result:
    //   r0 = *adr or errValue

    StubCodeMark mark(this, "StubRoutines", name);

    // Entry point, pc or function descriptor.
    *entry = __ pc();

    // Load *adr into c_rarg1, may fault.
    *fault_pc = __ pc();
    switch (size) {
      case 4:
        // int32_t
        __ ldrw(c_rarg1, Address(c_rarg0, 0));
        break;
      case 8:
        // int64_t
        __ ldr(c_rarg1, Address(c_rarg0, 0));
        break;
      default:
        ShouldNotReachHere();
    }

    // return errValue or *adr
    *continuation_pc = __ pc();
    __ mov(r0, c_rarg1);
    __ ret(lr);
  }
#endif

  /**
   * Arguments:
   *
   * Inputs:
   *   c_rarg0 - int   crc
   *   c_rarg1 - byte* buf
   *   c_rarg2 - int   length
   *
   * Output:
   *   r0 - int crc result
   */
  address generate_updateBytesCRC32() {
    assert(UseCRC32Intrinsics, "what are we doing here?");

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");

    address start = __ pc();

    const Register crc    = c_rarg0; // crc
    const Register buf    = c_rarg1; // source java byte array address
    const Register len    = c_rarg2; // length
    const Register table0 = c_rarg3; // crc_table address
    const Register table1 = c_rarg4;
    const Register table2 = c_rarg5;
    const Register table3 = c_rarg6;
    const Register tmp3   = c_rarg7;

    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame

    __ kernel_crc32(crc, buf, len,
                    table0, table1, table2, table3, rscratch1, rscratch2, tmp3);

    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(lr);

    return start;
  }
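
  // (Editorial note: a reference model of what the stub above computes,
  // assuming the standard zlib CRC-32 (reflected polynomial 0xEDB88320)
  // used by java.util.zip.CRC32:
  //
  //   static uint32_t crc32_ref(uint32_t crc, const uint8_t* buf, int len) {
  //     crc = ~crc;
  //     for (int i = 0; i < len; i++) {
  //       crc ^= buf[i];
  //       for (int k = 0; k < 8; k++)
  //         crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1)));
  //     }
  //     return ~crc;
  //   }
  //
  // kernel_crc32 computes the same function table-driven, using the
  // tables passed in table0..table3.)
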
#undef __
#define __ masm->

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Since we need to preserve callee-saved values (currently
  // only for C2, but done for C1 as well) we need a callee-saved oop
  // map and therefore have to make these stubs into RuntimeStubs
  // rather than BufferBlobs. If the compiler needs all registers to
  // be preserved between the fault point and the exception handler
  // then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller-saved registers were assumed volatile in the compiler.

  address generate_throw_exception(const char* name,
                                   address runtime_entry,
                                   Register arg1 = noreg,
                                   Register arg2 = noreg) {
    // Information about frame layout at time of blocking runtime call.
    // Note that we only have to preserve callee-saved registers since
    // the compilers are responsible for supplying a continuation point
    // if they expect all registers to be preserved.
    // n.b. aarch64 asserts that frame::arg_reg_save_area_bytes == 0
    enum layout {
      rfp_off = 0,
      rfp_off2,
      return_off,
      return_off2,
      framesize // inclusive of return address
    };

    int insts_size = 512;
    int locs_size  = 64;

    CodeBuffer code(name, insts_size, locs_size);
    OopMapSet* oop_maps = new OopMapSet();
    MacroAssembler* masm = new MacroAssembler(&code);

    address start = __ pc();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of
    // thread-local storage and also sets up last_Java_sp slightly
    // differently from the real call_VM.

    __ enter(); // Save FP and LR before call

    assert(is_even(framesize/2), "sp not 16-byte aligned");

    // lr and fp are already in place
    __ sub(sp, rfp, ((unsigned)framesize-4) << LogBytesPerInt); // prolog

    int frame_complete = __ pc() - start;

    // Set up last_Java_sp and last_Java_fp
    address the_pc = __ pc();
    __ set_last_Java_frame(sp, rfp, (address)NULL, rscratch1);

    // Call runtime
    if (arg1 != noreg) {
      assert(arg2 != c_rarg1, "clobbered");
      __ mov(c_rarg1, arg1);
    }
    if (arg2 != noreg) {
      __ mov(c_rarg2, arg2);
    }
    __ mov(c_rarg0, rthread);
    BLOCK_COMMENT("call runtime_entry");
    __ mov(rscratch1, runtime_entry);
    __ blrt(rscratch1, 3 /* number_of_arguments */, 0, 1);

    // Generate oop map
    OopMap* map = new OopMap(framesize, 0);

    oop_maps->add_gc_map(the_pc - start, map);

    __ reset_last_Java_frame(true, true);
    __ maybe_isb();

    __ leave();

    // check for pending exceptions
#ifdef ASSERT
    Label L;
    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
    __ cbnz(rscratch1, L);
    __ should_not_reach_here();
    __ bind(L);
#endif // ASSERT
    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

    // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub* stub =
      RuntimeStub::new_runtime_stub(name,
                                    &code,
                                    frame_complete,
                                    (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                    oop_maps, false);
    return stub->entry_point();
  }

  // Initialization
  void generate_initial() {
    // Generate initial stubs and initialize the entry points.

    // Entry points that exist on all platforms. Note: this is code
    // that could be shared among different platforms; however, the
    // benefit seems to be smaller than the disadvantage of having a
    // much more complicated generator structure. See also the comment
    // in stubRoutines.hpp.

    StubRoutines::_forward_exception_entry = generate_forward_exception();

    StubRoutines::_call_stub_entry =
      generate_call_stub(StubRoutines::_call_stub_return_address);

    // is referenced by megamorphic call
    StubRoutines::_catch_exception_entry = generate_catch_exception();

    // Build this early so it's available for the interpreter.
    StubRoutines::_throw_StackOverflowError_entry =
      generate_throw_exception("StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_StackOverflowError));
    if (UseCRC32Intrinsics) {
      // Set the table address before generating the stubs that use it.
      StubRoutines::_crc_table_adr = (address)StubRoutines::aarch64::_crc_table;
      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
    }
  }

  void generate_all() {
    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
    StubRoutines::_throw_AbstractMethodError_entry =
      generate_throw_exception("AbstractMethodError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_AbstractMethodError));

    StubRoutines::_throw_IncompatibleClassChangeError_entry =
      generate_throw_exception("IncompatibleClassChangeError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_IncompatibleClassChangeError));

    StubRoutines::_throw_NullPointerException_at_call_entry =
      generate_throw_exception("NullPointerException at call throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_NullPointerException_at_call));

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

#ifndef BUILTIN_SIM
    if (UseAESIntrinsics) {
      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
    }

    if (UseSHA1Intrinsics) {
      StubRoutines::_sha1_implCompress   = generate_sha1_implCompress(false, "sha1_implCompress");
      StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true,  "sha1_implCompressMB");
    }
    if (UseSHA256Intrinsics) {
      StubRoutines::_sha256_implCompress   = generate_sha256_implCompress(false, "sha256_implCompress");
      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
    }

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                       &StubRoutines::_safefetch32_fault_pc,
                       &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                       &StubRoutines::_safefetchN_fault_pc,
                       &StubRoutines::_safefetchN_continuation_pc);
#endif
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }
}; // end class declaration

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}
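
// (Editorial note: a sketch of the two-phase initialization assumed
// here, following the usual HotSpot convention; the init functions live
// in stubRoutines.cpp, not this file:
//
//   stubRoutines_init1();  // early, before universe_init():
//                          //   StubGenerator_generate(code, /*all*/false)
//   stubRoutines_init2();  // later, after universe_init():
//                          //   StubGenerator_generate(code, /*all*/true)
// )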