/*
 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_x86.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/top.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp

#define __ _masm->
#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
#define a__ ((Assembler*)_masm)->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
const int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions

// Stub Code definitions

static address handle_unsafe_access() {
  JavaThread* thread = JavaThread::current();
  address pc = thread->saved_exception_pc();
  // pc is the instruction which we must emulate
  // doing a no-op is fine:  return garbage from the load
  // therefore, compute npc
  address npc = Assembler::locate_next_instruction(pc);

  // request an async exception
  thread->set_pending_unsafe_access_error();

  // return address of next instruction to execute
  return npc;
}

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(counter) ((void)0)
#else
  void inc_counter_np_(int& counter) {
    // This can destroy rscratch1 if counter is far from the code cache
    __ incrementl(ExternalAddress((address)&counter));
  }
#define inc_counter_np(counter) \
  BLOCK_COMMENT("inc_counter " #counter); \
  inc_counter_np_(counter);
#endif

  // Call stubs are used to call Java from C
  //
  // Linux Arguments:
  //    c_rarg0:   call wrapper address                   address
  //    c_rarg1:   result                                 address
  //    c_rarg2:   result type                            BasicType
  //    c_rarg3:   method                                 Method*
  //    c_rarg4:   (interpreter) entry point              address
  //    c_rarg5:   parameters                             intptr_t*
  //    16(rbp):   parameter size (in words)              int
  //    24(rbp):   thread                                 Thread*
  //
  //     [ return_from_Java     ] <--- rsp
  //     [ argument word n      ]
  //      ...
  // -12 [ argument word 1      ]
  // -11 [ saved r15            ] <--- rsp_after_call
  // -10 [ saved r14            ]
  //  -9 [ saved r13            ]
  //  -8 [ saved r12            ]
  //  -7 [ saved rbx            ]
  //  -6 [ call wrapper         ]
  //  -5 [ result               ]
  //  -4 [ result type          ]
  //  -3 [ method               ]
  //  -2 [ entry point          ]
  //  -1 [ parameters           ]
  //   0 [ saved rbp            ] <--- rbp
  //   1 [ return address       ]
  //   2 [ parameter size       ]
  //   3 [ thread               ]
  //
  // Windows Arguments:
  //    c_rarg0:   call wrapper address                   address
  //    c_rarg1:   result                                 address
  //    c_rarg2:   result type                            BasicType
  //    c_rarg3:   method                                 Method*
  //    48(rbp):   (interpreter) entry point              address
  //    56(rbp):   parameters                             intptr_t*
  //    64(rbp):   parameter size (in words)              int
  //    72(rbp):   thread                                 Thread*
  //
  //     [ return_from_Java     ] <--- rsp
  //     [ argument word n      ]
  //      ...
  // -60 [ argument word 1      ]
  // -59 [ saved xmm31          ] <--- rsp_after_call
  //     [ saved xmm16-xmm30    ] (EVEX enabled, else the space is blank)
  // -27 [ saved xmm15          ]
  //     [ saved xmm7-xmm14     ]
  //  -9 [ saved xmm6           ] (each xmm register takes 2 slots)
  //  -7 [ saved r15            ]
  //  -6 [ saved r14            ]
  //  -5 [ saved r13            ]
  //  -4 [ saved r12            ]
  //  -3 [ saved rdi            ]
  //  -2 [ saved rsi            ]
  //  -1 [ saved rbx            ]
  //   0 [ saved rbp            ] <--- rbp
  //   1 [ return address       ]
  //   2 [ call wrapper         ]
  //   3 [ result               ]
  //   4 [ result type          ]
  //   5 [ method               ]
  //   6 [ entry point          ]
  //   7 [ parameters           ]
  //   8 [ parameter size       ]
  //   9 [ thread               ]
  //
  //    Windows reserves the caller's stack space for arguments 1-4.
  //    We spill c_rarg0-c_rarg3 to this space.
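  //
  // For reference only: the VM reaches this stub through the function
  // pointer returned by StubRoutines::call_stub().  A sketch of the call
  // site in JavaCalls::call_helper() looks roughly like this (argument
  // names are illustrative; see javaCalls.cpp for the real code):
  //
  //   StubRoutines::call_stub()(
  //     (address)&link,             // call wrapper (JavaCallWrapper)
  //     result_val_address,         // result
  //     result_type,                // result type (BasicType)
  //     method(),                   // Method* to invoke
  //     entry_point,                // (interpreter) entry point
  //     args->parameters(),         // parameters
  //     args->size_of_parameters(), // parameter size (in words)
  //     CHECK);                     // current thread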

  // Call stub stack layout word offsets from rbp
  enum call_stub_layout {
#ifdef _WIN64
    xmm_save_first     = 6,  // save from xmm6
    xmm_save_last      = 31, // to xmm31
    xmm_save_base      = -9,
    rsp_after_call_off = xmm_save_base - 2 * (xmm_save_last - xmm_save_first), // -59
    r15_off            = -7,
    r14_off            = -6,
    r13_off            = -5,
    r12_off            = -4,
    rdi_off            = -3,
    rsi_off            = -2,
    rbx_off            = -1,
    rbp_off            =  0,
    retaddr_off        =  1,
    call_wrapper_off   =  2,
    result_off         =  3,
    result_type_off    =  4,
    method_off         =  5,
    entry_point_off    =  6,
    parameters_off     =  7,
    parameter_size_off =  8,
    thread_off         =  9
#else
    rsp_after_call_off = -12,
    mxcsr_off          = rsp_after_call_off,
    r15_off            = -11,
    r14_off            = -10,
    r13_off            = -9,
    r12_off            = -8,
    rbx_off            = -7,
    call_wrapper_off   = -6,
    result_off         = -5,
    result_type_off    = -4,
    method_off         = -3,
    entry_point_off    = -2,
    parameters_off     = -1,
    rbp_off            =  0,
    retaddr_off        =  1,
    parameter_size_off =  2,
    thread_off         =  3
#endif
  };

#ifdef _WIN64
  Address xmm_save(int reg) {
    assert(reg >= xmm_save_first && reg <= xmm_save_last, "XMM register number out of range");
    return Address(rbp, (xmm_save_base - (reg - xmm_save_first) * 2) * wordSize);
  }
#endif

  address generate_call_stub(address& return_address) {
    assert((int)frame::entry_frame_after_call_words == -(int)rsp_after_call_off + 1 &&
           (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
           "adjust this code");
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // same as in generate_catch_exception()!
    const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);

    const Address call_wrapper  (rbp, call_wrapper_off   * wordSize);
    const Address result        (rbp, result_off         * wordSize);
    const Address result_type   (rbp, result_type_off    * wordSize);
    const Address method        (rbp, method_off         * wordSize);
    const Address entry_point   (rbp, entry_point_off    * wordSize);
    const Address parameters    (rbp, parameters_off     * wordSize);
    const Address parameter_size(rbp, parameter_size_off * wordSize);

    // same as in generate_catch_exception()!
    const Address thread        (rbp, thread_off         * wordSize);

    const Address r15_save(rbp, r15_off * wordSize);
    const Address r14_save(rbp, r14_off * wordSize);
    const Address r13_save(rbp, r13_off * wordSize);
    const Address r12_save(rbp, r12_off * wordSize);
    const Address rbx_save(rbp, rbx_off * wordSize);

    // stub code
    __ enter();
    __ subptr(rsp, -rsp_after_call_off * wordSize);

    // save register parameters
#ifndef _WIN64
    __ movptr(parameters,   c_rarg5); // parameters
    __ movptr(entry_point,  c_rarg4); // entry_point
#endif

    __ movptr(method,       c_rarg3); // method
    __ movl(result_type,    c_rarg2); // result type
    __ movptr(result,       c_rarg1); // result
    __ movptr(call_wrapper, c_rarg0); // call wrapper

    // save regs belonging to calling function
    __ movptr(rbx_save, rbx);
    __ movptr(r12_save, r12);
    __ movptr(r13_save, r13);
    __ movptr(r14_save, r14);
    __ movptr(r15_save, r15);
    if (UseAVX > 2) {
      __ movl(rbx, 0xffff);
      __ kmovql(k1, rbx);
    }
#ifdef _WIN64
    if (UseAVX > 2) {
      for (int i = 6; i <= 31; i++) {
        __ movdqu(xmm_save(i), as_XMMRegister(i));
      }
    } else {
      for (int i = 6; i <= 15; i++) {
        __ movdqu(xmm_save(i), as_XMMRegister(i));
      }
    }

    const Address rdi_save(rbp, rdi_off * wordSize);
    const Address rsi_save(rbp, rsi_off * wordSize);

    __ movptr(rsi_save, rsi);
    __ movptr(rdi_save, rdi);
#else
    const Address mxcsr_save(rbp, mxcsr_off * wordSize);
    {
      Label skip_ldmx;
      __ stmxcsr(mxcsr_save);
      __ movl(rax, mxcsr_save);
      __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
      __ cmp32(rax, mxcsr_std);
      __ jcc(Assembler::equal, skip_ldmx);
      __ ldmxcsr(mxcsr_std);
      __ bind(skip_ldmx);
    }
#endif

    // Load up thread register
    __ movptr(r15_thread, thread);
    __ reinit_heapbase();

#ifdef ASSERT
    // make sure we have no pending exceptions
    {
      Label L;
      __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::equal, L);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // pass parameters if any
    BLOCK_COMMENT("pass parameters if any");
    Label parameters_done;
    __ movl(c_rarg3, parameter_size);
    __ testl(c_rarg3, c_rarg3);
    __ jcc(Assembler::zero, parameters_done);

    Label loop;
    __ movptr(c_rarg2, parameters);      // parameter pointer
    __ movl(c_rarg1, c_rarg3);           // parameter counter is in c_rarg1
    __ BIND(loop);
    __ movptr(rax, Address(c_rarg2, 0)); // get parameter
    __ addptr(c_rarg2, wordSize);        // advance to next parameter
    __ decrementl(c_rarg1);              // decrement counter
    __ push(rax);                        // pass parameter
    __ jcc(Assembler::notZero, loop);

    // call Java function
    __ BIND(parameters_done);
    __ movptr(rbx, method);           // get Method*
    __ movptr(c_rarg1, entry_point);  // get entry_point
    __ mov(r13, rsp);                 // set sender sp
    BLOCK_COMMENT("call Java function");
    __ call(c_rarg1);

    BLOCK_COMMENT("call_stub_return_address:");
    return_address = __ pc();

    // store result depending on type (everything that is not
    // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    __ movptr(c_rarg0, result);
    Label is_long, is_float, is_double, exit;
    __ movl(c_rarg1, result_type);
    __ cmpl(c_rarg1, T_OBJECT);
    __ jcc(Assembler::equal, is_long);
    __ cmpl(c_rarg1, T_LONG);
    __ jcc(Assembler::equal, is_long);
    __ cmpl(c_rarg1, T_FLOAT);
    __ jcc(Assembler::equal, is_float);
    __ cmpl(c_rarg1, T_DOUBLE);
    __ jcc(Assembler::equal, is_double);

    // handle T_INT case
    __ movl(Address(c_rarg0, 0), rax);

    __ BIND(exit);

    // pop parameters
    __ lea(rsp, rsp_after_call);

#ifdef ASSERT
    // verify that threads correspond
    {
      Label L, S;
      __ cmpptr(r15_thread, thread);
      __ jcc(Assembler::notEqual, S);
      __ get_thread(rbx);
      __ cmpptr(r15_thread, rbx);
      __ jcc(Assembler::equal, L);
      __ bind(S);
      __ jcc(Assembler::equal, L);
      __ stop("StubRoutines::call_stub: threads must correspond");
      __ bind(L);
    }
#endif

    // restore regs belonging to calling function
#ifdef _WIN64
    if (UseAVX > 2) {
      for (int i = 6; i <= 31; i++) {
        __ movdqu(as_XMMRegister(i), xmm_save(i));
      }
    } else {
      for (int i = 6; i <= 15; i++) {
        __ movdqu(as_XMMRegister(i), xmm_save(i));
      }
    }
#endif
    __ movptr(r15, r15_save);
    __ movptr(r14, r14_save);
    __ movptr(r13, r13_save);
    __ movptr(r12, r12_save);
    __ movptr(rbx, rbx_save);

#ifdef _WIN64
    __ movptr(rdi, rdi_save);
    __ movptr(rsi, rsi_save);
#else
    __ ldmxcsr(mxcsr_save);
#endif

    // restore rsp
    __ addptr(rsp, -rsp_after_call_off * wordSize);

    // return
    __ pop(rbp);
    __ ret(0);

    // handle return types different from T_INT
    __ BIND(is_long);
    __ movq(Address(c_rarg0, 0), rax);
    __ jmp(exit);

    __ BIND(is_float);
    __ movflt(Address(c_rarg0, 0), xmm0);
    __ jmp(exit);

    __ BIND(is_double);
    __ movdbl(Address(c_rarg0, 0), xmm0);
    __ jmp(exit);

    return start;
  }

  // Return point for a Java call if there's an exception thrown in
  // Java code.  The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case
  // of an exception crossing an activation frame boundary, that is
  // not the case if the callee is compiled code => need to setup the
  // rsp.
  //
  // rax: exception oop

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    address start = __ pc();

    // same as in generate_call_stub():
    const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
    const Address thread        (rbp, thread_off         * wordSize);

#ifdef ASSERT
    // verify that threads correspond
    {
      Label L, S;
      __ cmpptr(r15_thread, thread);
      __ jcc(Assembler::notEqual, S);
      __ get_thread(rbx);
      __ cmpptr(r15_thread, rbx);
      __ jcc(Assembler::equal, L);
      __ bind(S);
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L);
    }
#endif

    // set pending exception
    __ verify_oop(rax);

    __ movptr(Address(r15_thread, Thread::pending_exception_offset()), rax);
    __ lea(rscratch1, ExternalAddress((address)__FILE__));
    __ movptr(Address(r15_thread, Thread::exception_file_offset()), rscratch1);
    __ movl(Address(r15_thread, Thread::exception_line_offset()), (int) __LINE__);

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL,
           "_call_stub_return_address must have been generated before");
    __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));

    return start;
  }

  // Continuation point for runtime calls returning with a pending
  // exception.  The pending exception check happened in the runtime
  // or native call stub.  The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  // rax: exception
  // rdx: throwing pc
  //
  // NOTE: At entry of this stub, exception-pc must be on stack !!

  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward exception");
    address start = __ pc();

    // Upon entry, the sp points to the return address returning into
    // Java (interpreted or compiled) code; i.e., the return address
    // becomes the throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack
    // but the exception handler will reset the stack pointer ->
    // ignore them.  A potential result in registers can be ignored as
    // well.
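    //
    // What follows: (1) compute the exception handler for the return
    // address on the stack, (2) pop that return address into rdx (the
    // throwing pc) and load the pending exception into rax, clearing it
    // in the thread, (3) jump to the handler in rbx.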

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
    {
      Label L;
      __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t) NULL);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into rbx
    __ movptr(c_rarg0, Address(rsp, 0));
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address,
                         SharedRuntime::exception_handler_for_return_address),
                    r15_thread, c_rarg0);
    __ mov(rbx, rax);

    // setup rax & rdx, remove return address & clear pending exception
    __ pop(rdx);
    __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
    __ movptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);

#ifdef ASSERT
    // make sure exception is set
    {
      Label L;
      __ testptr(rax, rax);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif

    // continue at exception handler (return address removed)
    // rax: exception
    // rbx: exception handler
    // rdx: throwing pc
    __ verify_oop(rax);
    __ jmp(rbx);

    return start;
  }

  // Support for jint atomic::xchg(jint exchange_value, volatile jint* dest)
  //
  // Arguments :
  //    c_rarg0: exchange_value
  //    c_rarg1: dest
  //
  // Result:
  //    *dest <- ex, return (orig *dest)
  address generate_atomic_xchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
    address start = __ pc();

    __ movl(rax, c_rarg0);              // Copy to eax; we need a return value anyhow
    __ xchgl(rax, Address(c_rarg1, 0)); // automatic LOCK
    __ ret(0);

    return start;
  }

  // Support for intptr_t atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
  //
  // Arguments :
  //    c_rarg0: exchange_value
  //    c_rarg1: dest
  //
  // Result:
  //    *dest <- ex, return (orig *dest)
  address generate_atomic_xchg_ptr() {
    StubCodeMark mark(this, "StubRoutines", "atomic_xchg_ptr");
    address start = __ pc();

    __ movptr(rax, c_rarg0);              // Copy to eax; we need a return value anyhow
    __ xchgptr(rax, Address(c_rarg1, 0)); // automatic LOCK
    __ ret(0);

    return start;
  }

  // Support for jint atomic::atomic_cmpxchg(jint exchange_value, volatile jint* dest,
  //                                         jint compare_value)
  //
  // Arguments :
  //    c_rarg0: exchange_value
  //    c_rarg1: dest
  //    c_rarg2: compare_value
  //
  // Result:
  //    if ( compare_value == *dest ) {
  //       *dest = exchange_value
  //       return compare_value;
  //    } else {
  //       return *dest;
  //    }
  address generate_atomic_cmpxchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
    address start = __ pc();

    __ movl(rax, c_rarg2);
    if ( os::is_MP() ) __ lock();
    __ cmpxchgl(c_rarg0, Address(c_rarg1, 0));
    __ ret(0);

    return start;
  }

  // Support for jbyte atomic::atomic_cmpxchg(jbyte exchange_value, volatile jbyte* dest,
  //                                          jbyte compare_value)
  //
  // Arguments :
  //    c_rarg0: exchange_value
  //    c_rarg1: dest
  //    c_rarg2: compare_value
  //
  // Result:
  //    if ( compare_value == *dest ) {
  //       *dest = exchange_value
  //       return compare_value;
  //    } else {
  //       return *dest;
  //    }
  address generate_atomic_cmpxchg_byte() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_byte");
    address start = __ pc();

    __ movsbq(rax, c_rarg2);
    if ( os::is_MP() ) __ lock();
    __ cmpxchgb(c_rarg0, Address(c_rarg1, 0));
    __ ret(0);

    return start;
  }

  // Support for jlong atomic::atomic_cmpxchg(jlong exchange_value,
  //                                          volatile jlong* dest,
  //                                          jlong compare_value)
  // Arguments :
  //    c_rarg0: exchange_value
  //    c_rarg1: dest
  //    c_rarg2: compare_value
  //
  // Result:
  //    if ( compare_value == *dest ) {
  //       *dest = exchange_value
  //       return compare_value;
  //    } else {
  //       return *dest;
  //    }
  address generate_atomic_cmpxchg_long() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
    address start = __ pc();

    __ movq(rax, c_rarg2);
    if ( os::is_MP() ) __ lock();
    __ cmpxchgq(c_rarg0, Address(c_rarg1, 0));
    __ ret(0);

    return start;
  }

  // Support for jint atomic::add(jint add_value, volatile jint* dest)
  //
  // Arguments :
  //    c_rarg0: add_value
  //    c_rarg1: dest
  //
  // Result:
  //    *dest += add_value
  //    return *dest;
  address generate_atomic_add() {
    StubCodeMark mark(this, "StubRoutines", "atomic_add");
    address start = __ pc();

    __ movl(rax, c_rarg0);
    if ( os::is_MP() ) __ lock();
    __ xaddl(Address(c_rarg1, 0), c_rarg0);
    __ addl(rax, c_rarg0);
    __ ret(0);

    return start;
  }

  // Support for intptr_t atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest)
  //
  // Arguments :
  //    c_rarg0: add_value
  //    c_rarg1: dest
  //
  // Result:
  //    *dest += add_value
  //    return *dest;
  address generate_atomic_add_ptr() {
    StubCodeMark mark(this, "StubRoutines", "atomic_add_ptr");
    address start = __ pc();

    __ movptr(rax, c_rarg0); // Copy to eax; we need a return value anyhow
    if ( os::is_MP() ) __ lock();
    __ xaddptr(Address(c_rarg1, 0), c_rarg0);
    __ addptr(rax, c_rarg0);
    __ ret(0);

    return start;
  }

  // Support for intptr_t OrderAccess::fence()
  //
  // Arguments :
  //
  // Result:
  address generate_orderaccess_fence() {
    StubCodeMark mark(this, "StubRoutines", "orderaccess_fence");
    address start = __ pc();
    __ membar(Assembler::StoreLoad);
    __ ret(0);

    return start;
  }

  // Support for intptr_t get_previous_fp()
  //
  // This routine is used to find the previous frame pointer for the
  // caller (current_frame_guess).  This is used as part of debugging
  // when ps() is seemingly lost trying to find frames.
  // This code assumes that the caller (current_frame_guess) has a frame.
  address generate_get_previous_fp() {
    StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
    const Address old_fp(rbp, 0);
    const Address older_fp(rax, 0);
    address start = __ pc();

    __ enter();
    __ movptr(rax, old_fp);   // caller's fp
    __ movptr(rax, older_fp); // the frame for ps()
    __ pop(rbp);
    __ ret(0);

    return start;
  }

  // Support for intptr_t get_previous_sp()
  //
  // This routine is used to find the previous stack pointer for the
  // caller.
  address generate_get_previous_sp() {
    StubCodeMark mark(this, "StubRoutines", "get_previous_sp");
    address start = __ pc();

    __ movptr(rax, rsp);
    __ addptr(rax, 8); // return address is at the top of the stack.
    __ ret(0);

    return start;
  }

  //----------------------------------------------------------------------------------------------------
  // Support for void verify_mxcsr()
  //
  // This routine is used with -Xcheck:jni to verify that native
  // JNI code does not return to Java code without restoring the
  // MXCSR register to our expected state.

  address generate_verify_mxcsr() {
    StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
    address start = __ pc();

    const Address mxcsr_save(rsp, 0);

    if (CheckJNICalls) {
      Label ok_ret;
      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
      __ push(rax);
      __ subptr(rsp, wordSize); // allocate a temp location
      __ stmxcsr(mxcsr_save);
      __ movl(rax, mxcsr_save);
      __ andl(rax, MXCSR_MASK); // Only check control and mask bits
      __ cmp32(rax, mxcsr_std);
      __ jcc(Assembler::equal, ok_ret);

      __ warn("MXCSR changed by native JNI code, use -XX:+RestoreMXCSROnJNICall");

      __ ldmxcsr(mxcsr_std);

      __ bind(ok_ret);
      __ addptr(rsp, wordSize);
      __ pop(rax);
    }

    __ ret(0);

    return start;
  }

  address generate_f2i_fixup() {
    StubCodeMark mark(this, "StubRoutines", "f2i_fixup");
    Address inout(rsp, 5 * wordSize); // return address + 4 saves

    address start = __ pc();

    Label L;

    __ push(rax);
    __ push(c_rarg3);
    __ push(c_rarg2);
    __ push(c_rarg1);

    __ movl(rax, 0x7f800000);
    __ xorl(c_rarg3, c_rarg3);
    __ movl(c_rarg2, inout);
    __ movl(c_rarg1, c_rarg2);
    __ andl(c_rarg1, 0x7fffffff);
    __ cmpl(rax, c_rarg1); // NaN? -> 0
    __ jcc(Assembler::negative, L);
    __ testl(c_rarg2, c_rarg2); // signed ? min_jint : max_jint
    __ movl(c_rarg3, 0x80000000);
    __ movl(rax, 0x7fffffff);
    __ cmovl(Assembler::positive, c_rarg3, rax);

    __ bind(L);
    __ movptr(inout, c_rarg3);

    __ pop(c_rarg1);
    __ pop(c_rarg2);
    __ pop(c_rarg3);
    __ pop(rax);

    __ ret(0);

    return start;
  }

  address generate_f2l_fixup() {
    StubCodeMark mark(this, "StubRoutines", "f2l_fixup");
    Address inout(rsp, 5 * wordSize); // return address + 4 saves
    address start = __ pc();

    Label L;

    __ push(rax);
    __ push(c_rarg3);
    __ push(c_rarg2);
    __ push(c_rarg1);

    __ movl(rax, 0x7f800000);
    __ xorl(c_rarg3, c_rarg3);
    __ movl(c_rarg2, inout);
    __ movl(c_rarg1, c_rarg2);
    __ andl(c_rarg1, 0x7fffffff);
    __ cmpl(rax, c_rarg1); // NaN? -> 0
    __ jcc(Assembler::negative, L);
    __ testl(c_rarg2, c_rarg2); // signed ? min_jlong : max_jlong
    __ mov64(c_rarg3, 0x8000000000000000);
    __ mov64(rax, 0x7fffffffffffffff);
    __ cmov(Assembler::positive, c_rarg3, rax);

    __ bind(L);
    __ movptr(inout, c_rarg3);

    __ pop(c_rarg1);
    __ pop(c_rarg2);
    __ pop(c_rarg3);
    __ pop(rax);

    __ ret(0);

    return start;
  }

  address generate_d2i_fixup() {
    StubCodeMark mark(this, "StubRoutines", "d2i_fixup");
    Address inout(rsp, 6 * wordSize); // return address + 5 saves

    address start = __ pc();

    Label L;

    __ push(rax);
    __ push(c_rarg3);
    __ push(c_rarg2);
    __ push(c_rarg1);
    __ push(c_rarg0);

    __ movl(rax, 0x7ff00000);
    __ movq(c_rarg2, inout);
    __ movl(c_rarg3, c_rarg2);
    __ mov(c_rarg1, c_rarg2);
    __ mov(c_rarg0, c_rarg2);
    __ negl(c_rarg3);
    __ shrptr(c_rarg1, 0x20);
    __ orl(c_rarg3, c_rarg2);
    __ andl(c_rarg1, 0x7fffffff);
    __ xorl(c_rarg2, c_rarg2);
    __ shrl(c_rarg3, 0x1f);
    __ orl(c_rarg1, c_rarg3);
    __ cmpl(rax, c_rarg1);
    __ jcc(Assembler::negative, L); // NaN -> 0
    __ testptr(c_rarg0, c_rarg0); // signed ? min_jint : max_jint
    __ movl(c_rarg2, 0x80000000);
    __ movl(rax, 0x7fffffff);
    __ cmov(Assembler::positive, c_rarg2, rax);

    __ bind(L);
    __ movptr(inout, c_rarg2);

    __ pop(c_rarg0);
    __ pop(c_rarg1);
    __ pop(c_rarg2);
    __ pop(c_rarg3);
    __ pop(rax);

    __ ret(0);

    return start;
  }

  address generate_d2l_fixup() {
    StubCodeMark mark(this, "StubRoutines", "d2l_fixup");
    Address inout(rsp, 6 * wordSize); // return address + 5 saves

    address start = __ pc();

    Label L;

    __ push(rax);
    __ push(c_rarg3);
    __ push(c_rarg2);
    __ push(c_rarg1);
    __ push(c_rarg0);

    __ movl(rax, 0x7ff00000);
    __ movq(c_rarg2, inout);
    __ movl(c_rarg3, c_rarg2);
    __ mov(c_rarg1, c_rarg2);
    __ mov(c_rarg0, c_rarg2);
    __ negl(c_rarg3);
    __ shrptr(c_rarg1, 0x20);
    __ orl(c_rarg3, c_rarg2);
    __ andl(c_rarg1, 0x7fffffff);
    __ xorl(c_rarg2, c_rarg2);
    __ shrl(c_rarg3, 0x1f);
    __ orl(c_rarg1, c_rarg3);
    __ cmpl(rax, c_rarg1);
    __ jcc(Assembler::negative, L); // NaN -> 0
    __ testq(c_rarg0, c_rarg0); // signed ? min_jlong : max_jlong
    __ mov64(c_rarg2, 0x8000000000000000);
    __ mov64(rax, 0x7fffffffffffffff);
    __ cmovq(Assembler::positive, c_rarg2, rax);

    __ bind(L);
    __ movq(inout, c_rarg2);

    __ pop(c_rarg0);
    __ pop(c_rarg1);
    __ pop(c_rarg2);
    __ pop(c_rarg3);
    __ pop(rax);

    __ ret(0);

    return start;
  }

  address generate_fp_mask(const char *stub_name, int64_t mask) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();

    __ emit_data64( mask, relocInfo::none );
    __ emit_data64( mask, relocInfo::none );

    return start;
  }

  // The following routine generates a subroutine to throw an
  // asynchronous UnknownError when an unsafe access gets a fault that
  // could not be reasonably prevented by the programmer.  (Example:
  // SIGBUS/OBJERR.)
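  //
  // The stub works by rewriting its own return address: it reserves a stack
  // slot, saves all registers, calls handle_unsafe_access() (defined above)
  // to compute the pc of the next instruction, stores that pc into the
  // reserved slot, restores the registers and returns, so execution resumes
  // just past the faulting instruction.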
  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();

    __ push(0);                       // hole for return address-to-be
    __ pusha();                       // push registers
    Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord);

    // FIXME: this probably needs alignment logic

    __ subptr(rsp, frame::arg_reg_save_area_bytes);
    BLOCK_COMMENT("call handle_unsafe_access");
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
    __ addptr(rsp, frame::arg_reg_save_area_bytes);

    __ movptr(next_pc, rax);          // stuff next address
    __ popa();
    __ ret(0);                        // jump to next address

    return start;
  }

  // Non-destructive plausibility checks for oops
  //
  // Arguments:
  //    all args on stack!
  //
  // Stack after saving c_rarg3:
  //    [tos + 0]: saved c_rarg3
  //    [tos + 1]: saved c_rarg2
  //    [tos + 2]: saved r12 (several TemplateTable methods use it)
  //    [tos + 3]: saved flags
  //    [tos + 4]: return address
  //  * [tos + 5]: error message (char*)
  //  * [tos + 6]: object to verify (oop)
  //  * [tos + 7]: saved rax - saved by caller and bashed
  //  * [tos + 8]: saved r10 (rscratch1) - saved by caller
  //  * = popped on exit
  address generate_verify_oop() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();

    Label exit, error;

    __ pushf();
    __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));

    __ push(r12);

    // save c_rarg2 and c_rarg3
    __ push(c_rarg2);
    __ push(c_rarg3);

    enum {
      // After previous pushes.
      oop_to_verify = 6 * wordSize,
      saved_rax     = 7 * wordSize,
      saved_r10     = 8 * wordSize,

      // Before the call to MacroAssembler::debug(), see below.
      return_addr   = 16 * wordSize,
      error_msg     = 17 * wordSize
    };

    // get object
    __ movptr(rax, Address(rsp, oop_to_verify));

    // make sure object is 'reasonable'
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, exit); // if obj is NULL it is OK
    // Check if the oop is in the right area of memory
    __ movptr(c_rarg2, rax);
    __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_mask());
    __ andptr(c_rarg2, c_rarg3);
    __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_bits());
    __ cmpptr(c_rarg2, c_rarg3);
    __ jcc(Assembler::notZero, error);

    // set r12 to heapbase for load_klass()
    __ reinit_heapbase();

    // make sure klass is 'reasonable', which is not zero.
    __ load_klass(rax, rax);  // get klass
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, error); // if klass is NULL it is broken

    // return if everything seems ok
    __ bind(exit);
    __ movptr(rax, Address(rsp, saved_rax));       // get saved rax back
    __ movptr(rscratch1, Address(rsp, saved_r10)); // get saved r10 back
    __ pop(c_rarg3);                               // restore c_rarg3
    __ pop(c_rarg2);                               // restore c_rarg2
    __ pop(r12);                                   // restore r12
    __ popf();                                     // restore flags
    __ ret(4 * wordSize);                          // pop caller saved stuff

    // handle errors
    __ bind(error);
    __ movptr(rax, Address(rsp, saved_rax));       // get saved rax back
    __ movptr(rscratch1, Address(rsp, saved_r10)); // get saved r10 back
    __ pop(c_rarg3);                               // get saved c_rarg3 back
    __ pop(c_rarg2);                               // get saved c_rarg2 back
    __ pop(r12);                                   // get saved r12 back
    __ popf();                                     // get saved flags off stack --
                                                   // will be ignored

    __ pusha();                                    // push registers
                                                   // (rip is already pushed)
    // debug(char* msg, int64_t pc, int64_t regs[])
    // We've popped the registers we'd saved (c_rarg3, c_rarg2 and flags), and
    // pushed all the registers, so now the stack looks like:
    //     [tos +  0] 16 saved registers
    //     [tos + 16] return address
    //   * [tos + 17] error message (char*)
    //   * [tos + 18] object to verify (oop)
    //   * [tos + 19] saved rax - saved by caller and bashed
    //   * [tos + 20] saved r10 (rscratch1) - saved by caller
    //   * = popped on exit

    __ movptr(c_rarg0, Address(rsp, error_msg));    // pass address of error message
    __ movptr(c_rarg1, Address(rsp, return_addr));  // pass return address
    __ movq(c_rarg2, rsp);                          // pass address of regs on stack
    __ mov(r12, rsp);                               // remember rsp
    __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
    __ andptr(rsp, -16);                            // align stack as required by ABI
    BLOCK_COMMENT("call MacroAssembler::debug");
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
    __ mov(rsp, r12);                               // restore rsp
    __ popa();                                      // pop registers (includes r12)
    __ ret(4 * wordSize);                           // pop caller saved stuff

    return start;
  }

  //
  // Verify that a register contains a clean 32-bit positive value
  // (high 32 bits are 0) so it can be used in 64-bit shifts.
  //
  //  Input:
  //    Rint  -  32-bit value
  //    Rtmp  -  scratch
  //
  void assert_clean_int(Register Rint, Register Rtmp) {
#ifdef ASSERT
    Label L;
    assert_different_registers(Rtmp, Rint);
    __ movslq(Rtmp, Rint);
    __ cmpq(Rtmp, Rint);
    __ jcc(Assembler::equal, L);
    __ stop("high 32-bits of int value are not 0");
    __ bind(L);
#endif
  }

  // Generate overlap test for array copy stubs
  //
  // Input:
  //     c_rarg0 - from
  //     c_rarg1 - to
  //     c_rarg2 - element count
  //
  // Output:
  //     rax   - &from[element count]
  //
  void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
    assert(no_overlap_target != NULL, "must be generated");
    array_overlap_test(no_overlap_target, NULL, sf);
  }
  void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
    array_overlap_test(NULL, &L_no_overlap, sf);
  }
  void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf) {
    const Register from     = c_rarg0;
    const Register to       = c_rarg1;
    const Register count    = c_rarg2;
    const Register end_from = rax;

    __ cmpptr(to, from);
    __ lea(end_from, Address(from, count, sf, 0));
    if (NOLp == NULL) {
      ExternalAddress no_overlap(no_overlap_target);
      __ jump_cc(Assembler::belowEqual, no_overlap);
      __ cmpptr(to, end_from);
      __ jump_cc(Assembler::aboveEqual, no_overlap);
    } else {
      __ jcc(Assembler::belowEqual, (*NOLp));
      __ cmpptr(to, end_from);
      __ jcc(Assembler::aboveEqual, (*NOLp));
    }
  }

  // Shuffle first three arg regs on Windows into Linux/Solaris locations.
  //
  // Outputs:
  //    rdi - rcx
  //    rsi - rdx
  //    rdx - r8
  //    rcx - r9
  //
  // Registers r9 and r10 are used to save rdi and rsi on Windows, since the
  // latter are non-volatile.  r9 and r10 should not be used by the caller.
  //
  void setup_arg_regs(int nargs = 3) {
    const Register saved_rdi = r9;
    const Register saved_rsi = r10;
    assert(nargs == 3 || nargs == 4, "else fix");
#ifdef _WIN64
    assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
           "unexpected argument registers");
    if (nargs >= 4)
      __ mov(rax, r9);  // r9 is also saved_rdi
    __ movptr(saved_rdi, rdi);
    __ movptr(saved_rsi, rsi);
    __ mov(rdi, rcx); // c_rarg0
    __ mov(rsi, rdx); // c_rarg1
    __ mov(rdx, r8);  // c_rarg2
    if (nargs >= 4)
      __ mov(rcx, rax); // c_rarg3 (via rax)
#else
    assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
           "unexpected argument registers");
#endif
  }

  void restore_arg_regs() {
    const Register saved_rdi = r9;
    const Register saved_rsi = r10;
#ifdef _WIN64
    __ movptr(rdi, saved_rdi);
    __ movptr(rsi, saved_rsi);
#endif
  }

  // Generate code for an array write pre barrier
  //
  //     addr     - starting address
  //     count    - element count
  //     tmp      - scratch register
  //
  //     Destroy no registers!
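  //
  //     Note: with G1 this is a SATB pre barrier; it records the reference
  //     values about to be overwritten, which is why the call can be elided
  //     when the destination is known to be uninitialized.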
  //
  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCTLogging:
        // With G1, don't generate the call if we statically know that the target is uninitialized
        if (!dest_uninitialized) {
          __ pusha();                      // push registers
          if (count == c_rarg0) {
            if (addr == c_rarg1) {
              // exactly backwards!!
              __ xchgptr(c_rarg1, c_rarg0);
            } else {
              __ movptr(c_rarg1, count);
              __ movptr(c_rarg0, addr);
            }
          } else {
            __ movptr(c_rarg0, addr);
            __ movptr(c_rarg1, count);
          }
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
          __ popa();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();

    }
  }

  //
  // Generate code for an array write post barrier
  //
  //  Input:
  //     start    - register containing starting address of destination array
  //     count    - elements count
  //     scratch  - scratch register
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) {
    assert_different_registers(start, count, scratch);
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCTLogging:
        {
          __ pusha();             // push registers (overkill)
          if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
            assert_different_registers(c_rarg1, start);
            __ mov(c_rarg1, count);
            __ mov(c_rarg0, start);
          } else {
            assert_different_registers(c_rarg0, count);
            __ mov(c_rarg0, start);
            __ mov(c_rarg1, count);
          }
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
          __ popa();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

          Label L_loop;
          const Register end = count;

          __ leaq(end, Address(start, count, TIMES_OOP, 0)); // end == start+count*oop_size
          __ subptr(end, BytesPerHeapOop);                   // end - 1 to make inclusive
          __ shrptr(start, CardTableModRefBS::card_shift);
          __ shrptr(end,   CardTableModRefBS::card_shift);
          __ subptr(end, start);                             // end --> cards count

          int64_t disp = (int64_t) ct->byte_map_base;
          __ mov64(scratch, disp);
          __ addptr(start, scratch);
          __ BIND(L_loop);
          __ movb(Address(start, count, Address::times_1), 0);
          __ decrement(count);
          __ jcc(Assembler::greaterEqual, L_loop);
        }
        break;
      default:
        ShouldNotReachHere();

    }
  }


  // Copy big chunks forward
  //
  // Inputs:
  //   end_from       - source array end address
  //   end_to         - destination array end address
  //   qword_count    - 64-bit element count, negative
  //   to             - scratch
  //   L_copy_bytes   - entry label
  //   L_copy_8_bytes - exit label
  //
  void copy_bytes_forward(Register end_from, Register end_to,
                          Register qword_count, Register to,
                          Label& L_copy_bytes, Label& L_copy_8_bytes) {
    DEBUG_ONLY(__ stop("enter at entry label, not here"));
    Label L_loop;
    __ align(OptoLoopAlignment);
    if (UseUnalignedLoadStores) {
      Label L_end;
      // Copy 64 bytes per iteration
      __ BIND(L_loop);
      if (UseAVX > 2) {
        __ evmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
        __ evmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
      } else if (UseAVX == 2) {
        __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
        __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
      } else {
        __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
        __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
        __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
        __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
        __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
      }
      __ BIND(L_copy_bytes);
      __ addptr(qword_count, 8);
      __ jcc(Assembler::lessEqual, L_loop);
      __ subptr(qword_count, 4);  // sub(8) and add(4)
      __ jccb(Assembler::greater, L_end);
      // Copy trailing 32 bytes
      if (UseAVX >= 2) {
        __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
      } else {
        __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
        __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
        __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
      }
      __ addptr(qword_count, 4);
      __ BIND(L_end);
      if (UseAVX >= 2) {
        // clean upper bits of YMM registers
        __ vpxor(xmm0, xmm0);
        __ vpxor(xmm1, xmm1);
      }
    } else {
      // Copy 32 bytes per iteration
      __ BIND(L_loop);
      __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
      __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
      __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
      __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
      __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);

      __ BIND(L_copy_bytes);
      __ addptr(qword_count, 4);
      __ jcc(Assembler::lessEqual, L_loop);
    }
    __ subptr(qword_count, 4);
    __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
  }

  // Copy big chunks backward
  //
  // Inputs:
  //   from           - source array address
  //   dest           - destination array address
  //   qword_count    - 64-bit element count
  //   to             - scratch
  //   L_copy_bytes   - entry label
  //   L_copy_8_bytes - exit label
  //
  void copy_bytes_backward(Register from, Register dest,
                           Register qword_count, Register to,
                           Label& L_copy_bytes, Label& L_copy_8_bytes) {
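    // Here qword_count is positive and counts down towards zero, with
    // addresses formed relative to the array base; copy_bytes_forward is
    // the mirror image, running a negative count up towards zero relative
    // to the end addresses.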
    DEBUG_ONLY(__ stop("enter at entry label, not here"));
    Label L_loop;
    __ align(OptoLoopAlignment);
    if (UseUnalignedLoadStores) {
      Label L_end;
      // Copy 64 bytes per iteration
      __ BIND(L_loop);
      if (UseAVX > 2) {
        __ evmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32), Assembler::AVX_512bit);
        __ evmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0, Assembler::AVX_512bit);
      } else if (UseAVX == 2) {
        __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
        __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
        __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
        __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
      } else {
        __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
        __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
        __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
        __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
        __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
        __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
        __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
        __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
      }
      __ BIND(L_copy_bytes);
      __ subptr(qword_count, 8);
      __ jcc(Assembler::greaterEqual, L_loop);

      __ addptr(qword_count, 4);  // add(8) and sub(4)
      __ jccb(Assembler::less, L_end);
      // Copy trailing 32 bytes
      if (UseAVX >= 2) {
        __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
        __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
      } else {
        __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
        __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
        __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
        __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
      }
      __ subptr(qword_count, 4);
      __ BIND(L_end);
      if (UseAVX >= 2) {
        // clean upper bits of YMM registers
        __ vpxor(xmm0, xmm0);
        __ vpxor(xmm1, xmm1);
      }
    } else {
      // Copy 32 bytes per iteration
      __ BIND(L_loop);
      __ movq(to, Address(from, qword_count, Address::times_8, 24));
      __ movq(Address(dest, qword_count, Address::times_8, 24), to);
      __ movq(to, Address(from, qword_count, Address::times_8, 16));
      __ movq(Address(dest, qword_count, Address::times_8, 16), to);
      __ movq(to, Address(from, qword_count, Address::times_8,  8));
      __ movq(Address(dest, qword_count, Address::times_8,  8), to);
      __ movq(to, Address(from, qword_count, Address::times_8,  0));
      __ movq(Address(dest, qword_count, Address::times_8,  0), to);

      __ BIND(L_copy_bytes);
      __ subptr(qword_count, 4);
      __ jcc(Assembler::greaterEqual, L_loop);
    }
    __ addptr(qword_count, 4);
    __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
  }


  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it.
  // The one to eight bytes within words, dwords or qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  // Side Effects:
  //   disjoint_byte_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_byte_copy().
  //
  address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
    Label L_copy_byte, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register byte_count  = rcx;
    const Register qword_count = count;
    const Register end_from    = from; // source array end address
    const Register end_to      = to;   // destination array end address
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied:  end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    // 'from', 'to' and 'count' are now valid
    __ movptr(byte_count, count);
    __ shrptr(count, 3); // count => qword_count

    // Copy from low to high addresses.  Use 'to' as scratch.
    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
    __ negptr(qword_count); // make the count negative
    __ jmp(L_copy_bytes);

    // Copy trailing qwords
    __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
    __ increment(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    // Check for and copy trailing dword
    __ BIND(L_copy_4_bytes);
    __ testl(byte_count, 4);
    __ jccb(Assembler::zero, L_copy_2_bytes);
    __ movl(rax, Address(end_from, 8));
    __ movl(Address(end_to, 8), rax);

    __ addptr(end_from, 4);
    __ addptr(end_to, 4);

    // Check for and copy trailing word
    __ BIND(L_copy_2_bytes);
    __ testl(byte_count, 2);
    __ jccb(Assembler::zero, L_copy_byte);
    __ movw(rax, Address(end_from, 8));
    __ movw(Address(end_to, 8), rax);

    __ addptr(end_from, 2);
    __ addptr(end_to, 2);

    // Check for and copy trailing byte
    __ BIND(L_copy_byte);
    __ testl(byte_count, 1);
    __ jccb(Assembler::zero, L_exit);
    __ movb(rax, Address(end_from, 8));
    __ movb(Address(end_to, 8), rax);

    __ BIND(L_exit);
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    // Copy in multi-byte chunks
    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
    __ jmp(L_copy_4_bytes);

    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it.  The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
                                      address* entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register byte_count  = rcx;
    const Register qword_count = count;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, Address::times_1);
    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    // 'from', 'to' and 'count' are now valid
    __ movptr(byte_count, count);
    __ shrptr(count, 3);   // count => qword_count

    // Copy from high to low addresses.
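    // The sub-qword tail sits at the high end of the arrays, so it is
    // copied before the qword loop takes over.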

    // Check for and copy trailing byte
    __ testl(byte_count, 1);
    __ jcc(Assembler::zero, L_copy_2_bytes);
    __ movb(rax, Address(from, byte_count, Address::times_1, -1));
    __ movb(Address(to, byte_count, Address::times_1, -1), rax);
    __ decrement(byte_count); // Adjust for possible trailing word

    // Check for and copy trailing word
    __ BIND(L_copy_2_bytes);
    __ testl(byte_count, 2);
    __ jcc(Assembler::zero, L_copy_4_bytes);
    __ movw(rax, Address(from, byte_count, Address::times_1, -2));
    __ movw(Address(to, byte_count, Address::times_1, -2), rax);

    // Check for and copy trailing dword
    __ BIND(L_copy_4_bytes);
    __ testl(byte_count, 4);
    __ jcc(Assembler::zero, L_copy_bytes);
    __ movl(rax, Address(from, qword_count, Address::times_8));
    __ movl(Address(to, qword_count, Address::times_8), rax);
    __ jmp(L_copy_bytes);

    // Copy trailing qwords
    __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
    __ decrement(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    // Copy in multi-byte chunks
    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);

    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
  // let the hardware handle it.  The two or four words within dwords
  // or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  // Side Effects:
  //   disjoint_short_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_short_copy().
  //
  address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register word_count  = rcx;
    const Register qword_count = count;
    const Register end_from    = from; // source array end address
    const Register end_to      = to;   // destination array end address
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied:  end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    // 'from', 'to' and 'count' are now valid
    __ movptr(word_count, count);
    __ shrptr(count, 2); // count => qword_count

    // Copy from low to high addresses.  Use 'to' as scratch.
    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
    __ negptr(qword_count);
    __ jmp(L_copy_bytes);

    // Copy trailing qwords
    __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
    __ increment(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    // Original 'dest' is trashed, so we can't use it as a
    // base register for a possible trailing word copy

    // Check for and copy trailing dword
    __ BIND(L_copy_4_bytes);
    __ testl(word_count, 2);
    __ jccb(Assembler::zero, L_copy_2_bytes);
    __ movl(rax, Address(end_from, 8));
    __ movl(Address(end_to, 8), rax);

    __ addptr(end_from, 4);
    __ addptr(end_to, 4);

    // Check for and copy trailing word
    __ BIND(L_copy_2_bytes);
    __ testl(word_count, 1);
    __ jccb(Assembler::zero, L_exit);
    __ movw(rax, Address(end_from, 8));
    __ movw(Address(end_to, 8), rax);

    __ BIND(L_exit);
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    // Copy in multi-byte chunks
    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
    __ jmp(L_copy_4_bytes);

    return start;
  }

  address generate_fill(BasicType t, bool aligned, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    BLOCK_COMMENT("Entry:");

    const Register to    = c_rarg0;  // destination array address
    const Register value = c_rarg1;  // value
    const Register count = c_rarg2;  // elements count

    __ enter(); // required for proper stackwalking of RuntimeStub frame
generate_fill(t, aligned, to, value, count, rax, xmm0); 1794 1795 __ leave(); // required for proper stackwalking of RuntimeStub frame 1796 __ ret(0); 1797 return start; 1798 } 1799 1800 // Arguments: 1801 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1802 // ignored 1803 // name - stub name string 1804 // 1805 // Inputs: 1806 // c_rarg0 - source array address 1807 // c_rarg1 - destination array address 1808 // c_rarg2 - element count, treated as ssize_t, can be zero 1809 // 1810 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we 1811 // let the hardware handle it. The two or four words within dwords 1812 // or qwords that span cache line boundaries will still be loaded 1813 // and stored atomically. 1814 // 1815 address generate_conjoint_short_copy(bool aligned, address nooverlap_target, 1816 address *entry, const char *name) { 1817 __ align(CodeEntryAlignment); 1818 StubCodeMark mark(this, "StubRoutines", name); 1819 address start = __ pc(); 1820 1821 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes; 1822 const Register from = rdi; // source array address 1823 const Register to = rsi; // destination array address 1824 const Register count = rdx; // elements count 1825 const Register word_count = rcx; 1826 const Register qword_count = count; 1827 1828 __ enter(); // required for proper stackwalking of RuntimeStub frame 1829 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 1830 1831 if (entry != NULL) { 1832 *entry = __ pc(); 1833 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1834 BLOCK_COMMENT("Entry:"); 1835 } 1836 1837 array_overlap_test(nooverlap_target, Address::times_2); 1838 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1839 // r9 and r10 may be used to save non-volatile registers 1840 1841 // 'from', 'to' and 'count' are now valid 1842 __ movptr(word_count, count); 1843 __ shrptr(count, 2); // count => qword_count 1844 1845 // Copy from high to low addresses. Use 'to' as scratch. 
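//
// In C terms the tail-first backward copy below is roughly (an illustrative
// sketch only, not generated code):
//
//   if (word_count & 1) to[word_count - 1] = from[word_count - 1]; // trailing word
//   if (word_count & 2) { /* copy the dword just past the qwords */ }
//   while (qword_count != 0) { /* copy one qword, high to low */ qword_count--; }
//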
1846 1847 // Check for and copy trailing word 1848 __ testl(word_count, 1); 1849 __ jccb(Assembler::zero, L_copy_4_bytes); 1850 __ movw(rax, Address(from, word_count, Address::times_2, -2)); 1851 __ movw(Address(to, word_count, Address::times_2, -2), rax); 1852 1853 // Check for and copy trailing dword 1854 __ BIND(L_copy_4_bytes); 1855 __ testl(word_count, 2); 1856 __ jcc(Assembler::zero, L_copy_bytes); 1857 __ movl(rax, Address(from, qword_count, Address::times_8)); 1858 __ movl(Address(to, qword_count, Address::times_8), rax); 1859 __ jmp(L_copy_bytes); 1860 1861 // Copy trailing qwords 1862 __ BIND(L_copy_8_bytes); 1863 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 1864 __ movq(Address(to, qword_count, Address::times_8, -8), rax); 1865 __ decrement(qword_count); 1866 __ jcc(Assembler::notZero, L_copy_8_bytes); 1867 1868 restore_arg_regs(); 1869 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free 1870 __ xorptr(rax, rax); // return 0 1871 __ leave(); // required for proper stackwalking of RuntimeStub frame 1872 __ ret(0); 1873 1874 // Copy in multi-byte chunks 1875 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); 1876 1877 restore_arg_regs(); 1878 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free 1879 __ xorptr(rax, rax); // return 0 1880 __ leave(); // required for proper stackwalking of RuntimeStub frame 1881 __ ret(0); 1882 1883 return start; 1884 } 1885 1886 // Arguments: 1887 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1888 // ignored 1889 // is_oop - true => oop array, so generate store check code 1890 // name - stub name string 1891 // 1892 // Inputs: 1893 // c_rarg0 - source array address 1894 // c_rarg1 - destination array address 1895 // c_rarg2 - element count, treated as ssize_t, can be zero 1896 // 1897 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let 1898 // the hardware handle it. The two dwords within qwords that span 1899 // cache line boundaries will still be loaded and stored atomically. 1900 // 1901 // Side Effects: 1902 // disjoint_int_copy_entry is set to the no-overlap entry point 1903 // used by generate_conjoint_int_oop_copy(). 1904 // 1905 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, 1906 const char *name, bool dest_uninitialized = false) { 1907 __ align(CodeEntryAlignment); 1908 StubCodeMark mark(this, "StubRoutines", name); 1909 address start = __ pc(); 1910 1911 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit; 1912 const Register from = rdi; // source array address 1913 const Register to = rsi; // destination array address 1914 const Register count = rdx; // elements count 1915 const Register dword_count = rcx; 1916 const Register qword_count = count; 1917 const Register end_from = from; // source array end address 1918 const Register end_to = to; // destination array end address 1919 const Register saved_to = r11; // saved destination array address 1920 // End pointers are inclusive, and if count is not zero they point 1921 // to the last unit copied: end_to[0] := end_from[0] 1922 1923 __ enter(); // required for proper stackwalking of RuntimeStub frame 1924 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
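//
// For is_oop the element copy is bracketed by GC barriers; conceptually
// (an illustrative sketch, not the generated code):
//
//   pre_barrier(to, count);    // e.g. record old values for a SATB collector
//   ... copy the elements ...
//   post_barrier(to, count);   // dirty the card table for the stored range
//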
1925 1926 if (entry != NULL) { 1927 *entry = __ pc(); 1928 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1929 BLOCK_COMMENT("Entry:"); 1930 } 1931 1932 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1933 // r9 and r10 may be used to save non-volatile registers 1934 if (is_oop) { 1935 __ movq(saved_to, to); 1936 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized); 1937 } 1938 1939 // 'from', 'to' and 'count' are now valid 1940 __ movptr(dword_count, count); 1941 __ shrptr(count, 1); // count => qword_count 1942 1943 // Copy from low to high addresses. Use 'to' as scratch. 1944 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); 1945 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); 1946 __ negptr(qword_count); 1947 __ jmp(L_copy_bytes); 1948 1949 // Copy trailing qwords 1950 __ BIND(L_copy_8_bytes); 1951 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); 1952 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); 1953 __ increment(qword_count); 1954 __ jcc(Assembler::notZero, L_copy_8_bytes); 1955 1956 // Check for and copy trailing dword 1957 __ BIND(L_copy_4_bytes); 1958 __ testl(dword_count, 1); // Only byte test since the value is 0 or 1 1959 __ jccb(Assembler::zero, L_exit); 1960 __ movl(rax, Address(end_from, 8)); 1961 __ movl(Address(end_to, 8), rax); 1962 1963 __ BIND(L_exit); 1964 if (is_oop) { 1965 gen_write_ref_array_post_barrier(saved_to, dword_count, rax); 1966 } 1967 restore_arg_regs(); 1968 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free 1969 __ xorptr(rax, rax); // return 0 1970 __ leave(); // required for proper stackwalking of RuntimeStub frame 1971 __ ret(0); 1972 1973 // Copy in multi-byte chunks 1974 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); 1975 __ jmp(L_copy_4_bytes); 1976 1977 return start; 1978 } 1979 1980 // Arguments: 1981 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1982 // ignored 1983 // is_oop - true => oop array, so generate store check code 1984 // name - stub name string 1985 // 1986 // Inputs: 1987 // c_rarg0 - source array address 1988 // c_rarg1 - destination array address 1989 // c_rarg2 - element count, treated as ssize_t, can be zero 1990 // 1991 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let 1992 // the hardware handle it. The two dwords within qwords that span 1993 // cache line boundaries will still be loaded and stored atomically. 1994 // 1995 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target, 1996 address *entry, const char *name, 1997 bool dest_uninitialized = false) { 1998 __ align(CodeEntryAlignment); 1999 StubCodeMark mark(this, "StubRoutines", name); 2000 address start = __ pc(); 2001 2002 Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit; 2003 const Register from = rdi; // source array address 2004 const Register to = rsi; // destination array address 2005 const Register count = rdx; // elements count 2006 const Register dword_count = rcx; 2007 const Register qword_count = count; 2008 2009 __ enter(); // required for proper stackwalking of RuntimeStub frame 2010 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
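//
// In C terms the conjoint tail handling that follows is roughly
// (illustrative sketch only):
//
//   if (dword_count & 1) to[dword_count - 1] = from[dword_count - 1]; // odd dword
//   while (qword_count != 0) { /* copy one qword, high to low */ qword_count--; }
//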
2011 2012 if (entry != NULL) { 2013 *entry = __ pc(); 2014 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 2015 BLOCK_COMMENT("Entry:"); 2016 } 2017 2018 array_overlap_test(nooverlap_target, Address::times_4); 2019 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 2020 // r9 and r10 may be used to save non-volatile registers 2021 2022 if (is_oop) { 2023 // no registers are destroyed by this call 2024 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized); 2025 } 2026 2027 assert_clean_int(count, rax); // Make sure 'count' is clean int. 2028 // 'from', 'to' and 'count' are now valid 2029 __ movptr(dword_count, count); 2030 __ shrptr(count, 1); // count => qword_count 2031 2032 // Copy from high to low addresses. Use 'to' as scratch. 2033 2034 // Check for and copy trailing dword 2035 __ testl(dword_count, 1); 2036 __ jcc(Assembler::zero, L_copy_bytes); 2037 __ movl(rax, Address(from, dword_count, Address::times_4, -4)); 2038 __ movl(Address(to, dword_count, Address::times_4, -4), rax); 2039 __ jmp(L_copy_bytes); 2040 2041 // Copy trailing qwords 2042 __ BIND(L_copy_8_bytes); 2043 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 2044 __ movq(Address(to, qword_count, Address::times_8, -8), rax); 2045 __ decrement(qword_count); 2046 __ jcc(Assembler::notZero, L_copy_8_bytes); 2047 2048 if (is_oop) { 2049 __ jmp(L_exit); 2050 } 2051 restore_arg_regs(); 2052 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free 2053 __ xorptr(rax, rax); // return 0 2054 __ leave(); // required for proper stackwalking of RuntimeStub frame 2055 __ ret(0); 2056 2057 // Copy in multi-byte chunks 2058 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); 2059 2060 __ BIND(L_exit); 2061 if (is_oop) { 2062 gen_write_ref_array_post_barrier(to, dword_count, rax); 2063 } 2064 restore_arg_regs(); 2065 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free 2066 __ xorptr(rax, rax); // return 0 2067 __ leave(); // required for proper stackwalking of RuntimeStub frame 2068 __ ret(0); 2069 2070 return start; 2071 } 2072 2073 // Arguments: 2074 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes 2075 // ignored 2076 // is_oop - true => oop array, so generate store check code 2077 // name - stub name string 2078 // 2079 // Inputs: 2080 // c_rarg0 - source array address 2081 // c_rarg1 - destination array address 2082 // c_rarg2 - element count, treated as ssize_t, can be zero 2083 // 2084 // Side Effects: 2085 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the 2086 // no-overlap entry point used by generate_conjoint_long_oop_copy().
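//
// Each element is already a full qword here, so no trailing-element fixup is
// needed; in C terms (illustrative): while (count--) *to++ = *from++;
//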
2087 // 2088 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry, 2089 const char *name, bool dest_uninitialized = false) { 2090 __ align(CodeEntryAlignment); 2091 StubCodeMark mark(this, "StubRoutines", name); 2092 address start = __ pc(); 2093 2094 Label L_copy_bytes, L_copy_8_bytes, L_exit; 2095 const Register from = rdi; // source array address 2096 const Register to = rsi; // destination array address 2097 const Register qword_count = rdx; // elements count 2098 const Register end_from = from; // source array end address 2099 const Register end_to = rcx; // destination array end address 2100 const Register saved_to = to; 2101 const Register saved_count = r11; 2102 // End pointers are inclusive, and if count is not zero they point 2103 // to the last unit copied: end_to[0] := end_from[0] 2104 2105 __ enter(); // required for proper stackwalking of RuntimeStub frame 2106 // Save no-overlap entry point for generate_conjoint_long_oop_copy() 2107 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 2108 2109 if (entry != NULL) { 2110 *entry = __ pc(); 2111 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 2112 BLOCK_COMMENT("Entry:"); 2113 } 2114 2115 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 2116 // r9 and r10 may be used to save non-volatile registers 2117 // 'from', 'to' and 'qword_count' are now valid 2118 if (is_oop) { 2119 // Save to and count for store barrier 2120 __ movptr(saved_count, qword_count); 2121 // no registers are destroyed by this call 2122 gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized); 2123 } 2124 2125 // Copy from low to high addresses. Use 'to' as scratch. 2126 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); 2127 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); 2128 __ negptr(qword_count); 2129 __ jmp(L_copy_bytes); 2130 2131 // Copy trailing qwords 2132 __ BIND(L_copy_8_bytes); 2133 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); 2134 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); 2135 __ increment(qword_count); 2136 __ jcc(Assembler::notZero, L_copy_8_bytes); 2137 2138 if (is_oop) { 2139 __ jmp(L_exit); 2140 } else { 2141 restore_arg_regs(); 2142 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free 2143 __ xorptr(rax, rax); // return 0 2144 __ leave(); // required for proper stackwalking of RuntimeStub frame 2145 __ ret(0); 2146 } 2147 2148 // Copy in multi-byte chunks 2149 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); 2150 2151 if (is_oop) { 2152 __ BIND(L_exit); 2153 gen_write_ref_array_post_barrier(saved_to, saved_count, rax); 2154 } 2155 restore_arg_regs(); 2156 if (is_oop) { 2157 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free 2158 } else { 2159 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free 2160 } 2161 __ xorptr(rax, rax); // return 0 2162 __ leave(); // required for proper stackwalking of RuntimeStub frame 2163 __ ret(0); 2164 2165 return start; 2166 } 2167 2168 // Arguments: 2169 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes 2170 // ignored 2171 // is_oop - true => oop array, so generate store check code 2172 // name - stub name string 2173 // 2174 // Inputs: 2175 // c_rarg0 - source array address 2176 // c_rarg1 - destination array address 2177 // c_rarg2 - element count,
treated as ssize_t, can be zero 2178 // 2179 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, 2180 address nooverlap_target, address *entry, 2181 const char *name, bool dest_uninitialized = false) { 2182 __ align(CodeEntryAlignment); 2183 StubCodeMark mark(this, "StubRoutines", name); 2184 address start = __ pc(); 2185 2186 Label L_copy_bytes, L_copy_8_bytes, L_exit; 2187 const Register from = rdi; // source array address 2188 const Register to = rsi; // destination array address 2189 const Register qword_count = rdx; // elements count 2190 const Register saved_count = rcx; 2191 2192 __ enter(); // required for proper stackwalking of RuntimeStub frame 2193 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 2194 2195 if (entry != NULL) { 2196 *entry = __ pc(); 2197 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 2198 BLOCK_COMMENT("Entry:"); 2199 } 2200 2201 array_overlap_test(nooverlap_target, Address::times_8); 2202 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 2203 // r9 and r10 may be used to save non-volatile registers 2204 // 'from', 'to' and 'qword_count' are now valid 2205 if (is_oop) { 2206 // Save to and count for store barrier 2207 __ movptr(saved_count, qword_count); 2208 // No registers are destroyed by this call 2209 gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized); 2210 } 2211 2212 __ jmp(L_copy_bytes); 2213 2214 // Copy trailing qwords 2215 __ BIND(L_copy_8_bytes); 2216 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 2217 __ movq(Address(to, qword_count, Address::times_8, -8), rax); 2218 __ decrement(qword_count); 2219 __ jcc(Assembler::notZero, L_copy_8_bytes); 2220 2221 if (is_oop) { 2222 __ jmp(L_exit); 2223 } else { 2224 restore_arg_regs(); 2225 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free 2226 __ xorptr(rax, rax); // return 0 2227 __ leave(); // required for proper stackwalking of RuntimeStub frame 2228 __ ret(0); 2229 } 2230 2231 // Copy in multi-byte chunks 2232 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); 2233 2234 if (is_oop) { 2235 __ BIND(L_exit); 2236 gen_write_ref_array_post_barrier(to, saved_count, rax); 2237 } 2238 restore_arg_regs(); 2239 if (is_oop) { 2240 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free 2241 } else { 2242 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free 2243 } 2244 __ xorptr(rax, rax); // return 0 2245 __ leave(); // required for proper stackwalking of RuntimeStub frame 2246 __ ret(0); 2247 2248 return start; 2249 } 2250 2251 2252 // Helper for generating a dynamic type check. 2253 // Smashes no registers. 2254 void generate_type_check(Register sub_klass, 2255 Register super_check_offset, 2256 Register super_klass, 2257 Label& L_success) { 2258 assert_different_registers(sub_klass, super_check_offset, super_klass); 2259 2260 BLOCK_COMMENT("type_check:"); 2261 2262 Label L_miss; 2263 2264 __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, 2265 super_check_offset); 2266 __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); 2267 2268 // Fall through on failure!
2269 __ BIND(L_miss); 2270 } 2271 2272 // 2273 // Generate checkcasting array copy stub 2274 // 2275 // Input: 2276 // c_rarg0 - source array address 2277 // c_rarg1 - destination array address 2278 // c_rarg2 - element count, treated as ssize_t, can be zero 2279 // c_rarg3 - size_t ckoff (super_check_offset) 2280 // not Win64 2281 // c_rarg4 - oop ckval (super_klass) 2282 // Win64 2283 // rsp+40 - oop ckval (super_klass) 2284 // 2285 // Output: 2286 // rax == 0 - success 2287 // rax == -1^K - failure, where K is partial transfer count 2288 // 2289 address generate_checkcast_copy(const char *name, address *entry, 2290 bool dest_uninitialized = false) { 2291 2292 Label L_load_element, L_store_element, L_do_card_marks, L_done; 2293 2294 // Input registers (after setup_arg_regs) 2295 const Register from = rdi; // source array address 2296 const Register to = rsi; // destination array address 2297 const Register length = rdx; // elements count 2298 const Register ckoff = rcx; // super_check_offset 2299 const Register ckval = r8; // super_klass 2300 2301 // Registers used as temps (r13, r14 are save-on-entry) 2302 const Register end_from = from; // source array end address 2303 const Register end_to = r13; // destination array end address 2304 const Register count = rdx; // -(count_remaining) 2305 const Register r14_length = r14; // saved copy of length 2306 // End pointers are inclusive, and if length is not zero they point 2307 // to the last unit copied: end_to[0] := end_from[0] 2308 2309 const Register rax_oop = rax; // actual oop copied 2310 const Register r11_klass = r11; // oop._klass 2311 2312 //--------------------------------------------------------------- 2313 // Assembler stub will be used for this call to arraycopy 2314 // if the two arrays are subtypes of Object[] but the 2315 // destination array type is not equal to or a supertype 2316 // of the source type. Each element must be separately 2317 // checked. 2318 2319 __ align(CodeEntryAlignment); 2320 StubCodeMark mark(this, "StubRoutines", name); 2321 address start = __ pc(); 2322 2323 __ enter(); // required for proper stackwalking of RuntimeStub frame 2324 2325 #ifdef ASSERT 2326 // caller guarantees that the arrays really are different 2327 // otherwise, we would have to make conjoint checks 2328 { Label L; 2329 array_overlap_test(L, TIMES_OOP); 2330 __ stop("checkcast_copy within a single array"); 2331 __ bind(L); 2332 } 2333 #endif //ASSERT 2334 2335 setup_arg_regs(4); // from => rdi, to => rsi, length => rdx 2336 // ckoff => rcx, ckval => r8 2337 // r9 and r10 may be used to save non-volatile registers 2338 #ifdef _WIN64 2339 // last argument (#4) is on stack on Win64 2340 __ movptr(ckval, Address(rsp, 6 * wordSize)); 2341 #endif 2342 2343 // Caller of this entry point must set up the argument registers. 2344 if (entry != NULL) { 2345 *entry = __ pc(); 2346 BLOCK_COMMENT("Entry:"); 2347 } 2348 2349 // allocate spill slots for r13, r14 2350 enum { 2351 saved_r13_offset, 2352 saved_r14_offset, 2353 saved_rbp_offset 2354 }; 2355 __ subptr(rsp, saved_rbp_offset * wordSize); 2356 __ movptr(Address(rsp, saved_r13_offset * wordSize), r13); 2357 __ movptr(Address(rsp, saved_r14_offset * wordSize), r14); 2358 2359 // check that int operands are properly extended to size_t 2360 assert_clean_int(length, rax); 2361 assert_clean_int(ckoff, rax); 2362 2363 #ifdef ASSERT 2364 BLOCK_COMMENT("assert consistent ckoff/ckval"); 2365 // The ckoff and ckval must be mutually consistent, 2366 // even though caller generates both. 
2367 { Label L; 2368 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2369 __ cmpl(ckoff, Address(ckval, sco_offset)); 2370 __ jcc(Assembler::equal, L); 2371 __ stop("super_check_offset inconsistent"); 2372 __ bind(L); 2373 } 2374 #endif //ASSERT 2375 2376 // Loop-invariant addresses. They are exclusive end pointers. 2377 Address end_from_addr(from, length, TIMES_OOP, 0); 2378 Address end_to_addr(to, length, TIMES_OOP, 0); 2379 // Loop-variant addresses. They assume post-incremented count < 0. 2380 Address from_element_addr(end_from, count, TIMES_OOP, 0); 2381 Address to_element_addr(end_to, count, TIMES_OOP, 0); 2382 2383 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized); 2384 2385 // Copy from low to high addresses, indexed from the end of each array. 2386 __ lea(end_from, end_from_addr); 2387 __ lea(end_to, end_to_addr); 2388 __ movptr(r14_length, length); // save a copy of the length 2389 assert(length == count, ""); // else fix next line: 2390 __ negptr(count); // negate and test the length 2391 __ jcc(Assembler::notZero, L_load_element); 2392 2393 // Empty array: Nothing to do. 2394 __ xorptr(rax, rax); // return 0 on (trivial) success 2395 __ jmp(L_done); 2396 2397 // ======== begin loop ======== 2398 // (Loop is rotated; its entry is L_load_element.) 2399 // Loop control: 2400 // for (count = -count; count != 0; count++) 2401 // Base pointers src, dst are biased by 8*(count-1), to last element. 2402 __ align(OptoLoopAlignment); 2403 2404 __ BIND(L_store_element); 2405 __ store_heap_oop(to_element_addr, rax_oop); // store the oop 2406 __ increment(count); // increment the count toward zero 2407 __ jcc(Assembler::zero, L_do_card_marks); 2408 2409 // ======== loop entry is here ======== 2410 __ BIND(L_load_element); 2411 __ load_heap_oop(rax_oop, from_element_addr); // load the oop 2412 __ testptr(rax_oop, rax_oop); 2413 __ jcc(Assembler::zero, L_store_element); 2414 2415 __ load_klass(r11_klass, rax_oop); // query the object klass 2416 generate_type_check(r11_klass, ckoff, ckval, L_store_element); 2417 // ======== end loop ======== 2418 2419 // It was a real error; we must depend on the caller to finish the job. 2420 // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops. 2421 // Emit GC store barriers for the oops we have copied (r14 + rdx), 2422 // and report their number to the caller. 2423 assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1); 2424 Label L_post_barrier; 2425 __ addptr(r14_length, count); // K = (original - remaining) oops 2426 __ movptr(rax, r14_length); // save the value 2427 __ notptr(rax); // report (-1^K) to caller (does not affect flags) 2428 __ jccb(Assembler::notZero, L_post_barrier); 2429 __ jmp(L_done); // K == 0, nothing was copied, skip post barrier 2430 2431 // Come here on success only. 2432 __ BIND(L_do_card_marks); 2433 __ xorptr(rax, rax); // return 0 on success 2434 2435 __ BIND(L_post_barrier); 2436 gen_write_ref_array_post_barrier(to, r14_length, rscratch1); 2437 2438 // Common exit point (success or failure).
2439 __ BIND(L_done); 2440 __ movptr(r13, Address(rsp, saved_r13_offset * wordSize)); 2441 __ movptr(r14, Address(rsp, saved_r14_offset * wordSize)); 2442 restore_arg_regs(); 2443 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free 2444 __ leave(); // required for proper stackwalking of RuntimeStub frame 2445 __ ret(0); 2446 2447 return start; 2448 } 2449 2450 // 2451 // Generate 'unsafe' array copy stub 2452 // Though just as safe as the other stubs, it takes an unscaled 2453 // size_t argument instead of an element count. 2454 // 2455 // Input: 2456 // c_rarg0 - source array address 2457 // c_rarg1 - destination array address 2458 // c_rarg2 - byte count, treated as ssize_t, can be zero 2459 // 2460 // Examines the alignment of the operands and dispatches 2461 // to a long, int, short, or byte copy loop. 2462 // 2463 address generate_unsafe_copy(const char *name, 2464 address byte_copy_entry, address short_copy_entry, 2465 address int_copy_entry, address long_copy_entry) { 2466 2467 Label L_long_aligned, L_int_aligned, L_short_aligned; 2468 2469 // Input registers (before setup_arg_regs) 2470 const Register from = c_rarg0; // source array address 2471 const Register to = c_rarg1; // destination array address 2472 const Register size = c_rarg2; // byte count (size_t) 2473 2474 // Register used as a temp 2475 const Register bits = rax; // test copy of low bits 2476 2477 __ align(CodeEntryAlignment); 2478 StubCodeMark mark(this, "StubRoutines", name); 2479 address start = __ pc(); 2480 2481 __ enter(); // required for proper stackwalking of RuntimeStub frame 2482 2483 // bump this on entry, not on exit: 2484 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); 2485 2486 __ mov(bits, from); 2487 __ orptr(bits, to); 2488 __ orptr(bits, size); 2489 2490 __ testb(bits, BytesPerLong-1); 2491 __ jccb(Assembler::zero, L_long_aligned); 2492 2493 __ testb(bits, BytesPerInt-1); 2494 __ jccb(Assembler::zero, L_int_aligned); 2495 2496 __ testb(bits, BytesPerShort-1); 2497 __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); 2498 2499 __ BIND(L_short_aligned); 2500 __ shrptr(size, LogBytesPerShort); // size => short_count 2501 __ jump(RuntimeAddress(short_copy_entry)); 2502 2503 __ BIND(L_int_aligned); 2504 __ shrptr(size, LogBytesPerInt); // size => int_count 2505 __ jump(RuntimeAddress(int_copy_entry)); 2506 2507 __ BIND(L_long_aligned); 2508 __ shrptr(size, LogBytesPerLong); // size => qword_count 2509 __ jump(RuntimeAddress(long_copy_entry)); 2510 2511 return start; 2512 } 2513 2514 // Perform range checks on the proposed arraycopy. 2515 // Kills temp, but nothing else. 2516 // Also, clean the sign bits of src_pos and dst_pos. 
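//
// In C terms the two checks are (illustrative sketch; the unsigned 'above'
// compare also rejects sums that wrapped negative):
//
//   if ((juint)(src_pos + length) > (juint)arrayOop(src)->length()) FAIL;
//   if ((juint)(dst_pos + length) > (juint)arrayOop(dst)->length()) FAIL;
//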
2517 void arraycopy_range_checks(Register src, // source array oop (c_rarg0) 2518 Register src_pos, // source position (c_rarg1) 2519 Register dst, // destination array oop (c_rarg2) 2520 Register dst_pos, // destination position (c_rarg3) 2521 Register length, 2522 Register temp, 2523 Label& L_failed) { 2524 BLOCK_COMMENT("arraycopy_range_checks:"); 2525 2526 // if (src_pos + length > arrayOop(src)->length()) FAIL; 2527 __ movl(temp, length); 2528 __ addl(temp, src_pos); // src_pos + length 2529 __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes())); 2530 __ jcc(Assembler::above, L_failed); 2531 2532 // if (dst_pos + length > arrayOop(dst)->length()) FAIL; 2533 __ movl(temp, length); 2534 __ addl(temp, dst_pos); // dst_pos + length 2535 __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes())); 2536 __ jcc(Assembler::above, L_failed); 2537 2538 // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. 2539 // Move with sign extension can be used since they are positive. 2540 __ movslq(src_pos, src_pos); 2541 __ movslq(dst_pos, dst_pos); 2542 2543 BLOCK_COMMENT("arraycopy_range_checks done"); 2544 } 2545 2546 // 2547 // Generate generic array copy stubs 2548 // 2549 // Input: 2550 // c_rarg0 - src oop 2551 // c_rarg1 - src_pos (32-bits) 2552 // c_rarg2 - dst oop 2553 // c_rarg3 - dst_pos (32-bits) 2554 // not Win64 2555 // c_rarg4 - element count (32-bits) 2556 // Win64 2557 // rsp+40 - element count (32-bits) 2558 // 2559 // Output: 2560 // rax == 0 - success 2561 // rax == -1^K - failure, where K is partial transfer count 2562 // 2563 address generate_generic_copy(const char *name, 2564 address byte_copy_entry, address short_copy_entry, 2565 address int_copy_entry, address oop_copy_entry, 2566 address long_copy_entry, address checkcast_copy_entry) { 2567 2568 Label L_failed, L_failed_0, L_objArray; 2569 Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; 2570 2571 // Input registers 2572 const Register src = c_rarg0; // source array oop 2573 const Register src_pos = c_rarg1; // source position 2574 const Register dst = c_rarg2; // destination array oop 2575 const Register dst_pos = c_rarg3; // destination position 2576 #ifndef _WIN64 2577 const Register length = c_rarg4; 2578 #else 2579 const Address length(rsp, 6 * wordSize); // elements count is on stack on Win64 2580 #endif 2581 2582 { int modulus = CodeEntryAlignment; 2583 int target = modulus - 5; // 5 = sizeof jmp(L_failed) 2584 int advance = target - (__ offset() % modulus); 2585 if (advance < 0) advance += modulus; 2586 if (advance > 0) __ nop(advance); 2587 } 2588 StubCodeMark mark(this, "StubRoutines", name); 2589 2590 // Short-hop target to L_failed. Makes for denser prologue code. 2591 __ BIND(L_failed_0); 2592 __ jmp(L_failed); 2593 assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); 2594 2595 __ align(CodeEntryAlignment); 2596 address start = __ pc(); 2597 2598 __ enter(); // required for proper stackwalking of RuntimeStub frame 2599 2600 // bump this on entry, not on exit: 2601 inc_counter_np(SharedRuntime::_generic_array_copy_ctr); 2602 2603 //----------------------------------------------------------------------- 2604 // Assembler stub will be used for this call to arraycopy 2605 // if the following conditions are met: 2606 // 2607 // (1) src and dst must not be null. 2608 // (2) src_pos must not be negative. 2609 // (3) dst_pos must not be negative. 2610 // (4) length must not be negative. 2611 // (5) src klass and dst klass should be the same and not NULL.
2612 // (6) src and dst should be arrays. 2613 // (7) src_pos + length must not exceed length of src. 2614 // (8) dst_pos + length must not exceed length of dst. 2615 // 2616 2617 // if (src == NULL) return -1; 2618 __ testptr(src, src); // src oop 2619 size_t j1off = __ offset(); 2620 __ jccb(Assembler::zero, L_failed_0); 2621 2622 // if (src_pos < 0) return -1; 2623 __ testl(src_pos, src_pos); // src_pos (32-bits) 2624 __ jccb(Assembler::negative, L_failed_0); 2625 2626 // if (dst == NULL) return -1; 2627 __ testptr(dst, dst); // dst oop 2628 __ jccb(Assembler::zero, L_failed_0); 2629 2630 // if (dst_pos < 0) return -1; 2631 __ testl(dst_pos, dst_pos); // dst_pos (32-bits) 2632 size_t j4off = __ offset(); 2633 __ jccb(Assembler::negative, L_failed_0); 2634 2635 // The first four tests are very dense code, 2636 // but not quite dense enough to put four 2637 // jumps in a 16-byte instruction fetch buffer. 2638 // That's good, because some branch predictors 2639 // do not like jumps so close together. 2640 // Make sure of this. 2641 guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps"); 2642 2643 // registers used as temp 2644 const Register r11_length = r11; // elements count to copy 2645 const Register r10_src_klass = r10; // array klass 2646 2647 // if (length < 0) return -1; 2648 __ movl(r11_length, length); // length (elements count, 32-bits value) 2649 __ testl(r11_length, r11_length); 2650 __ jccb(Assembler::negative, L_failed_0); 2651 2652 __ load_klass(r10_src_klass, src); 2653 #ifdef ASSERT 2654 // assert(src->klass() != NULL); 2655 { 2656 BLOCK_COMMENT("assert klasses not null {"); 2657 Label L1, L2; 2658 __ testptr(r10_src_klass, r10_src_klass); 2659 __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL 2660 __ bind(L1); 2661 __ stop("broken null klass"); 2662 __ bind(L2); 2663 __ load_klass(rax, dst); 2664 __ cmpq(rax, 0); 2665 __ jcc(Assembler::equal, L1); // this would be broken also 2666 BLOCK_COMMENT("} assert klasses not null done"); 2667 } 2668 #endif 2669 2670 // Load layout helper (32-bits) 2671 // 2672 // |array_tag| | header_size | element_type | |log2_element_size| 2673 // 32 30 24 16 8 2 0 2674 // 2675 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 2676 // 2677 2678 const int lh_offset = in_bytes(Klass::layout_helper_offset()); 2679 2680 // Handle objArrays completely differently... 2681 const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); 2682 __ cmpl(Address(r10_src_klass, lh_offset), objArray_lh); 2683 __ jcc(Assembler::equal, L_objArray); 2684 2685 // if (src->klass() != dst->klass()) return -1; 2686 __ load_klass(rax, dst); 2687 __ cmpq(r10_src_klass, rax); 2688 __ jcc(Assembler::notEqual, L_failed); 2689 2690 const Register rax_lh = rax; // layout helper 2691 __ movl(rax_lh, Address(r10_src_klass, lh_offset)); 2692 2693 // if (!src->is_Array()) return -1; 2694 __ cmpl(rax_lh, Klass::_lh_neutral_value); 2695 __ jcc(Assembler::greaterEqual, L_failed); 2696 2697 // At this point, it is known to be a typeArray (array_tag 0x3).
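//
// Decoding the layout helper in C terms (illustrative sketch; field names
// follow the diagram above):
//
//   int hsize   = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
//   int log2esz =  lh & Klass::_lh_log2_element_size_mask;
//   src_addr = (char*)src + hsize + ((size_t)src_pos << log2esz);
//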
2698 #ifdef ASSERT 2699 { 2700 BLOCK_COMMENT("assert primitive array {"); 2701 Label L; 2702 __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); 2703 __ jcc(Assembler::greaterEqual, L); 2704 __ stop("must be a primitive array"); 2705 __ bind(L); 2706 BLOCK_COMMENT("} assert primitive array done"); 2707 } 2708 #endif 2709 2710 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 2711 r10, L_failed); 2712 2713 // TypeArrayKlass 2714 // 2715 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); 2716 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); 2717 // 2718 2719 const Register r10_offset = r10; // array offset 2720 const Register rax_elsize = rax_lh; // element size 2721 2722 __ movl(r10_offset, rax_lh); 2723 __ shrl(r10_offset, Klass::_lh_header_size_shift); 2724 __ andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset 2725 __ addptr(src, r10_offset); // src array offset 2726 __ addptr(dst, r10_offset); // dst array offset 2727 BLOCK_COMMENT("choose copy loop based on element size"); 2728 __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize 2729 2730 // next registers should be set before the jump to corresponding stub 2731 const Register from = c_rarg0; // source array address 2732 const Register to = c_rarg1; // destination array address 2733 const Register count = c_rarg2; // elements count 2734 2735 // 'from', 'to', 'count' registers should be set in such order 2736 // since they are the same as 'src', 'src_pos', 'dst'. 2737 2738 __ BIND(L_copy_bytes); 2739 __ cmpl(rax_elsize, 0); 2740 __ jccb(Assembler::notEqual, L_copy_shorts); 2741 __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr 2742 __ lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr 2743 __ movl2ptr(count, r11_length); // length 2744 __ jump(RuntimeAddress(byte_copy_entry)); 2745 2746 __ BIND(L_copy_shorts); 2747 __ cmpl(rax_elsize, LogBytesPerShort); 2748 __ jccb(Assembler::notEqual, L_copy_ints); 2749 __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr 2750 __ lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr 2751 __ movl2ptr(count, r11_length); // length 2752 __ jump(RuntimeAddress(short_copy_entry)); 2753 2754 __ BIND(L_copy_ints); 2755 __ cmpl(rax_elsize, LogBytesPerInt); 2756 __ jccb(Assembler::notEqual, L_copy_longs); 2757 __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr 2758 __ lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr 2759 __ movl2ptr(count, r11_length); // length 2760 __ jump(RuntimeAddress(int_copy_entry)); 2761 2762 __ BIND(L_copy_longs); 2763 #ifdef ASSERT 2764 { 2765 BLOCK_COMMENT("assert long copy {"); 2766 Label L; 2767 __ cmpl(rax_elsize, LogBytesPerLong); 2768 __ jcc(Assembler::equal, L); 2769 __ stop("must be long copy, but elsize is wrong"); 2770 __ bind(L); 2771 BLOCK_COMMENT("} assert long copy done"); 2772 } 2773 #endif 2774 __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr 2775 __ lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr 2776 __ movl2ptr(count, r11_length); // length 2777 __ jump(RuntimeAddress(long_copy_entry)); 2778 2779 // ObjArrayKlass 2780 __ BIND(L_objArray); 2781 // live at this point: r10_src_klass, r11_length, src[_pos], dst[_pos] 2782 2783 Label L_plain_copy, L_checkcast_copy; 2784 // test array classes for subtyping 2785 __ load_klass(rax, dst); 2786 __ cmpq(r10_src_klass, rax); // usual case is exact equality 2787 __ 
jcc(Assembler::notEqual, L_checkcast_copy); 2788 2789 // Identically typed arrays can be copied without element-wise checks. 2790 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 2791 r10, L_failed); 2792 2793 __ lea(from, Address(src, src_pos, TIMES_OOP, 2794 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr 2795 __ lea(to, Address(dst, dst_pos, TIMES_OOP, 2796 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr 2797 __ movl2ptr(count, r11_length); // length 2798 __ BIND(L_plain_copy); 2799 __ jump(RuntimeAddress(oop_copy_entry)); 2800 2801 __ BIND(L_checkcast_copy); 2802 // live at this point: r10_src_klass, r11_length, rax (dst_klass) 2803 { 2804 // Before looking at dst.length, make sure dst is also an objArray. 2805 __ cmpl(Address(rax, lh_offset), objArray_lh); 2806 __ jcc(Assembler::notEqual, L_failed); 2807 2808 // It is safe to examine both src.length and dst.length. 2809 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 2810 rax, L_failed); 2811 2812 const Register r11_dst_klass = r11; 2813 __ load_klass(r11_dst_klass, dst); // reload 2814 2815 // Marshal the base address arguments now, freeing registers. 2816 __ lea(from, Address(src, src_pos, TIMES_OOP, 2817 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); 2818 __ lea(to, Address(dst, dst_pos, TIMES_OOP, 2819 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); 2820 __ movl(count, length); // length (reloaded) 2821 Register sco_temp = c_rarg3; // this register is free now 2822 assert_different_registers(from, to, count, sco_temp, 2823 r11_dst_klass, r10_src_klass); 2824 assert_clean_int(count, sco_temp); 2825 2826 // Generate the type check. 2827 const int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2828 __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); 2829 assert_clean_int(sco_temp, rax); 2830 generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy); 2831 2832 // Fetch destination element klass from the ObjArrayKlass header. 2833 int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); 2834 __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); 2835 __ movl( sco_temp, Address(r11_dst_klass, sco_offset)); 2836 assert_clean_int(sco_temp, rax); 2837 2838 // the checkcast_copy loop needs two extra arguments: 2839 assert(c_rarg3 == sco_temp, "#3 already in place"); 2840 // Set up arguments for checkcast_copy_entry. 
2841 setup_arg_regs(4); 2842 __ movptr(r8, r11_dst_klass); // dst.klass.element_klass, r8 is c_rarg4 on Linux/Solaris 2843 __ jump(RuntimeAddress(checkcast_copy_entry)); 2844 } 2845 2846 __ BIND(L_failed); 2847 __ xorptr(rax, rax); 2848 __ notptr(rax); // return -1 2849 __ leave(); // required for proper stackwalking of RuntimeStub frame 2850 __ ret(0); 2851 2852 return start; 2853 } 2854 2855 void generate_arraycopy_stubs() { 2856 address entry; 2857 address entry_jbyte_arraycopy; 2858 address entry_jshort_arraycopy; 2859 address entry_jint_arraycopy; 2860 address entry_oop_arraycopy; 2861 address entry_jlong_arraycopy; 2862 address entry_checkcast_arraycopy; 2863 2864 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, 2865 "jbyte_disjoint_arraycopy"); 2866 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy, 2867 "jbyte_arraycopy"); 2868 2869 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, 2870 "jshort_disjoint_arraycopy"); 2871 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy, 2872 "jshort_arraycopy"); 2873 2874 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, &entry, 2875 "jint_disjoint_arraycopy"); 2876 StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, entry, 2877 &entry_jint_arraycopy, "jint_arraycopy"); 2878 2879 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, &entry, 2880 "jlong_disjoint_arraycopy"); 2881 StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, entry, 2882 &entry_jlong_arraycopy, "jlong_arraycopy"); 2883 2884 2885 if (UseCompressedOops) { 2886 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, &entry, 2887 "oop_disjoint_arraycopy"); 2888 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, entry, 2889 &entry_oop_arraycopy, "oop_arraycopy"); 2890 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, &entry, 2891 "oop_disjoint_arraycopy_uninit", 2892 /*dest_uninitialized*/true); 2893 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, entry, 2894 NULL, "oop_arraycopy_uninit", 2895 /*dest_uninitialized*/true); 2896 } else { 2897 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, &entry, 2898 "oop_disjoint_arraycopy"); 2899 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, entry, 2900 &entry_oop_arraycopy, "oop_arraycopy"); 2901 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, &entry, 2902 "oop_disjoint_arraycopy_uninit", 2903 /*dest_uninitialized*/true); 2904 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, entry, 2905 NULL, "oop_arraycopy_uninit", 2906 /*dest_uninitialized*/true); 2907 } 2908 2909 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); 2910 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, 2911 /*dest_uninitialized*/true); 2912 2913 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", 2914 entry_jbyte_arraycopy, 2915 entry_jshort_arraycopy, 2916 entry_jint_arraycopy, 2917 entry_jlong_arraycopy); 2918 StubRoutines::_generic_arraycopy = 
generate_generic_copy("generic_arraycopy", 2919 entry_jbyte_arraycopy, 2920 entry_jshort_arraycopy, 2921 entry_jint_arraycopy, 2922 entry_oop_arraycopy, 2923 entry_jlong_arraycopy, 2924 entry_checkcast_arraycopy); 2925 2926 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); 2927 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); 2928 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); 2929 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); 2930 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); 2931 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); 2932 2933 // We don't generate specialized code for HeapWord-aligned source 2934 // arrays, so just use the code we've already generated 2935 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; 2936 StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; 2937 2938 StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; 2939 StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; 2940 2941 StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; 2942 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; 2943 2944 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; 2945 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; 2946 2947 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; 2948 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; 2949 2950 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; 2951 StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; 2952 } 2953 2954 void generate_math_stubs() { 2955 { 2956 StubCodeMark mark(this, "StubRoutines", "log"); 2957 StubRoutines::_intrinsic_log = (double (*)(double)) __ pc(); 2958 2959 __ subq(rsp, 8); 2960 __ movdbl(Address(rsp, 0), xmm0); 2961 __ fld_d(Address(rsp, 0)); 2962 __ flog(); 2963 __ fstp_d(Address(rsp, 0)); 2964 __ movdbl(xmm0, Address(rsp, 0)); 2965 __ addq(rsp, 8); 2966 __ ret(0); 2967 } 2968 { 2969 StubCodeMark mark(this, "StubRoutines", "log10"); 2970 StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); 2971 2972 __ subq(rsp, 8); 2973 __ movdbl(Address(rsp, 0), xmm0); 2974 __ fld_d(Address(rsp, 0)); 2975 __ flog10(); 2976 __ fstp_d(Address(rsp, 0)); 2977 __ movdbl(xmm0, Address(rsp, 0)); 2978 __ addq(rsp, 8); 2979 __ ret(0); 2980 } 2981 { 2982 StubCodeMark mark(this, "StubRoutines", "sin"); 2983 StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); 2984 2985 __ subq(rsp, 8); 2986 __ movdbl(Address(rsp, 0), xmm0); 2987 __ fld_d(Address(rsp, 0)); 2988 __ trigfunc('s'); 2989 __ fstp_d(Address(rsp, 0)); 2990 __ movdbl(xmm0, Address(rsp, 0)); 2991 __ addq(rsp, 8); 2992 __ ret(0); 2993 } 2994 { 2995 StubCodeMark mark(this, "StubRoutines", "cos"); 2996 StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); 2997 2998 __ subq(rsp, 8); 2999 __ movdbl(Address(rsp, 0), xmm0); 3000 __ fld_d(Address(rsp, 0)); 3001 __ trigfunc('c'); 3002 __ fstp_d(Address(rsp, 0)); 3003 __ movdbl(xmm0, Address(rsp, 0)); 3004 __ addq(rsp, 8); 3005 __ ret(0); 3006 } 3007 { 3008 StubCodeMark mark(this, "StubRoutines", "tan"); 3009 StubRoutines::_intrinsic_tan = (double (*)(double)) 
__ pc(); 3010 3011 __ subq(rsp, 8); 3012 __ movdbl(Address(rsp, 0), xmm0); 3013 __ fld_d(Address(rsp, 0)); 3014 __ trigfunc('t'); 3015 __ fstp_d(Address(rsp, 0)); 3016 __ movdbl(xmm0, Address(rsp, 0)); 3017 __ addq(rsp, 8); 3018 __ ret(0); 3019 } 3020 { 3021 StubCodeMark mark(this, "StubRoutines", "exp"); 3022 StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc(); 3023 3024 __ subq(rsp, 8); 3025 __ movdbl(Address(rsp, 0), xmm0); 3026 __ fld_d(Address(rsp, 0)); 3027 __ exp_with_fallback(0); 3028 __ fstp_d(Address(rsp, 0)); 3029 __ movdbl(xmm0, Address(rsp, 0)); 3030 __ addq(rsp, 8); 3031 __ ret(0); 3032 } 3033 { 3034 StubCodeMark mark(this, "StubRoutines", "pow"); 3035 StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc(); 3036 3037 __ subq(rsp, 8); 3038 __ movdbl(Address(rsp, 0), xmm1); 3039 __ fld_d(Address(rsp, 0)); 3040 __ movdbl(Address(rsp, 0), xmm0); 3041 __ fld_d(Address(rsp, 0)); 3042 __ pow_with_fallback(0); 3043 __ fstp_d(Address(rsp, 0)); 3044 __ movdbl(xmm0, Address(rsp, 0)); 3045 __ addq(rsp, 8); 3046 __ ret(0); 3047 } 3048 } 3049 3050 // AES intrinsic stubs 3051 enum {AESBlockSize = 16}; 3052 3053 address generate_key_shuffle_mask() { 3054 __ align(16); 3055 StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask"); 3056 address start = __ pc(); 3057 __ emit_data64( 0x0405060700010203, relocInfo::none ); 3058 __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none ); 3059 return start; 3060 } 3061 3062 // Utility routine for loading a 128-bit key word in little endian format; 3063 // can optionally specify that the shuffle mask is already in an xmm register 3064 void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { 3065 __ movdqu(xmmdst, Address(key, offset)); 3066 if (xmm_shuf_mask != NULL) { 3067 __ pshufb(xmmdst, xmm_shuf_mask); 3068 } else { 3069 __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); 3070 } 3071 } 3072 3073 // Arguments: 3074 // 3075 // Inputs: 3076 // c_rarg0 - source byte array address 3077 // c_rarg1 - destination byte array address 3078 // c_rarg2 - K (key) in little endian int array 3079 // 3080 address generate_aescrypt_encryptBlock() { 3081 assert(UseAES, "need AES instructions and misaligned SSE support"); 3082 __ align(CodeEntryAlignment); 3083 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); 3084 Label L_doLast; 3085 address start = __ pc(); 3086 3087 const Register from = c_rarg0; // source array address 3088 const Register to = c_rarg1; // destination array address 3089 const Register key = c_rarg2; // key array address 3090 const Register keylen = rax; 3091 3092 const XMMRegister xmm_result = xmm0; 3093 const XMMRegister xmm_key_shuf_mask = xmm1; 3094 // On win64 xmm6-xmm15 must be preserved so don't use them.
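//
// The expanded key length selects the round count; in C terms (illustrative):
// rounds = keylen/4 - 1, i.e. 44 ints -> 10 rounds (AES-128), 52 -> 12 rounds
// (AES-192), 60 -> 14 rounds (AES-256).
//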
3095 const XMMRegister xmm_temp1 = xmm2; 3096 const XMMRegister xmm_temp2 = xmm3; 3097 const XMMRegister xmm_temp3 = xmm4; 3098 const XMMRegister xmm_temp4 = xmm5; 3099 3100 __ enter(); // required for proper stackwalking of RuntimeStub frame 3101 3102 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} 3103 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 3104 3105 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); 3106 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input 3107 3108 // For encryption, the java expanded key ordering is just what we need 3109 // we don't know if the key is aligned, hence not using load-execute form 3110 3111 load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); 3112 __ pxor(xmm_result, xmm_temp1); 3113 3114 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); 3115 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); 3116 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); 3117 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); 3118 3119 __ aesenc(xmm_result, xmm_temp1); 3120 __ aesenc(xmm_result, xmm_temp2); 3121 __ aesenc(xmm_result, xmm_temp3); 3122 __ aesenc(xmm_result, xmm_temp4); 3123 3124 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); 3125 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); 3126 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); 3127 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); 3128 3129 __ aesenc(xmm_result, xmm_temp1); 3130 __ aesenc(xmm_result, xmm_temp2); 3131 __ aesenc(xmm_result, xmm_temp3); 3132 __ aesenc(xmm_result, xmm_temp4); 3133 3134 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); 3135 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); 3136 3137 __ cmpl(keylen, 44); 3138 __ jccb(Assembler::equal, L_doLast); 3139 3140 __ aesenc(xmm_result, xmm_temp1); 3141 __ aesenc(xmm_result, xmm_temp2); 3142 3143 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); 3144 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); 3145 3146 __ cmpl(keylen, 52); 3147 __ jccb(Assembler::equal, L_doLast); 3148 3149 __ aesenc(xmm_result, xmm_temp1); 3150 __ aesenc(xmm_result, xmm_temp2); 3151 3152 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); 3153 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); 3154 3155 __ BIND(L_doLast); 3156 __ aesenc(xmm_result, xmm_temp1); 3157 __ aesenclast(xmm_result, xmm_temp2); 3158 __ movdqu(Address(to, 0), xmm_result); // store the result 3159 __ xorptr(rax, rax); // return 0 3160 __ leave(); // required for proper stackwalking of RuntimeStub frame 3161 __ ret(0); 3162 3163 return start; 3164 } 3165 3166 3167 // Arguments: 3168 // 3169 // Inputs: 3170 // c_rarg0 - source byte array address 3171 // c_rarg1 - destination byte array address 3172 // c_rarg2 - K (key) in little endian int array 3173 // 3174 address generate_aescrypt_decryptBlock() { 3175 assert(UseAES, "need AES instructions and misaligned SSE support"); 3176 __ align(CodeEntryAlignment); 3177 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); 3178 Label L_doLast; 3179 address start = __ pc(); 3180 3181 const Register from = c_rarg0; // source array address 3182 const Register to = c_rarg1; // destination array address 3183 const Register key = c_rarg2; // key array address 3184 const Register keylen = rax; 3185 3186 const XMMRegister xmm_result = xmm0; 3187 const XMMRegister xmm_key_shuf_mask = xmm1; 3188 // On win64 xmm6-xmm15 must be preserved so don't use them. 
3189 const XMMRegister xmm_temp1 = xmm2; 3190 const XMMRegister xmm_temp2 = xmm3; 3191 const XMMRegister xmm_temp3 = xmm4; 3192 const XMMRegister xmm_temp4 = xmm5; 3193 3194 __ enter(); // required for proper stackwalking of RuntimeStub frame 3195 3196 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} 3197 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 3198 3199 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); 3200 __ movdqu(xmm_result, Address(from, 0)); 3201 3202 // for decryption java expanded key ordering is rotated one position from what we want 3203 // so we start from 0x10 here and hit 0x00 last 3204 // we don't know if the key is aligned, hence not using load-execute form 3205 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); 3206 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); 3207 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); 3208 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); 3209 3210 __ pxor (xmm_result, xmm_temp1); 3211 __ aesdec(xmm_result, xmm_temp2); 3212 __ aesdec(xmm_result, xmm_temp3); 3213 __ aesdec(xmm_result, xmm_temp4); 3214 3215 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); 3216 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); 3217 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); 3218 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); 3219 3220 __ aesdec(xmm_result, xmm_temp1); 3221 __ aesdec(xmm_result, xmm_temp2); 3222 __ aesdec(xmm_result, xmm_temp3); 3223 __ aesdec(xmm_result, xmm_temp4); 3224 3225 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); 3226 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); 3227 load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); 3228 3229 __ cmpl(keylen, 44); 3230 __ jccb(Assembler::equal, L_doLast); 3231 3232 __ aesdec(xmm_result, xmm_temp1); 3233 __ aesdec(xmm_result, xmm_temp2); 3234 3235 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); 3236 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); 3237 3238 __ cmpl(keylen, 52); 3239 __ jccb(Assembler::equal, L_doLast); 3240 3241 __ aesdec(xmm_result, xmm_temp1); 3242 __ aesdec(xmm_result, xmm_temp2); 3243 3244 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); 3245 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); 3246 3247 __ BIND(L_doLast); 3248 __ aesdec(xmm_result, xmm_temp1); 3249 __ aesdec(xmm_result, xmm_temp2); 3250 3251 // for decryption the aesdeclast operation is always on key+0x00 3252 __ aesdeclast(xmm_result, xmm_temp3); 3253 __ movdqu(Address(to, 0), xmm_result); // store the result 3254 __ xorptr(rax, rax); // return 0 3255 __ leave(); // required for proper stackwalking of RuntimeStub frame 3256 __ ret(0); 3257 3258 return start; 3259 } 3260 3261 3262 // Arguments: 3263 // 3264 // Inputs: 3265 // c_rarg0 - source byte array address 3266 // c_rarg1 - destination byte array address 3267 // c_rarg2 - K (key) in little endian int array 3268 // c_rarg3 - r vector byte array address 3269 // c_rarg4 - input length 3270 // 3271 // Output: 3272 // rax - input length 3273 // 3274 address generate_cipherBlockChaining_encryptAESCrypt() { 3275 assert(UseAES, "need AES instructions and misaligned SSE support"); 3276 __ align(CodeEntryAlignment); 3277 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); 3278 address start = __ pc(); 3279 3280 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; 3281 const Register from = c_rarg0; // source array address 3282 const Register to = c_rarg1; 
// destination array address 3283 const Register key = c_rarg2; // key array address 3284 const Register rvec = c_rarg3; // r byte array initialized from initvector array address 3285 // and left with the results of the last encryption block 3286 #ifndef _WIN64 3287 const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) 3288 #else 3289 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 3290 const Register len_reg = r10; // pick the first volatile windows register 3291 #endif 3292 const Register pos = rax; 3293 3294 // xmm register assignments for the loops below 3295 const XMMRegister xmm_result = xmm0; 3296 const XMMRegister xmm_temp = xmm1; 3297 // keys 0-10 preloaded into xmm2-xmm12 3298 const int XMM_REG_NUM_KEY_FIRST = 2; 3299 const int XMM_REG_NUM_KEY_LAST = 15; 3300 const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); 3301 const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10); 3302 const XMMRegister xmm_key11 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11); 3303 const XMMRegister xmm_key12 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12); 3304 const XMMRegister xmm_key13 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13); 3305 3306 __ enter(); // required for proper stackwalking of RuntimeStub frame 3307 3308 #ifdef _WIN64 3309 // on win64, fill len_reg from stack position 3310 __ movl(len_reg, len_mem); 3311 // save the xmm registers which must be preserved 6-15 3312 __ subptr(rsp, -rsp_after_call_off * wordSize); 3313 for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { 3314 __ movdqu(xmm_save(i), as_XMMRegister(i)); 3315 } 3316 #else 3317 __ push(len_reg); // Save 3318 #endif 3319 3320 const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front 3321 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); 3322 // load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0 3323 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) { 3324 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); 3325 offset += 0x10; 3326 } 3327 __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec 3328 3329 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) 3330 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 3331 __ cmpl(rax, 44); 3332 __ jcc(Assembler::notEqual, L_key_192_256); 3333 3334 // 128 bit code follows here 3335 __ movptr(pos, 0); 3336 __ align(OptoLoopAlignment); 3337 3338 __ BIND(L_loopTop_128); 3339 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input 3340 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 3341 __ pxor (xmm_result, xmm_key0); // do the aes rounds 3342 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) { 3343 __ aesenc(xmm_result, as_XMMRegister(rnum)); 3344 } 3345 __ aesenclast(xmm_result, xmm_key10); 3346 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 3347 // no need to store r to memory until we exit 3348 __ addptr(pos, AESBlockSize); 3349 __ subptr(len_reg, AESBlockSize); 3350 __ jcc(Assembler::notEqual, L_loopTop_128); 3351 3352 __ BIND(L_exit); 3353 __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object 3354 3355 #ifdef _WIN64 3356 // restore xmm regs 
belonging to calling function 3357 for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { 3358 __ movdqu(as_XMMRegister(i), xmm_save(i)); 3359 } 3360 __ movl(rax, len_mem); 3361 #else 3362 __ pop(rax); // return length 3363 #endif 3364 __ leave(); // required for proper stackwalking of RuntimeStub frame 3365 __ ret(0); 3366 3367 __ BIND(L_key_192_256); 3368 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) 3369 load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask); 3370 load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask); 3371 __ cmpl(rax, 52); 3372 __ jcc(Assembler::notEqual, L_key_256); 3373 3374 // 192-bit code follows here (could be changed to use more xmm registers) 3375 __ movptr(pos, 0); 3376 __ align(OptoLoopAlignment); 3377 3378 __ BIND(L_loopTop_192); 3379 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input 3380 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 3381 __ pxor (xmm_result, xmm_key0); // do the aes rounds 3382 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) { 3383 __ aesenc(xmm_result, as_XMMRegister(rnum)); 3384 } 3385 __ aesenclast(xmm_result, xmm_key12); 3386 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 3387 // no need to store r to memory until we exit 3388 __ addptr(pos, AESBlockSize); 3389 __ subptr(len_reg, AESBlockSize); 3390 __ jcc(Assembler::notEqual, L_loopTop_192); 3391 __ jmp(L_exit); 3392 3393 __ BIND(L_key_256); 3394 // 256-bit code follows here (could be changed to use more xmm registers) 3395 load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask); 3396 __ movptr(pos, 0); 3397 __ align(OptoLoopAlignment); 3398 3399 __ BIND(L_loopTop_256); 3400 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input 3401 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 3402 __ pxor (xmm_result, xmm_key0); // do the aes rounds 3403 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) { 3404 __ aesenc(xmm_result, as_XMMRegister(rnum)); 3405 } 3406 load_key(xmm_temp, key, 0xe0); 3407 __ aesenclast(xmm_result, xmm_temp); 3408 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 3409 // no need to store r to memory until we exit 3410 __ addptr(pos, AESBlockSize); 3411 __ subptr(len_reg, AESBlockSize); 3412 __ jcc(Assembler::notEqual, L_loopTop_256); 3413 __ jmp(L_exit); 3414 3415 return start; 3416 } 3417 3418 // Safefetch stubs. 3419 void generate_safefetch(const char* name, int size, address* entry, 3420 address* fault_pc, address* continuation_pc) { 3421 // safefetch signatures: 3422 // int SafeFetch32(int* adr, int errValue); 3423 // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); 3424 // 3425 // arguments: 3426 // c_rarg0 = adr 3427 // c_rarg1 = errValue 3428 // 3429 // result: 3430 // rax = *adr or errValue 3431 3432 StubCodeMark mark(this, "StubRoutines", name); 3433 3434 // Entry point, pc or function descriptor. 3435 *entry = __ pc(); 3436
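// How the three pcs recorded here cooperate (a sketch of the intended
// use; the authoritative inline wrappers and signal-handler wiring live
// in stubRoutines.hpp and the platform os_*.cpp files): a caller probes
// possibly-unmapped memory and supplies the value to be returned on a
// fault, e.g.
//
//   int v = SafeFetch32((int*) untrusted_adr, -1 /* errValue */);
//
// If the load at *fault_pc faults, the signal handler redirects
// execution to *continuation_pc. Since the faulting load never wrote
// c_rarg1, the caller's errValue is still there and becomes the result.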
3437 // Load *adr into c_rarg1, may fault. 3438 *fault_pc = __ pc(); 3439 switch (size) { 3440 case 4: 3441 // int32_t 3442 __ movl(c_rarg1, Address(c_rarg0, 0)); 3443 break; 3444 case 8: 3445 // int64_t 3446 __ movq(c_rarg1, Address(c_rarg0, 0)); 3447 break; 3448 default: 3449 ShouldNotReachHere(); 3450 } 3451 3452 // return errValue or *adr 3453 *continuation_pc = __ pc(); 3454 __ movq(rax, c_rarg1); 3455 __ ret(0); 3456 } 3457 3458 // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time 3459 // to hide instruction latency 3460 // 3461 // Arguments: 3462 // 3463 // Inputs: 3464 // c_rarg0 - source byte array address 3465 // c_rarg1 - destination byte array address 3466 // c_rarg2 - K (key) in little endian int array 3467 // c_rarg3 - r vector byte array address 3468 // c_rarg4 - input length 3469 // 3470 // Output: 3471 // rax - input length 3472 // 3473 3474 address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { 3475 assert(UseAES, "need AES instructions and misaligned SSE support"); 3476 __ align(CodeEntryAlignment); 3477 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); 3478 address start = __ pc(); 3479 3480 Label L_exit, L_key_192_256, L_key_256; 3481 Label L_singleBlock_loopTop_128, L_multiBlock_loopTop_128; 3482 Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256; 3483 const Register from = c_rarg0; // source array address 3484 const Register to = c_rarg1; // destination array address 3485 const Register key = c_rarg2; // key array address 3486 const Register rvec = c_rarg3; // r byte array initialized from initvector array address 3487 // and left with the last ciphertext block (the next r vector) 3488 #ifndef _WIN64 3489 const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) 3490 #else 3491 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 3492 const Register len_reg = r10; // pick the first volatile windows register 3493 #endif 3494 const Register pos = rax; 3495 3496 // round keys 1-10 preloaded into xmm5-xmm14, last round key (offset 0x00) into xmm15 3497 const int XMM_REG_NUM_KEY_FIRST = 5; 3498 const int XMM_REG_NUM_KEY_LAST = 15; 3499 const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); 3500 const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST); 3501 3502 __ enter(); // required for proper stackwalking of RuntimeStub frame 3503 3504 #ifdef _WIN64 3505 // on win64, fill len_reg from stack position 3506 __ movl(len_reg, len_mem); 3507 // save the xmm registers which must be preserved 6-15 3508 __ subptr(rsp, -rsp_after_call_off * wordSize); 3509 for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { 3510 __ movdqu(xmm_save(i), as_XMMRegister(i)); 3511 } 3512 #else 3513 __ push(len_reg); // Save 3514 #endif 3515 3516 // the java expanded key ordering is rotated one position from what we want 3517 // so we start from 0x10 here and hit 0x00 last 3518 const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front 3519 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); 3520 // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00 3521 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) { 3522 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); 3523 offset += 0x10; 3524 } 3525 load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask); 3526 3527 const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block 3528 3529 // registers holding the four results in the parallelized loop 3530 const
XMMRegister xmm_result0 = xmm0; 3531 const XMMRegister xmm_result1 = xmm2; 3532 const XMMRegister xmm_result2 = xmm3; 3533 const XMMRegister xmm_result3 = xmm4; 3534 3535 __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec 3536 3537 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) 3538 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 3539 __ cmpl(rax, 44); 3540 __ jcc(Assembler::notEqual, L_key_192_256); 3541 3542 3543 // 128-bit code follows here, parallelized 3544 __ movptr(pos, 0); 3545 __ align(OptoLoopAlignment); 3546 __ BIND(L_multiBlock_loopTop_128); 3547 __ cmpptr(len_reg, 4*AESBlockSize); // see if at least 4 blocks left 3548 __ jcc(Assembler::less, L_singleBlock_loopTop_128); 3549 3550 __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0*AESBlockSize)); // get next 4 blocks into xmmresult registers 3551 __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1*AESBlockSize)); 3552 __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2*AESBlockSize)); 3553 __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3*AESBlockSize)); 3554 3555 #define DoFour(opc, src_reg) \ 3556 __ opc(xmm_result0, src_reg); \ 3557 __ opc(xmm_result1, src_reg); \ 3558 __ opc(xmm_result2, src_reg); \ 3559 __ opc(xmm_result3, src_reg); 3560 3561 DoFour(pxor, xmm_key_first); 3562 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { 3563 DoFour(aesdec, as_XMMRegister(rnum)); 3564 } 3565 DoFour(aesdeclast, xmm_key_last); 3566 // for each result, xor with the r vector of previous cipher block 3567 __ pxor(xmm_result0, xmm_prev_block_cipher); 3568 __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0*AESBlockSize)); 3569 __ pxor(xmm_result1, xmm_prev_block_cipher); 3570 __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1*AESBlockSize)); 3571 __ pxor(xmm_result2, xmm_prev_block_cipher); 3572 __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2*AESBlockSize)); 3573 __ pxor(xmm_result3, xmm_prev_block_cipher); 3574 __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3*AESBlockSize)); // this will carry over to next set of blocks 3575 3576 __ movdqu(Address(to, pos, Address::times_1, 0*AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output 3577 __ movdqu(Address(to, pos, Address::times_1, 1*AESBlockSize), xmm_result1); 3578 __ movdqu(Address(to, pos, Address::times_1, 2*AESBlockSize), xmm_result2); 3579 __ movdqu(Address(to, pos, Address::times_1, 3*AESBlockSize), xmm_result3); 3580 3581 __ addptr(pos, 4*AESBlockSize); 3582 __ subptr(len_reg, 4*AESBlockSize); 3583 __ jmp(L_multiBlock_loopTop_128); 3584 3585 // registers used in the non-parallelized loops 3586 // xmm register assignments for the loops below 3587 const XMMRegister xmm_result = xmm0; 3588 const XMMRegister xmm_prev_block_cipher_save = xmm2; 3589 const XMMRegister xmm_key11 = xmm3; 3590 const XMMRegister xmm_key12 = xmm4; 3591 const XMMRegister xmm_temp = xmm4; 3592 3593 __ align(OptoLoopAlignment); 3594 __ BIND(L_singleBlock_loopTop_128); 3595 __ cmpptr(len_reg, 0); // any blocks left?? 
3596 __ jcc(Assembler::equal, L_exit); 3597 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input 3598 __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector 3599 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds 3600 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { 3601 __ aesdec(xmm_result, as_XMMRegister(rnum)); 3602 } 3603 __ aesdeclast(xmm_result, xmm_key_last); 3604 __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector 3605 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 3606 // no need to store r to memory until we exit 3607 __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block 3608 3609 __ addptr(pos, AESBlockSize); 3610 __ subptr(len_reg, AESBlockSize); 3611 __ jmp(L_singleBlock_loopTop_128); 3612 3613 3614 __ BIND(L_exit); 3615 __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object 3616 #ifdef _WIN64 3617 // restore regs belonging to calling function 3618 for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { 3619 __ movdqu(as_XMMRegister(i), xmm_save(i)); 3620 } 3621 __ movl(rax, len_mem); 3622 #else 3623 __ pop(rax); // return length 3624 #endif 3625 __ leave(); // required for proper stackwalking of RuntimeStub frame 3626 __ ret(0); 3627 3628 3629 __ BIND(L_key_192_256); 3630 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) 3631 load_key(xmm_key11, key, 0xb0); 3632 __ cmpl(rax, 52); 3633 __ jcc(Assembler::notEqual, L_key_256); 3634 3635 // 192-bit code follows here (could be optimized to use parallelism) 3636 load_key(xmm_key12, key, 0xc0); // 192-bit key goes up to c0 3637 __ movptr(pos, 0); 3638 __ align(OptoLoopAlignment); 3639 3640 __ BIND(L_singleBlock_loopTop_192); 3641 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input 3642 __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector 3643 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds 3644 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { 3645 __ aesdec(xmm_result, as_XMMRegister(rnum)); 3646 } 3647 __ aesdec(xmm_result, xmm_key11); 3648 __ aesdec(xmm_result, xmm_key12); 3649 __ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0 3650 __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector 3651 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 3652 // no need to store r to memory until we exit 3653 __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block 3654 __ addptr(pos, AESBlockSize); 3655 __ subptr(len_reg, AESBlockSize); 3656 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); 3657 __ jmp(L_exit); 3658 3659 __ BIND(L_key_256); 3660 // 256-bit code follows here (could be optimized to use parallelism) 3661 __ movptr(pos, 0); 3662 __ align(OptoLoopAlignment); 3663 3664 __ BIND(L_singleBlock_loopTop_256); 3665 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input 3666 __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector 3667 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds 3668 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= 
XMM_REG_NUM_KEY_LAST - 1; rnum++) { 3669 __ aesdec(xmm_result, as_XMMRegister(rnum)); 3670 } 3671 __ aesdec(xmm_result, xmm_key11); 3672 load_key(xmm_temp, key, 0xc0); 3673 __ aesdec(xmm_result, xmm_temp); 3674 load_key(xmm_temp, key, 0xd0); 3675 __ aesdec(xmm_result, xmm_temp); 3676 load_key(xmm_temp, key, 0xe0); // 256-bit key goes up to e0 3677 __ aesdec(xmm_result, xmm_temp); 3678 __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0 3679 __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector 3680 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 3681 // no need to store r to memory until we exit 3682 __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block 3683 __ addptr(pos, AESBlockSize); 3684 __ subptr(len_reg, AESBlockSize); 3685 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); 3686 __ jmp(L_exit); 3687 3688 return start; 3689 } 3690 3691 /** 3692 * Arguments: 3693 * 3694 * Inputs: 3695 * c_rarg0 - int crc 3696 * c_rarg1 - byte* buf 3697 * c_rarg2 - int length 3698 * 3699 * Output: 3700 * rax - int crc result 3701 */ 3702 address generate_updateBytesCRC32() { 3703 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); 3704 3705 __ align(CodeEntryAlignment); 3706 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); 3707 3708 address start = __ pc(); 3709 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) 3710 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) 3711 // rscratch1: r10 3712 const Register crc = c_rarg0; // crc 3713 const Register buf = c_rarg1; // source java byte array address 3714 const Register len = c_rarg2; // length 3715 const Register table = c_rarg3; // crc_table address (reuse register) 3716 const Register tmp = r11; 3717 assert_different_registers(crc, buf, len, table, tmp, rax); 3718 3719 BLOCK_COMMENT("Entry:"); 3720 __ enter(); // required for proper stackwalking of RuntimeStub frame 3721 3722 __ kernel_crc32(crc, buf, len, table, tmp); 3723 3724 __ movl(rax, crc); 3725 __ leave(); // required for proper stackwalking of RuntimeStub frame 3726 __ ret(0); 3727 3728 return start; 3729 } 3730 3731 3732 /** 3733 * Arguments: 3734 * 3735 * Input: 3736 * c_rarg0 - x address 3737 * c_rarg1 - x length 3738 * c_rarg2 - y address 3739 * c_rarg3 - y length 3740 * not Win64 3741 * c_rarg4 - z address 3742 * c_rarg5 - z length 3743 * Win64 3744 * rsp+40 - z address 3745 * rsp+48 - z length 3746 */ 3747 address generate_multiplyToLen() { 3748 __ align(CodeEntryAlignment); 3749 StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); 3750 3751 address start = __ pc(); 3752 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) 3753 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) 3754 const Register x = rdi; 3755 const Register xlen = rax; 3756 const Register y = rsi; 3757 const Register ylen = rcx; 3758 const Register z = r8; 3759 const Register zlen = r11; 3760
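// For reference, a hedged C++ sketch of the schoolbook multiplication
// that multiply_to_len() implements (it mirrors, rather than replicates,
// the Java fallback BigInteger.multiplyToLen; the helper name and
// signature here are illustrative only): x and y are arrays of 32-bit
// limbs, most significant limb first, and z receives all xlen + ylen
// result limbs.
//
//   void multiply_to_len_sketch(const jint* x, int xlen,
//                               const jint* y, int ylen, jint* z) {
//     for (int k = 0; k < xlen + ylen; k++) z[k] = 0;
//     for (int i = xlen - 1; i >= 0; i--) {
//       julong carry = 0;
//       for (int j = ylen - 1; j >= 0; j--) {
//         julong p = (julong)(juint)x[i] * (juint)y[j]
//                    + (juint)z[i + j + 1] + carry;  // cannot overflow 64 bits
//         z[i + j + 1] = (jint)p;                    // keep the low 32 bits
//         carry = p >> 32;                           // high 32 bits ripple up
//       }
//       z[i] = (jint)carry;                          // carry out of this row
//     }
//   }
3761 // Next registers will be saved on stack in multiply_to_len().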
3762 const Register tmp1 = r12; 3763 const Register tmp2 = r13; 3764 const Register tmp3 = r14; 3765 const Register tmp4 = r15; 3766 const Register tmp5 = rbx; 3767 3768 BLOCK_COMMENT("Entry:"); 3769 __ enter(); // required for proper stackwalking of RuntimeStub frame 3770 3771 #ifndef _WIN64 3772 __ movptr(zlen, r9); // Save r9 in r11 - zlen 3773 #endif 3774 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx 3775 // ylen => rcx, z => r8, zlen => r11 3776 // r9 and r10 may be used to save non-volatile registers 3777 #ifdef _WIN64 3778 // last 2 arguments (#4, #5) are on stack on Win64 3779 __ movptr(z, Address(rsp, 6 * wordSize)); 3780 __ movptr(zlen, Address(rsp, 7 * wordSize)); 3781 #endif 3782 3783 __ movptr(xlen, rsi); 3784 __ movptr(y, rdx); 3785 __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5); 3786 3787 restore_arg_regs(); 3788 3789 __ leave(); // required for proper stackwalking of RuntimeStub frame 3790 __ ret(0); 3791 3792 return start; 3793 } 3794 3795 #undef __ 3796 #define __ masm-> 3797 3798 // Continuation point for throwing of implicit exceptions that are 3799 // not handled in the current activation. Fabricates an exception 3800 // oop and initiates normal exception dispatching in this 3801 // frame. Since we need to preserve callee-saved values (currently 3802 // only for C2, but done for C1 as well) we need a callee-saved oop 3803 // map and therefore have to make these stubs into RuntimeStubs 3804 // rather than BufferBlobs. If the compiler needs all registers to 3805 // be preserved between the fault point and the exception handler 3806 // then it must assume responsibility for that in 3807 // AbstractCompiler::continuation_for_implicit_null_exception or 3808 // continuation_for_implicit_division_by_zero_exception. All other 3809 // implicit exceptions (e.g., NullPointerException or 3810 // AbstractMethodError on entry) are either at call sites or 3811 // otherwise assume that stack unwinding will be initiated, so 3812 // caller saved registers were assumed volatile in the compiler. 3813 address generate_throw_exception(const char* name, 3814 address runtime_entry, 3815 Register arg1 = noreg, 3816 Register arg2 = noreg) { 3817 // Information about frame layout at time of blocking runtime call. 3818 // Note that we only have to preserve callee-saved registers since 3819 // the compilers are responsible for supplying a continuation point 3820 // if they expect all registers to be preserved. 
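// To make the frame arithmetic below concrete (a worked example derived
// from the enum, not a new invariant): layout slots are 32-bit ints. On
// Linux frame::arg_reg_save_area_bytes is 0, so rbp_off == 0 and
// framesize == 4 ints == 2 words (just the saved rbp plus the return
// address); the prolog's subptr of (framesize-4) << LogBytesPerInt then
// allocates nothing. On Win64 the 32-byte argument register save area
// makes rbp_off == 8 and framesize == 12 ints == 6 words, and the prolog
// allocates those 32 bytes as home space for the runtime call made
// below. In both cases framesize/2 is even, satisfying the 16-byte
// alignment assert, and new_runtime_stub() is handed framesize >> 1
// words (2 or 6).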
3821 enum layout { 3822 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt, 3823 rbp_off2, 3824 return_off, 3825 return_off2, 3826 framesize // inclusive of return address 3827 }; 3828 3829 int insts_size = 512; 3830 int locs_size = 64; 3831 3832 CodeBuffer code(name, insts_size, locs_size); 3833 OopMapSet* oop_maps = new OopMapSet(); 3834 MacroAssembler* masm = new MacroAssembler(&code); 3835 3836 address start = __ pc(); 3837 3838 // This is an inlined and slightly modified version of call_VM 3839 // which has the ability to fetch the return PC out of 3840 // thread-local storage and also sets up last_Java_sp slightly 3841 // differently than the real call_VM 3842 3843 __ enter(); // required for proper stackwalking of RuntimeStub frame 3844 3845 assert(is_even(framesize/2), "sp not 16-byte aligned"); 3846 3847 // return address and rbp are already in place 3848 __ subptr(rsp, (framesize-4) << LogBytesPerInt); // prolog 3849 3850 int frame_complete = __ pc() - start; 3851 3852 // Set up last_Java_sp and last_Java_fp 3853 address the_pc = __ pc(); 3854 __ set_last_Java_frame(rsp, rbp, the_pc); 3855 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack 3856 3857 // Call runtime 3858 if (arg1 != noreg) { 3859 assert(arg2 != c_rarg1, "clobbered"); 3860 __ movptr(c_rarg1, arg1); 3861 } 3862 if (arg2 != noreg) { 3863 __ movptr(c_rarg2, arg2); 3864 } 3865 __ movptr(c_rarg0, r15_thread); 3866 BLOCK_COMMENT("call runtime_entry"); 3867 __ call(RuntimeAddress(runtime_entry)); 3868 3869 // Generate oop map 3870 OopMap* map = new OopMap(framesize, 0); 3871 3872 oop_maps->add_gc_map(the_pc - start, map); 3873 3874 __ reset_last_Java_frame(true, true); 3875 3876 __ leave(); // required for proper stackwalking of RuntimeStub frame 3877 3878 // check for pending exceptions 3879 #ifdef ASSERT 3880 Label L; 3881 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), 3882 (int32_t) NULL_WORD); 3883 __ jcc(Assembler::notEqual, L); 3884 __ should_not_reach_here(); 3885 __ bind(L); 3886 #endif // ASSERT 3887 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3888 3889 3890 // codeBlob framesize is in words (not VMRegImpl::slot_size) 3891 RuntimeStub* stub = 3892 RuntimeStub::new_runtime_stub(name, 3893 &code, 3894 frame_complete, 3895 (framesize >> (LogBytesPerWord - LogBytesPerInt)), 3896 oop_maps, false); 3897 return stub->entry_point(); 3898 } 3899 3900 void create_control_words() { 3901 // Round to nearest, 53-bit mode, exceptions masked 3902 StubRoutines::_fpu_cntrl_wrd_std = 0x027F; 3903 // Round to zero, 53-bit mode, exceptions masked 3904 StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F; 3905 // Round to nearest, 24-bit mode, exceptions masked 3906 StubRoutines::_fpu_cntrl_wrd_24 = 0x007F; 3907 // Round to nearest, 64-bit mode, exceptions masked 3908 StubRoutines::_fpu_cntrl_wrd_64 = 0x037F; 3909 // Round to nearest, all exceptions masked (MXCSR default) 3910 StubRoutines::_mxcsr_std = 0x1F80; 3911 // Note: the following two constants are 80-bit values 3912 // layout is critical for correct loading by FPU.
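// Those 80-bit constants follow the standard IEEE 754 double-extended
// layout (nothing HotSpot-specific): bits 0-63 hold the significand
// with an explicit integer bit, bits 64-78 the exponent biased by
// 16383, bit 79 the sign. So {0x00000000, 0x80000000, 0x03ff} encodes
// significand 1.0 (only the explicit integer bit set) with exponent
// 0x03ff - 16383 = -15360, i.e. 2^(-15360); 0x7bff - 16383 = +15360
// likewise gives 2^(+15360). Multiplying by one and then the other
// rescales a strict-fp intermediate down and back up without touching
// its significand.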
3913 // Bias for strict fp multiply/divide 3914 StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000 3915 StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000; 3916 StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff; 3917 // Un-Bias for strict fp multiply/divide 3918 StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000 3919 StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000; 3920 StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff; 3921 } 3922 3923 // Initialization 3924 void generate_initial() { 3925 // Generates all stubs and initializes the entry points 3926 3927 // These platform-specific settings are needed by generate_call_stub() 3928 create_control_words(); 3929 3930 // entry points that exist on all platforms. Note: this is code 3931 // that could be shared among different platforms - however the 3932 // benefit seems to be smaller than the disadvantage of having a 3933 // much more complicated generator structure. See also comment in 3934 // stubRoutines.hpp. 3935 3936 StubRoutines::_forward_exception_entry = generate_forward_exception(); 3937 3938 StubRoutines::_call_stub_entry = 3939 generate_call_stub(StubRoutines::_call_stub_return_address); 3940 3941 // is referenced by megamorphic call 3942 StubRoutines::_catch_exception_entry = generate_catch_exception(); 3943 3944 // atomic calls 3945 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); 3946 StubRoutines::_atomic_xchg_ptr_entry = generate_atomic_xchg_ptr(); 3947 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg(); 3948 StubRoutines::_atomic_cmpxchg_byte_entry = generate_atomic_cmpxchg_byte(); 3949 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long(); 3950 StubRoutines::_atomic_add_entry = generate_atomic_add(); 3951 StubRoutines::_atomic_add_ptr_entry = generate_atomic_add_ptr(); 3952 StubRoutines::_fence_entry = generate_orderaccess_fence(); 3953 3954 StubRoutines::_handler_for_unsafe_access_entry = 3955 generate_handler_for_unsafe_access(); 3956 3957 // platform dependent 3958 StubRoutines::x86::_get_previous_fp_entry = generate_get_previous_fp(); 3959 StubRoutines::x86::_get_previous_sp_entry = generate_get_previous_sp(); 3960 3961 StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr(); 3962 3963 // Build this early so it's available for the interpreter. 3964 StubRoutines::_throw_StackOverflowError_entry = 3965 generate_throw_exception("StackOverflowError throw_exception", 3966 CAST_FROM_FN_PTR(address, 3967 SharedRuntime:: 3968 throw_StackOverflowError)); 3969 if (UseCRC32Intrinsics) { 3970 // set table address before generating the stubs which use it 3971 StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; 3972 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); 3973 } 3974 } 3975 3976 void generate_all() { 3977 // Generates all stubs and initializes the entry points 3978 3979 // These entry points require SharedInfo::stack0 to be set up in 3980 // non-core builds and need to be relocatable, so they each 3981 // fabricate a RuntimeStub internally.
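// For orientation, a sketch of the consumer side (the real sites are
// elsewhere in the VM, e.g. the vtable stubs): these entries are
// reached by a jump rather than a call, so the throwing site's return
// address is already on the stack and identifies the raising pc,
// roughly
//
//   __ jmp(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry()));
//
// The stub then runs the matching SharedRuntime::throw_* helper and
// dispatches through forward_exception_entry, exactly as
// generate_throw_exception() above lays out.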
3982 StubRoutines::_throw_AbstractMethodError_entry = 3983 generate_throw_exception("AbstractMethodError throw_exception", 3984 CAST_FROM_FN_PTR(address, 3985 SharedRuntime:: 3986 throw_AbstractMethodError)); 3987 3988 StubRoutines::_throw_IncompatibleClassChangeError_entry = 3989 generate_throw_exception("IncompatibleClassChangeError throw_exception", 3990 CAST_FROM_FN_PTR(address, 3991 SharedRuntime:: 3992 throw_IncompatibleClassChangeError)); 3993 3994 StubRoutines::_throw_NullPointerException_at_call_entry = 3995 generate_throw_exception("NullPointerException at call throw_exception", 3996 CAST_FROM_FN_PTR(address, 3997 SharedRuntime:: 3998 throw_NullPointerException_at_call)); 3999 4000 // entry points that are platform specific 4001 StubRoutines::x86::_f2i_fixup = generate_f2i_fixup(); 4002 StubRoutines::x86::_f2l_fixup = generate_f2l_fixup(); 4003 StubRoutines::x86::_d2i_fixup = generate_d2i_fixup(); 4004 StubRoutines::x86::_d2l_fixup = generate_d2l_fixup(); 4005 4006 StubRoutines::x86::_float_sign_mask = generate_fp_mask("float_sign_mask", 0x7FFFFFFF7FFFFFFF); 4007 StubRoutines::x86::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000); 4008 StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF); 4009 StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000); 4010 4011 // support for verify_oop (must happen after universe_init) 4012 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); 4013 4014 // arraycopy stubs used by compilers 4015 generate_arraycopy_stubs(); 4016 4017 generate_math_stubs(); 4018 4019 // don't bother generating these AES intrinsic stubs unless global flag is set 4020 if (UseAESIntrinsics) { 4021 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others 4022 4023 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); 4024 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); 4025 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); 4026 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); 4027 } 4028 4029 // Safefetch stubs. 4030 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, 4031 &StubRoutines::_safefetch32_fault_pc, 4032 &StubRoutines::_safefetch32_continuation_pc); 4033 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, 4034 &StubRoutines::_safefetchN_fault_pc, 4035 &StubRoutines::_safefetchN_continuation_pc); 4036 #ifdef COMPILER2 4037 if (UseMultiplyToLenIntrinsic) { 4038 StubRoutines::_multiplyToLen = generate_multiplyToLen(); 4039 } 4040 #endif 4041 } 4042 4043 public: 4044 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { 4045 if (all) { 4046 generate_all(); 4047 } else { 4048 generate_initial(); 4049 } 4050 } 4051 }; // end class declaration 4052 4053 void StubGenerator_generate(CodeBuffer* code, bool all) { 4054 StubGenerator g(code, all); 4055 }
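// A sketch of how this hook is expected to be driven (the authoritative
// sequence is in stubRoutines.cpp; buffer setup elided here): generation
// runs in two phases so the call stub and other early entry points exist
// before the interpreter comes up, while the rest wait for universe_init.
//
//   void StubRoutines::initialize1() {          // early in VM startup
//     CodeBuffer buffer(_code1);
//     StubGenerator_generate(&buffer, false);   // -> generate_initial()
//   }
//   void StubRoutines::initialize2() {          // after universe_init
//     CodeBuffer buffer(_code2);
//     StubGenerator_generate(&buffer, true);    // -> generate_all()
//   }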