/*
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "assembler_sparc.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_sparc.hpp"
#include "oops/instanceOop.hpp"
#include "oops/methodOop.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/top.hpp"
#ifdef TARGET_OS_FAMILY_linux
# include "thread_linux.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_solaris
# include "thread_solaris.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_windows
# include "thread_windows.inline.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#define __ _masm->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Note: The register L7 is used as L7_thread_cache, and may not be used
// any other way within this module.
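
// Editorial sketch (not part of the original sources): generated stubs
// are plain code blobs which C++ code reaches through function pointers
// recorded in StubRoutines.  For example, the call stub produced by
// generate_call_stub() below is invoked from the VM roughly like this
// (names follow javaCalls.cpp; treat the exact signature as illustrative):
//
//   StubRoutines::call_stub()(
//     (address)&link,             // call wrapper, used only for GC
//     result_val_address,         // where to store the result
//     result_type,                // BasicType of the result
//     method(),                   // methodOop to invoke
//     entry_point,                // (interpreter) entry point
//     args->parameters(),         // parameter block
//     args->size_of_parameters(), // parameter size, in words
//     thread);                    // current JavaThread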


static const Register& Lstub_temp = L2;

// -------------------------------------------------------------------------------------------------------------------------
// Stub Code definitions

static address handle_unsafe_access() {
  JavaThread* thread = JavaThread::current();
  address pc  = thread->saved_exception_pc();
  address npc = thread->saved_exception_npc();
  // pc is the instruction which we must emulate
  // doing a no-op is fine:  return garbage from the load

  // request an async exception
  thread->set_pending_unsafe_access_error();

  // return address of next instruction to execute
  return npc;
}

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(a,b,c) (0)
#else
#define inc_counter_np(counter, t1, t2) \
  BLOCK_COMMENT("inc_counter " #counter); \
  __ inc_counter(&counter, t1, t2);
#endif

  //----------------------------------------------------------------------------------------------------
  // Call stubs are used to call Java from C

  address generate_call_stub(address& return_pc) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // Incoming arguments:
    //
    // o0         : call wrapper address
    // o1         : result (address)
    // o2         : result type
    // o3         : method
    // o4         : (interpreter) entry point
    // o5         : parameters (address)
    // [sp + 0x5c]: parameter size (in words)
    // [sp + 0x60]: thread
    //
    // +---------------+ <--- sp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- sp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- sp + 0x5c
    // |  param. size  |
    // +---------------+ <--- sp + 0x60
    // |    thread     |
    // +---------------+
    // |               |

    // note: if the link argument position changes, adjust
    //       the code in frame::entry_frame_call_wrapper()

    const Argument link           = Argument(0, false); // used only for GC
    const Argument result         = Argument(1, false);
    const Argument result_type    = Argument(2, false);
    const Argument method         = Argument(3, false);
    const Argument entry_point    = Argument(4, false);
    const Argument parameters     = Argument(5, false);
    const Argument parameter_size = Argument(6, false);
    const Argument thread         = Argument(7, false);

    // setup thread register
    __ ld_ptr(thread.as_address(), G2_thread);
    __ reinit_heapbase();

#ifdef ASSERT
    // make sure we have no pending exceptions
    { const Register t = G3_scratch;
      Label L;
      __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
      __ br_null(t, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // create activation frame & allocate space for parameters
    { const Register t = G3_scratch;
      __ ld_ptr(parameter_size.as_address(), t);            // get parameter size (in words)
      __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
      __ round_to(t, WordsPerLong);                         // make sure it is multiple of 2 (in words)
      __ sll(t, Interpreter::logStackElementSize, t);       // compute number of bytes
      __ neg(t);                                            // negate so it can be used with save
      __ save(SP, t, SP);                                   // setup new frame
    }

    // +---------------+ <--- sp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- sp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- sp + 0x5c
    // |  empty slot   | (only if parameter size is even)
    // +---------------+
    // |               |
    // .  parameters   .
    // |               |
    // +---------------+ <--- fp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- fp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- fp + 0x5c
    // |  param. size  |
    // +---------------+ <--- fp + 0x60
    // |    thread     |
    // +---------------+
    // |               |

    // pass parameters if any
    BLOCK_COMMENT("pass parameters if any");
    { const Register src = parameters.as_in().as_register();
      const Register dst = Lentry_args;
      const Register tmp = G3_scratch;
      const Register cnt = G4_scratch;

      // test if any parameters & setup of Lentry_args
      Label exit;
      __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
      __ add( FP, STACK_BIAS, dst );
      __ tst(cnt);
      __ br(Assembler::zero, false, Assembler::pn, exit);
      __ delayed()->sub(dst, BytesPerWord, dst);           // setup Lentry_args

      // copy parameters if any
      Label loop;
      __ BIND(loop);
      // Store parameter value
      __ ld_ptr(src, 0, tmp);
      __ add(src, BytesPerWord, src);
      __ st_ptr(tmp, dst, 0);
      __ deccc(cnt);
      __ br(Assembler::greater, false, Assembler::pt, loop);
      __ delayed()->sub(dst, Interpreter::stackElementSize, dst);

      // done
      __ BIND(exit);
    }

    // setup parameters, method & call Java function
#ifdef ASSERT
    // layout_activation_impl checks its notion of saved SP against
    // this register, so if this changes update it as well.
    const Register saved_SP = Lscratch;
    __ mov(SP, saved_SP);                              // keep track of SP before call
#endif

    // setup parameters
    const Register t = G3_scratch;
    __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
    __ sll(t, Interpreter::logStackElementSize, t);    // compute number of bytes
    __ sub(FP, t, Gargs);                              // setup parameter pointer
#ifdef _LP64
    __ add( Gargs, STACK_BIAS, Gargs );                // Account for LP64 stack bias
#endif
    __ mov(SP, O5_savedSP);


    // do the call
    //
    // the following registers must be set up:
    //
    // G2_thread
    // G5_method
    // Gargs
    BLOCK_COMMENT("call Java function");
    __ jmpl(entry_point.as_in().as_register(), G0, O7);
    __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method

    BLOCK_COMMENT("call_stub_return_address:");
    return_pc = __ pc();

    // The callee, if it wasn't interpreted, can return with SP changed, so
    // we can no longer assert on the change of SP.
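
    // Editorial note: the branch ladder below behaves like the following
    // C-style sketch (illustrative only):
    //
    //   switch (type) {
    //     case T_OBJECT: *(oop*)     addr = O0; break;
    //     case T_FLOAT:  *(jfloat*)  addr = F0; break;
    //     case T_DOUBLE: *(jdouble*) addr = F0; break;
    //     case T_LONG:   /* store the entire long, see below */ break;
    //     default:       *(jint*)    addr = O0; break; // everything else as T_INT
    //   }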

    // store result depending on type
    // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
    //  is treated as T_INT)
    { const Register addr = result     .as_in().as_register();
      const Register type = result_type.as_in().as_register();
      Label is_long, is_float, is_double, is_object, exit;
      __            cmp(type, T_OBJECT);  __ br(Assembler::equal, false, Assembler::pn, is_object);
      __ delayed()->cmp(type, T_FLOAT);   __ br(Assembler::equal, false, Assembler::pn, is_float);
      __ delayed()->cmp(type, T_DOUBLE);  __ br(Assembler::equal, false, Assembler::pn, is_double);
      __ delayed()->cmp(type, T_LONG);    __ br(Assembler::equal, false, Assembler::pn, is_long);
      __ delayed()->nop();

      // store int result
      __ st(O0, addr, G0);

      __ BIND(exit);
      __ ret();
      __ delayed()->restore();

      __ BIND(is_object);
      __ ba(false, exit);
      __ delayed()->st_ptr(O0, addr, G0);

      __ BIND(is_float);
      __ ba(false, exit);
      __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);

      __ BIND(is_double);
      __ ba(false, exit);
      __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);

      __ BIND(is_long);
#ifdef _LP64
      __ ba(false, exit);
      __ delayed()->st_long(O0, addr, G0); // store entire long
#else
#if defined(COMPILER2)
      // All return values are where we want them, except for Longs.  C2 returns
      // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
      // Since the interpreter will return longs in G1 and O0/O1 in the 32-bit
      // build we simply always use G1.
      // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
      // do this here. Unfortunately if we did a rethrow we'd see a MachEpilog node
      // first which would move g1 -> O0/O1 and destroy the exception we were throwing.

      __ ba(false, exit);
      __ delayed()->stx(G1, addr, G0); // store entire long
#else
      __ st(O1, addr, BytesPerInt);
      __ ba(false, exit);
      __ delayed()->st(O0, addr, G0);
#endif /* COMPILER2 */
#endif /* _LP64 */
    }
    return start;
  }


  //----------------------------------------------------------------------------------------------------
  // Return point for a Java call if there's an exception thrown in Java code.
  // The exception is caught and transformed into a pending exception stored in
  // JavaThread that can be tested from within the VM.
  //
  // Oexception: exception oop

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");

    address start = __ pc();
    // verify that thread corresponds
    __ verify_thread();

    const Register& temp_reg = Gtemp;
    Address pending_exception_addr    (G2_thread, Thread::pending_exception_offset());
    Address exception_file_offset_addr(G2_thread, Thread::exception_file_offset   ());
    Address exception_line_offset_addr(G2_thread, Thread::exception_line_offset   ());

    // set pending exception
    __ verify_oop(Oexception);
    __ st_ptr(Oexception, pending_exception_addr);
    __ set((intptr_t)__FILE__, temp_reg);
    __ st_ptr(temp_reg, exception_file_offset_addr);
    __ set((intptr_t)__LINE__, temp_reg);
    __ st(temp_reg, exception_line_offset_addr);

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");

    AddressLiteral stub_ret(StubRoutines::_call_stub_return_address);
    __ jump_to(stub_ret, temp_reg);
    __ delayed()->nop();

    return start;
  }


  //----------------------------------------------------------------------------------------------------
  // Continuation point for runtime calls returning with a pending exception
  // The pending exception check happened in the runtime or native call stub
  // The pending exception in Thread is converted into a Java-level exception
  //
  // Contract with Java-level exception handler: O0 = exception
  //                                             O1 = throwing pc

  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    // Upon entry, O7 has the return address returning into Java
    // (interpreted or compiled) code; i.e. the return address
    // becomes the throwing pc.
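    //
    // Flow summary (editorial): 1. read the pending exception out of the
    // thread; 2. call SharedRuntime::exception_handler_for_return_address()
    // to map the throwing pc to a Java-level handler; 3. clear the
    // pending-exception field and jump to that handler with O0 = exception
    // oop and O1 = throwing pc.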

    const Register& handler_reg = Gtemp;

    Address exception_addr(G2_thread, Thread::pending_exception_offset());

#ifdef ASSERT
    // make sure that this code is only executed if there is a pending exception
    { Label L;
      __ ld_ptr(exception_addr, Gtemp);
      __ br_notnull(Gtemp, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into handler_reg
    __ get_thread();
    __ ld_ptr(exception_addr, Oexception);
    __ verify_oop(Oexception);
    __ save_frame(0);             // compensates for compiler weakness
    __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
    __ mov(O0, handler_reg);
    __ restore();                 // compensates for compiler weakness

    __ ld_ptr(exception_addr, Oexception);
    __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ br_notnull(Oexception, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // jump to exception handler
    __ jmp(handler_reg, 0);
    // clear pending exception
    __ delayed()->st_ptr(G0, exception_addr);

    return start;
  }


  //------------------------------------------------------------------------------------------------------------------------
  // Continuation point for throwing of implicit exceptions that are not handled in
  // the current activation. Fabricates an exception oop and initiates normal
  // exception dispatching in this frame. Only callee-saved registers are preserved
  // (through the normal register window / RegisterMap handling).
  // If the compiler needs all registers to be preserved between the fault
  // point and the exception handler then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other implicit
  // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
  // either at call sites or otherwise assume that stack unwinding will be initiated,
  // so caller-saved registers were assumed volatile in the compiler.

  // Note that we generate only this stub into a RuntimeStub, because it needs to be
  // properly traversed and ignored during GC, so we change the meaning of the "__"
  // macro within this method.
#undef __
#define __ masm->

  address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc) {
#ifdef ASSERT
    int insts_size = VerifyThread ? 1 * K : 600;
#else
    int insts_size = VerifyThread ? 1 * K : 256;
#endif /* ASSERT */
    int locs_size = 32;

    CodeBuffer code(name, insts_size, locs_size);
    MacroAssembler* masm = new MacroAssembler(&code);

    __ verify_thread();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of thread-local storage
    __ assert_not_delayed();

    // Note that we always push a frame because on the SPARC
    // architecture, for all of our implicit exception kinds at call
    // sites, the implicit exception is taken before the callee frame
    // is pushed.
    __ save_frame(0);

    int frame_complete = __ offset();

    if (restore_saved_exception_pc) {
      __ ld_ptr(G2_thread, JavaThread::saved_exception_pc_offset(), I7);
      __ sub(I7, frame::pc_return_offset, I7);
    }

    // Note that we always have a runtime stub frame on the top of stack by this point
    Register last_java_sp = SP;
    // 64-bit last_java_sp is biased!
    __ set_last_Java_frame(last_java_sp, G0);
    if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early
    __ save_thread(noreg);
    // do the call
    BLOCK_COMMENT("call runtime_entry");
    __ call(runtime_entry, relocInfo::runtime_call_type);
    if (!VerifyThread)
      __ delayed()->mov(G2_thread, O0); // pass thread as first argument
    else
      __ delayed()->nop();              // (thread already passed)
    __ restore_thread(noreg);
    __ reset_last_Java_frame();

    // check for pending exceptions. use Gtemp as scratch register.
#ifdef ASSERT
    Label L;

    Address exception_addr(G2_thread, Thread::pending_exception_offset());
    Register scratch_reg = Gtemp;
    __ ld_ptr(exception_addr, scratch_reg);
    __ br_notnull(scratch_reg, false, Assembler::pt, L);
    __ delayed()->nop();
    __ should_not_reach_here();
    __ bind(L);
#endif // ASSERT
    BLOCK_COMMENT("call forward_exception_entry");
    __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    // we use O7 linkage so that forward_exception_entry has the issuing PC
    __ delayed()->restore();

    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
    return stub->entry_point();
  }

#undef __
#define __ _masm->


  // Generate a routine that sets all the registers so we
  // can tell if the stop routine prints them correctly.
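  // (Debug aid: the code below loads the recognizable pattern 0..31 into
  // the integer registers and small float constants into the FP registers,
  // then executes stop(), so the register dump printed by stop() can be
  // checked by inspection.)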
  address generate_test_stop() {
    StubCodeMark mark(this, "StubRoutines", "test_stop");
    address start = __ pc();

    int i;

    __ save_frame(0);

    static jfloat zero = 0.0, one = 1.0;

    // put addr in L0, then load through L0 to F0
    __ set((intptr_t)&zero, L0);  __ ldf( FloatRegisterImpl::S, L0, 0, F0);
    __ set((intptr_t)&one,  L0);  __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1

    // use add to put 2..18 in F2..F18
    for ( i = 2;  i <= 18;  ++i ) {
      __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1), as_FloatRegister(i));
    }

    // Now put double 2 in F16, double 18 in F18
    __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2,  F16 );
    __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 );

    // use add to put 20..32 in F20..F32
    for (i = 20; i < 32; i += 2) {
      __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2), as_FloatRegister(i));
    }

    // put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's
    for ( i = 0; i < 8; ++i ) {
      if (i < 6) {
        __ set(     i, as_iRegister(i));
        __ set(16 + i, as_oRegister(i));
        __ set(24 + i, as_gRegister(i));
      }
      __ set( 8 + i, as_lRegister(i));
    }

    __ stop("testing stop");


    __ ret();
    __ delayed()->restore();

    return start;
  }


  address generate_stop_subroutine() {
    StubCodeMark mark(this, "StubRoutines", "stop_subroutine");
    address start = __ pc();

    __ stop_subroutine();

    return start;
  }

  address generate_flush_callers_register_windows() {
    StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows");
    address start = __ pc();

    __ flush_windows();
    __ retl(false);
    __ delayed()->add( FP, STACK_BIAS, O0 );
    // The returned value must be a stack pointer whose register save area
    // is flushed, and will stay flushed while the caller executes.

    return start;
  }

  // Helper functions for v8 atomic operations.
  //
  void get_v8_oop_lock_ptr(Register lock_ptr_reg, Register mark_oop_reg, Register scratch_reg) {
    if (mark_oop_reg == noreg) {
      address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
      __ set((intptr_t)lock_ptr, lock_ptr_reg);
    } else {
      assert(scratch_reg != noreg, "just checking");
      address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
      __ set((intptr_t)lock_ptr, lock_ptr_reg);
      __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
      __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
    }
  }

  void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {

    get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
    __ set(StubRoutines::Sparc::locked, lock_reg);
    // Initialize yield counter
    __ mov(G0, yield_reg);

    __ BIND(retry);
    __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
    __ br(Assembler::less, false, Assembler::pt, dontyield);
    __ delayed()->nop();

    // This code can only be called from inside the VM, this
    // stub is only invoked from Atomic::add().  We do not
    // want to use call_VM, because _last_java_sp and such
    // must already be set.
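    // (Editorial note: despite the comment above, this prologue is shared
    // by the v8 paths of both Atomic::xchg() and Atomic::add(); see
    // generate_atomic_xchg() and generate_atomic_add() below.)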
    //
    // Save the regs and make space for a C call
    __ save(SP, -96, SP);
    __ save_all_globals_into_locals();
    BLOCK_COMMENT("call os::naked_sleep");
    __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
    __ delayed()->nop();
    __ restore_globals_from_locals();
    __ restore();
    // reset the counter
    __ mov(G0, yield_reg);

    __ BIND(dontyield);

    // try to get lock
    __ swap(lock_ptr_reg, 0, lock_reg);

    // did we get the lock?
    __ cmp(lock_reg, StubRoutines::Sparc::unlocked);
    __ br(Assembler::notEqual, true, Assembler::pn, retry);
    __ delayed()->add(yield_reg, 1, yield_reg);

    // yes, got lock.  do the operation here.
  }

  void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
    __ st(lock_reg, lock_ptr_reg, 0); // unlock
  }

  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
  //
  // Arguments:
  //
  //      exchange_value: O0
  //      dest:           O1
  //
  // Results:
  //
  //      O0: the value previously stored in dest
  //
  address generate_atomic_xchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
    address start = __ pc();

    if (UseCASForSwap) {
      // Use CAS instead of swap, just in case the MP hardware
      // prefers to work with just one kind of synch. instruction.
      Label retry;
      __ BIND(retry);
      __ mov(O0, O3);   // scratch copy of exchange value
      __ ld(O1, 0, O2); // observe the previous value
      // try to replace O2 with O3
      __ cas_under_lock(O1, O2, O3,
          (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(), false);
      __ cmp(O2, O3);
      __ br(Assembler::notEqual, false, Assembler::pn, retry);
      __ delayed()->nop();

      __ retl(false);
      __ delayed()->mov(O2, O0); // report previous value to caller

    } else {
      if (VM_Version::v9_instructions_work()) {
        __ retl(false);
        __ delayed()->swap(O1, 0, O0);
      } else {
        const Register& lock_reg     = O2;
        const Register& lock_ptr_reg = O3;
        const Register& yield_reg    = O4;

        Label retry;
        Label dontyield;

        generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
        // got the lock, do the swap
        __ swap(O1, 0, O0);

        generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
        __ retl(false);
        __ delayed()->nop();
      }
    }

    return start;
  }


  // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
  //
  // Arguments:
  //
  //      exchange_value: O0
  //      dest:           O1
  //      compare_value:  O2
  //
  // Results:
  //
  //      O0: the value previously stored in dest
  //
  // Overwrites (v8): O3,O4,O5
  //
  address generate_atomic_cmpxchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
    address start = __ pc();

    // cmpxchg(dest, compare_value, exchange_value)
    __ cas_under_lock(O1, O2, O0,
        (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(), false);
    __ retl(false);
    __ delayed()->nop();

    return start;
  }

  // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
  //
  // Arguments:
  //
  //      exchange_value: O1:O0
  //      dest:           O2
  //      compare_value:  O4:O3
  //
  // Results:
  //
  //      O1:O0: the value previously stored in dest
  //
  // This only works on V9; on V8 we don't generate any
  // code and just return NULL.
  //
  // Overwrites: G1,G2,G3
  //
  address generate_atomic_cmpxchg_long() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
    address start = __ pc();

    if (!VM_Version::supports_cx8())
      return NULL;
    __ sllx(O0, 32, O0);
    __ srl(O1, 0, O1);
    __ or3(O0, O1, O0); // O0 holds the 64-bit exchange_value
    __ sllx(O3, 32, O3);
    __ srl(O4, 0, O4);
    __ or3(O3, O4, O3); // O3 holds the 64-bit compare_value
    __ casx(O2, O3, O0);
    __ srl(O0, 0, O1);  // unpacked return value in O1:O0
    __ retl(false);
    __ delayed()->srlx(O0, 32, O0);

    return start;
  }


  // Support for jint Atomic::add(jint add_value, volatile jint* dest).
  //
  // Arguments:
  //
  //      add_value: O0   (e.g., +1 or -1)
  //      dest:      O1
  //
  // Results:
  //
  //      O0: the new value stored in dest
  //
  // Overwrites (v9): O3
  // Overwrites (v8): O3,O4,O5
  //
  address generate_atomic_add() {
    StubCodeMark mark(this, "StubRoutines", "atomic_add");
    address start = __ pc();
    __ BIND(_atomic_add_stub);

    if (VM_Version::v9_instructions_work()) {
      Label retry;
      __ BIND(retry);

      __ lduw(O1, 0, O2);
      __ add(O0, O2, O3);
      __ cas(O1, O2, O3);
      __ cmp(O2, O3);
      __ br(Assembler::notEqual, false, Assembler::pn, retry);
      __ delayed()->nop();
      __ retl(false);
      __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
    } else {
      const Register& lock_reg     = O2;
      const Register& lock_ptr_reg = O3;
      const Register& value_reg    = O4;
      const Register& yield_reg    = O5;

      Label retry;
      Label dontyield;

      generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
      // got lock, do the increment
      __ ld(O1, 0, value_reg);
      __ add(O0, value_reg, value_reg);
      __ st(value_reg, O1, 0);

      // %%% only for RMO and PSO
      __ membar(Assembler::StoreStore);

      generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);

      __ retl(false);
      __ delayed()->mov(value_reg, O0);
    }

    return start;
  }
  Label _atomic_add_stub; // called from other stubs


  //------------------------------------------------------------------------------------------------------------------------
  // The following routine generates a subroutine to throw an asynchronous
  // UnknownError when an unsafe access gets a fault that could not be
  // reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
  //
  // Arguments:
  //
  //      trapping PC: O7
  //
  // Results:
  //      posts an asynchronous exception, skips the trapping instruction
  //

  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();

    const int preserve_register_words = (64 * 2);
    Address preserve_addr(FP, (-preserve_register_words * wordSize) + STACK_BIAS);

    Register Lthread = L7_thread_cache;
    int i;

    __ save_frame(0);
    __ mov(G1, L1);
    __ mov(G2, L2);
    __ mov(G3, L3);
    __ mov(G4, L4);
    __ mov(G5, L5);
    for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
      __ stf(FloatRegisterImpl::D, as_FloatRegister(i), preserve_addr, i * wordSize);
    }

    address entry_point = CAST_FROM_FN_PTR(address, handle_unsafe_access);
    BLOCK_COMMENT("call handle_unsafe_access");
    __ call(entry_point, relocInfo::runtime_call_type);
    __ delayed()->nop();

    __ mov(L1, G1);
    __ mov(L2, G2);
    __ mov(L3, G3);
    __ mov(L4, G4);
    __ mov(L5, G5);
    for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
      __ ldf(FloatRegisterImpl::D, preserve_addr, as_FloatRegister(i), i * wordSize);
    }

    __ verify_thread();

    __ jmp(O0, 0);
    __ delayed()->restore();

    return start;
  }


  // Support for uint StubRoutines::Sparc::partial_subtype_check( Klass sub, Klass super );
  // Arguments:
  //
  //      ret:     O0, returned
  //      icc/xcc: set as O0 (depending on wordSize)
  //      sub:     O1, argument, not changed
  //      super:   O2, argument, not changed
  //      raddr:   O7, blown by call
  address generate_partial_subtype_check() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    address start = __ pc();
    Label miss;

#if defined(COMPILER2) && !defined(_LP64)
    // Do not use a 'save' because it blows the 64-bit O registers.
    __ add(SP, -4*wordSize, SP); // Make space for 4 temps (stack must be 2 words aligned)
    __ st_ptr(L0, SP, (frame::register_save_words+0)*wordSize);
    __ st_ptr(L1, SP, (frame::register_save_words+1)*wordSize);
    __ st_ptr(L2, SP, (frame::register_save_words+2)*wordSize);
    __ st_ptr(L3, SP, (frame::register_save_words+3)*wordSize);
    Register Rret   = O0;
    Register Rsub   = O1;
    Register Rsuper = O2;
#else
    __ save_frame(0);
    Register Rret   = I0;
    Register Rsub   = I1;
    Register Rsuper = I2;
#endif

    Register L0_ary_len = L0;
    Register L1_ary_ptr = L1;
    Register L2_super   = L2;
    Register L3_index   = L3;

    __ check_klass_subtype_slow_path(Rsub, Rsuper,
                                     L0, L1, L2, L3,
                                     NULL, &miss);

    // Match falls through here.
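    // (On a hit we fall through and return 0 with the condition codes set
    // to "equal"; on a miss, below, we return non-zero with "not equal",
    // so callers may test either the returned value or the flags.)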
    __ addcc(G0, 0, Rret); // set Z flags, Z result

#if defined(COMPILER2) && !defined(_LP64)
    __ ld_ptr(SP, (frame::register_save_words+0)*wordSize, L0);
    __ ld_ptr(SP, (frame::register_save_words+1)*wordSize, L1);
    __ ld_ptr(SP, (frame::register_save_words+2)*wordSize, L2);
    __ ld_ptr(SP, (frame::register_save_words+3)*wordSize, L3);
    __ retl();             // Result in Rret is zero; flags set to Z
    __ delayed()->add(SP, 4*wordSize, SP);
#else
    __ ret();              // Result in Rret is zero; flags set to Z
    __ delayed()->restore();
#endif

    __ BIND(miss);
    __ addcc(G0, 1, Rret); // set NZ flags, NZ result

#if defined(COMPILER2) && !defined(_LP64)
    __ ld_ptr(SP, (frame::register_save_words+0)*wordSize, L0);
    __ ld_ptr(SP, (frame::register_save_words+1)*wordSize, L1);
    __ ld_ptr(SP, (frame::register_save_words+2)*wordSize, L2);
    __ ld_ptr(SP, (frame::register_save_words+3)*wordSize, L3);
    __ retl();             // Result in Rret is != 0; flags set to NZ
    __ delayed()->add(SP, 4*wordSize, SP);
#else
    __ ret();              // Result in Rret is != 0; flags set to NZ
    __ delayed()->restore();
#endif

    return start;
  }


  // Called from MacroAssembler::verify_oop
  //
  address generate_verify_oop_subroutine() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");

    address start = __ pc();

    __ verify_oop_subroutine();

    return start;
  }

  static address disjoint_byte_copy_entry;
  static address disjoint_short_copy_entry;
  static address disjoint_int_copy_entry;
  static address disjoint_long_copy_entry;
  static address disjoint_oop_copy_entry;

  static address byte_copy_entry;
  static address short_copy_entry;
  static address int_copy_entry;
  static address long_copy_entry;
  static address oop_copy_entry;

  static address checkcast_copy_entry;

  //
  // Verify that a register contains a clean 32-bit positive value
  // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
  //
  //  Input:
  //    Rint  -  32-bit value
  //    Rtmp  -  scratch
  //
  void assert_clean_int(Register Rint, Register Rtmp) {
#if defined(ASSERT) && defined(_LP64)
    __ signx(Rint, Rtmp);
    __ cmp(Rint, Rtmp);
    __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
#endif
  }

  //
  //  Generate overlap test for array copy stubs
  //
  //  Input:
  //    O0    -  array1
  //    O1    -  array2
  //    O2    -  element count
  //
  //  Kills temps:  O3, O4
  //
  void array_overlap_test(address no_overlap_target, int log2_elem_size) {
    assert(no_overlap_target != NULL, "must be generated");
    array_overlap_test(no_overlap_target, NULL, log2_elem_size);
  }
  void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
    array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
  }
  void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
    const Register from       = O0;
    const Register to         = O1;
    const Register count      = O2;
    const Register to_from    = O3; // to - from
    const Register byte_count = O4; // count << log2_elem_size

    __ subcc(to, from, to_from);
    __ sll_ptr(count, log2_elem_size, byte_count);
    if (NOLp == NULL)
      __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target);
    else
      __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp));
    __ delayed()->cmp(to_from, byte_count);
    if (NOLp == NULL)
      __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, no_overlap_target);
    else
      __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, (*NOLp));
    __ delayed()->nop();
  }

  //
  //  Generate pre-write barrier for array.
  //
  //  Input:
  //     addr   - register containing starting address
  //     count  - register containing element count
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_pre_barrier(Register addr, Register count) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    if (bs->has_write_ref_pre_barrier()) {
      assert(bs->has_write_ref_array_pre_opt(),
             "Else unsupported barrier set.");

      __ save_frame(0);
      // Save the necessary global regs... will be used after.
      if (addr->is_global()) {
        __ mov(addr, L0);
      }
      if (count->is_global()) {
        __ mov(count, L1);
      }
      __ mov(addr->after_save(), O0);
      // Get the count into O1
      __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
      __ delayed()->mov(count->after_save(), O1);
      if (addr->is_global()) {
        __ mov(L0, addr);
      }
      if (count->is_global()) {
        __ mov(L1, count);
      }
      __ restore();
    }
  }
  //
  //  Generate post-write barrier for array.
  //
  //  Input:
  //     addr   - register containing starting address
  //     count  - register containing element count
  //     tmp    - scratch register
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_post_barrier(Register addr, Register count,
                                        Register tmp) {
    BarrierSet* bs = Universe::heap()->barrier_set();

    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        {
          // Get some new fresh output registers.
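          // (save_frame rotates the register window so the runtime call
          // below gets fresh O registers; addr and count survive in the
          // caller's window and are re-materialized via after_save().)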
          __ save_frame(0);
          __ mov(addr->after_save(), O0);
          __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
          __ delayed()->mov(count->after_save(), O1);
          __ restore();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
          assert_different_registers(addr, count, tmp);

          Label L_loop;

          __ sll_ptr(count, LogBytesPerHeapOop, count);
          __ sub(count, BytesPerHeapOop, count);
          __ add(count, addr, count);
          // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
          __ srl_ptr(addr, CardTableModRefBS::card_shift, addr);
          __ srl_ptr(count, CardTableModRefBS::card_shift, count);
          __ sub(count, addr, count);
          AddressLiteral rs(ct->byte_map_base);
          __ set(rs, tmp);
          __ BIND(L_loop);
          __ stb(G0, tmp, addr);
          __ subcc(count, 1, count);
          __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
          __ delayed()->add(addr, 1, addr);
        }
        break;
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();
    }
  }


  // Copy big chunks forward with shift
  //
  // Inputs:
  //   from      - source array
  //   to        - destination array aligned to 8 bytes
  //   count     - element count to copy >= the count equivalent to 16 bytes
  //   count_dec - element count decrement equivalent to 16 bytes
  //   L_copy_bytes - copy exit label
  //
  void copy_16_bytes_forward_with_shift(Register from, Register to,
                     Register count, int count_dec, Label& L_copy_bytes) {
    Label L_loop, L_aligned_copy, L_copy_last_bytes;

    // if both arrays have the same alignment mod 8, do an 8-byte aligned copy
    __ andcc(from, 7, G1); // misaligned bytes
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->nop();

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    //
    // Load 2 aligned 8-byte chunks and use one from the previous iteration
    // to form 2 aligned 8-byte chunks to store.
    //
    __ deccc(count, count_dec); // Pre-decrement 'count'
    __ andn(from, 7, from);     // Align address
    __ ldx(from, 0, O3);
    __ inc(from, 8);
    __ align(OptoLoopAlignment);
    __ BIND(L_loop);
    __ ldx(from, 0, O4);
    __ deccc(count, count_dec); // Can we do next iteration after this one?
    __ ldx(from, 8, G4);
    __ inc(to, 16);
    __ inc(from, 16);
    __ sllx(O3, left_shift,  O3);
    __ srlx(O4, right_shift, G3);
    __ bset(G3, O3);
    __ stx(O3, to, -16);
    __ sllx(O4, left_shift,  O4);
    __ srlx(G4, right_shift, G3);
    __ bset(G3, O4);
    __ stx(O4, to, -8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->mov(G4, O3);

    __ inccc(count, count_dec>>1); // + 8 bytes
    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
    __ delayed()->inc(count, count_dec>>1); // restore 'count'

    // copy 8 bytes, part of them already loaded in O3
    __ ldx(from, 0, O4);
    __ inc(to, 8);
    __ inc(from, 8);
    __ sllx(O3, left_shift,  O3);
    __ srlx(O4, right_shift, G3);
    __ bset(O3, G3);
    __ stx(G3, to, -8);

    __ BIND(L_copy_last_bytes);
    __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
    __ delayed()->sub(from, right_shift, from);       // restore address

    __ BIND(L_aligned_copy);
  }

  // Copy big chunks backward with shift
  //
  // Inputs:
  //   end_from  - source array end address
  //   end_to    - destination array end address aligned to 8 bytes
  //   count     - element count to copy >= the count equivalent to 16 bytes
  //   count_dec - element count decrement equivalent to 16 bytes
  //   L_aligned_copy - aligned copy exit label
  //   L_copy_bytes   - copy exit label
  //
  void copy_16_bytes_backward_with_shift(Register end_from, Register end_to,
                     Register count, int count_dec,
                     Label& L_aligned_copy, Label& L_copy_bytes) {
    Label L_loop, L_copy_last_bytes;

    // if both arrays have the same alignment mod 8, do an 8-byte aligned copy
    __ andcc(end_from, 7, G1); // misaligned bytes
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->deccc(count, count_dec); // Pre-decrement 'count'

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    //
    // Load 2 aligned 8-byte chunks and use one from the previous iteration
    // to form 2 aligned 8-byte chunks to store.
    //
    __ andn(end_from, 7, end_from); // Align address
    __ ldx(end_from, 0, O3);
    __ align(OptoLoopAlignment);
    __ BIND(L_loop);
    __ ldx(end_from, -8, O4);
    __ deccc(count, count_dec); // Can we do next iteration after this one?
    __ ldx(end_from, -16, G4);
    __ dec(end_to,   16);
    __ dec(end_from, 16);
    __ srlx(O3, right_shift, O3);
    __ sllx(O4, left_shift,  G3);
    __ bset(G3, O3);
    __ stx(O3, end_to, 8);
    __ srlx(O4, right_shift, O4);
    __ sllx(G4, left_shift,  G3);
    __ bset(G3, O4);
    __ stx(O4, end_to, 0);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->mov(G4, O3);

    __ inccc(count, count_dec>>1); // + 8 bytes
    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
    __ delayed()->inc(count, count_dec>>1); // restore 'count'

    // copy 8 bytes, part of them already loaded in O3
    __ ldx(end_from, -8, O4);
    __ dec(end_to,   8);
    __ dec(end_from, 8);
    __ srlx(O3, right_shift, O3);
    __ sllx(O4, left_shift,  G3);
    __ bset(O3, G3);
    __ stx(G3, end_to, 0);

    __ BIND(L_copy_last_bytes);
    __ srl(left_shift, LogBitsPerByte, left_shift); // misaligned bytes
    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
    __ delayed()->add(end_from, left_shift, end_from); // restore address
  }

  //
  //  Generate stub for disjoint byte copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  //  Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_byte_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_align;
    Label L_copy_byte, L_copy_byte_loop, L_exit;

    const Register from   = O0; // source array address
    const Register to     = O1; // destination array address
    const Register count  = O2; // elements count
    const Register offset = O5; // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    assert_clean_int(count, O3); // Make sure 'count' is clean int.

    if (!aligned) disjoint_byte_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned) BLOCK_COMMENT("Entry:");

    // for short arrays, just do single element copy
    __ cmp(count, 23); // 16 + 7
    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
    __ delayed()->mov(G0, offset);

    if (aligned) {
      // 'aligned' == true when it is known statically during compilation
      // of this arraycopy call site that both 'from' and 'to' addresses
      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
      //
      // Aligned arrays have 4-byte alignment in the 32-bit VM
      // and 8-byte alignment in the 64-bit VM, so we do this only
      // for the 32-bit VM.
      //
#ifndef _LP64
      // copy a 4-byte word if necessary to align 'to' to 8 bytes
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count, 4);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);
#endif
    } else {
      // copy bytes to align 'to' on an 8 byte boundary
      __ andcc(to, 7, G1); // misaligned bytes
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->neg(G1);
      __ inc(G1, 8);       // bytes needed to copy to reach the next 8-byte alignment
      __ sub(count, G1, count);
      __ BIND(L_align);
      __ ldub(from, 0, O3);
      __ deccc(G1);
      __ inc(from);
      __ stb(O3, to, 0);
      __ br(Assembler::notZero, false, Assembler::pt, L_align);
      __ delayed()->inc(to);
      __ BIND(L_skip_alignment);
    }
#ifdef _LP64
    if (!aligned)
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise fall through to the next
      // code for aligned copy.
      // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
    }

    // Both arrays are 8 bytes aligned, copy 16 bytes at a time
    __ and3(count, 7, G4); // Save count
    __ srl(count, 3, count);
    generate_disjoint_long_copy_core(aligned);
    __ mov(G4, count);     // Restore count

    // copy trailing bytes
    __ BIND(L_copy_byte);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_byte_loop);
    __ ldub(from, offset, O3);
    __ deccc(count);
    __ stb(O3, to, offset);
    __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
    __ delayed()->inc(offset);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate stub for conjoint byte copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  //  Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_byte_copy(bool aligned, const char * name) {
    // Do reverse copy.

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
        disjoint_byte_copy_entry;

    Label L_skip_alignment, L_align, L_aligned_copy;
    Label L_copy_byte, L_copy_byte_loop, L_exit;

    const Register from     = O0;   // source array address
    const Register to       = O1;   // destination array address
    const Register count    = O2;   // elements count
    const Register end_from = from; // source array end address
    const Register end_to   = to;   // destination array end address

    assert_clean_int(count, O3);    // Make sure 'count' is clean int.
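
    // (Note: when !aligned, the pc recorded just below is also published
    // as byte_copy_entry, the generic conjoint-byte entry point used by
    // the other copy stubs in this generator.)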

    if (!aligned) byte_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned) BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 0);

    __ add(to, count, end_to); // offset after last copied element

    // for short arrays, just do single element copy
    __ cmp(count, 23); // 16 + 7
    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
    __ delayed()->add(from, count, end_from);

    {
      // Align the ends of the arrays, since they could be unaligned even
      // when the arrays themselves are aligned.

      // copy bytes to align 'end_to' on an 8 byte boundary
      __ andcc(end_to, 7, G1); // misaligned bytes
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->nop();
      __ sub(count, G1, count);
      __ BIND(L_align);
      __ dec(end_from);
      __ dec(end_to);
      __ ldub(end_from, 0, O3);
      __ deccc(G1);
      __ brx(Assembler::notZero, false, Assembler::pt, L_align);
      __ delayed()->stb(O3, end_to, 0);
      __ BIND(L_skip_alignment);
    }
#ifdef _LP64
    if (aligned) {
      // Both arrays are aligned to 8 bytes in the 64-bit VM.
      // 'count' is decremented inside copy_16_bytes_backward_with_shift()
      // only in the unaligned case, so pre-decrement it here.
      __ dec(count, 16);
    } else
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise jump to the next
      // code for aligned copy (subtracting 16 from 'count' before the jump).
      // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
                                        L_aligned_copy, L_copy_byte);
    }
    // copy 4 elements (16 bytes) at a time
    __ align(OptoLoopAlignment);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 16);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 16);

    // copy 1 element (1 byte) at a time
    __ BIND(L_copy_byte);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_byte_loop);
    __ dec(end_from);
    __ dec(end_to);
    __ ldub(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
    __ delayed()->stb(O4, end_to, 0);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate stub for disjoint short copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  //  Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_short_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_skip_alignment2;
    Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;

    const Register from   = O0; // source array address
    const Register to     = O1; // destination array address
    const Register count  = O2; // elements count
    const Register offset = O5; // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    assert_clean_int(count, O3); // Make sure 'count' is clean int.

    if (!aligned) disjoint_short_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned) BLOCK_COMMENT("Entry:");

    // for short arrays, just do single element copy
    __ cmp(count, 11); // 8 + 3  (22 bytes)
    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
    __ delayed()->mov(G0, offset);

    if (aligned) {
      // 'aligned' == true when it is known statically during compilation
      // of this arraycopy call site that both 'from' and 'to' addresses
      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
      //
      // Aligned arrays have 4-byte alignment in the 32-bit VM
      // and 8-byte alignment in the 64-bit VM.
      //
#ifndef _LP64
      // copy a 2-element word if necessary to align 'to' to 8 bytes
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count, 2);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);
#endif
    } else {
      // copy 1 element if necessary to align 'to' on a 4-byte boundary
      __ andcc(to, 3, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->lduh(from, 0, O3);
      __ inc(from, 2);
      __ inc(to, 2);
      __ dec(count);
      __ sth(O3, to, -2);
      __ BIND(L_skip_alignment);

      // copy 2 elements to align 'to' on an 8 byte boundary
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
      __ delayed()->lduh(from, 0, O3);
      __ dec(count, 2);
      __ lduh(from, 2, O4);
      __ inc(from, 4);
      __ inc(to, 4);
      __ sth(O3, to, -4);
      __ sth(O4, to, -2);
      __ BIND(L_skip_alignment2);
    }
#ifdef _LP64
    if (!aligned)
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise fall through to the next
      // code for aligned copy.
      // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.
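      // (Here count_dec is 8 because each 16-byte chunk holds 8 shorts;
      // the byte copy above passed 16 for the same chunk size.)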

      copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
    }

    // Both arrays are 8 bytes aligned, copy 16 bytes at a time
    __ and3(count, 3, G4); // Save
    __ srl(count, 2, count);
    generate_disjoint_long_copy_core(aligned);
    __ mov(G4, count); // restore

    // copy 1 element at a time
    __ BIND(L_copy_2_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_2_bytes_loop);
    __ lduh(from, offset, O3);
    __ deccc(count);
    __ sth(O3, to, offset);
    __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
    __ delayed()->inc(offset, 2);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate stub for array fill (byte, short, or int).  If "aligned" is
  //  true, the "to" address is assumed to be heapword aligned.
  //
  //  Arguments for generated stub:
  //      to:    O0
  //      value: O1
  //      count: O2 treated as signed
  //
  address generate_fill(BasicType t, bool aligned, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register to    = O0; // destination array address
    const Register value = O1; // fill value
    const Register count = O2; // elements count
    // O3 is used as a temp register

    assert_clean_int(count, O3); // Make sure 'count' is clean int.

    Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
    Label L_fill_2_bytes, L_fill_elements, L_fill_32_bytes;

    int shift = -1;
    switch (t) {
      case T_BYTE:
        shift = 2;
        break;
      case T_SHORT:
        shift = 1;
        break;
      case T_INT:
        shift = 0;
        break;
      default: ShouldNotReachHere();
    }

    BLOCK_COMMENT("Entry:");

    if (t == T_BYTE) {
      // Zero extend value
      __ and3(value, 0xff, value);
      __ sllx(value, 8, O3);
      __ or3(value, O3, value);
    }
    if (t == T_SHORT) {
      // Zero extend value
      __ sllx(value, 48, value);
      __ srlx(value, 48, value);
    }
    if (t == T_BYTE || t == T_SHORT) {
      __ sllx(value, 16, O3);
      __ or3(value, O3, value);
    }

    __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
    __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp
    __ delayed()->andcc(count, 1, G0);

    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
      // align destination address at a 4-byte address boundary
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ andcc(to, 1, G0);
        __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
        __ delayed()->nop();
        __ stb(value, to, 0);
        __ inc(to, 1);
        __ dec(count, 1);
        __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ andcc(to, 2, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
      __ delayed()->nop();
      __ sth(value, to, 0);
      __ inc(to, 2);
      __ dec(count, 1 << (shift - 1));
      __ BIND(L_skip_align2);
    }
#ifdef _LP64
    if (!aligned) {
#endif
    // align to 8 bytes, we know we are 4 byte aligned to start
    __ andcc(to, 7, G0);
  //
  // Generate stub for array fill. If "aligned" is true, the
  // "to" address is assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      to:    O0
  //      value: O1
  //      count: O2 treated as signed
  //
  address generate_fill(BasicType t, bool aligned, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register to    = O0;   // destination array address
    const Register value = O1;   // fill value
    const Register count = O2;   // elements count
    // O3 is used as a temp register

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
    Label L_fill_2_bytes, L_fill_elements, L_fill_32_bytes;

    int shift = -1;
    switch (t) {
      case T_BYTE:
        shift = 2;
        break;
      case T_SHORT:
        shift = 1;
        break;
      case T_INT:
        shift = 0;
        break;
      default: ShouldNotReachHere();
    }

    BLOCK_COMMENT("Entry:");

    if (t == T_BYTE) {
      // Zero extend value
      __ and3(value, 0xff, value);
      __ sllx(value, 8, O3);
      __ or3(value, O3, value);
    }
    if (t == T_SHORT) {
      // Zero extend value
      __ sllx(value, 48, value);
      __ srlx(value, 48, value);
    }
    if (t == T_BYTE || t == T_SHORT) {
      __ sllx(value, 16, O3);
      __ or3(value, O3, value);
    }

    __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
    __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp
    __ delayed()->andcc(count, 1, G0);

    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
      // align destination address at 4-byte boundary
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ andcc(to, 1, G0);
        __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
        __ delayed()->nop();
        __ stb(value, to, 0);
        __ inc(to, 1);
        __ dec(count, 1);
        __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ andcc(to, 2, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
      __ delayed()->nop();
      __ sth(value, to, 0);
      __ inc(to, 2);
      __ dec(count, 1 << (shift - 1));
      __ BIND(L_skip_align2);
    }
#ifdef _LP64
    if (!aligned) {
#endif
    // align to 8 bytes, we know we are 4 byte aligned to start
    __ andcc(to, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
    __ delayed()->nop();
    __ stw(value, to, 0);
    __ inc(to, 4);
    __ dec(count, 1 << shift);
    __ BIND(L_fill_32_bytes);
#ifdef _LP64
    }
#endif

    if (t == T_INT) {
      // Zero extend value
      __ srl(value, 0, value);
    }
    if (t == T_BYTE || t == T_SHORT || t == T_INT) {
      __ sllx(value, 32, O3);
      __ or3(value, O3, value);
    }

    Label L_check_fill_8_bytes;
    // Fill 32-byte chunks
    __ subcc(count, 8 << shift, count);
    __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
    __ delayed()->nop();

    Label L_fill_32_bytes_loop, L_fill_4_bytes;
    __ align(16);
    __ BIND(L_fill_32_bytes_loop);

    __ stx(value, to, 0);
    __ stx(value, to, 8);
    __ stx(value, to, 16);
    __ stx(value, to, 24);

    __ subcc(count, 8 << shift, count);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
    __ delayed()->add(to, 32, to);

    __ BIND(L_check_fill_8_bytes);
    __ addcc(count, 8 << shift, count);
    __ brx(Assembler::zero, false, Assembler::pn, L_exit);
    __ delayed()->subcc(count, 1 << (shift + 1), count);
    __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
    __ delayed()->andcc(count, 1<<shift, G0);

    //
    // length is too short, just fill 8 bytes at a time
    //
    Label L_fill_8_bytes_loop;
    __ BIND(L_fill_8_bytes_loop);
    __ stx(value, to, 0);
    __ subcc(count, 1 << (shift + 1), count);
    __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
    __ delayed()->add(to, 8, to);

    // fill trailing 4 bytes
    __ andcc(count, 1<<shift, G0);  // in delay slot of branches
    if (t == T_INT) {
      __ BIND(L_fill_elements);
    }
    __ BIND(L_fill_4_bytes);
    __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
    if (t == T_BYTE || t == T_SHORT) {
      __ delayed()->andcc(count, 1<<(shift-1), G0);
    } else {
      __ delayed()->nop();
    }
    __ stw(value, to, 0);
    if (t == T_BYTE || t == T_SHORT) {
      __ inc(to, 4);
      // fill trailing 2 bytes
      __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
      __ BIND(L_fill_2_bytes);
      __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
      __ delayed()->andcc(count, 1, count);
      __ sth(value, to, 0);
      if (t == T_BYTE) {
        __ inc(to, 2);
        // fill trailing byte
        __ andcc(count, 1, count);  // in delay slot of branches
        __ BIND(L_fill_byte);
        __ brx(Assembler::zero, false, Assembler::pt, L_exit);
        __ delayed()->nop();
        __ stb(value, to, 0);
      } else {
        __ BIND(L_fill_byte);
      }
    } else {
      __ BIND(L_fill_2_bytes);
    }
    __ BIND(L_exit);
    __ retl();
    __ delayed()->nop();

    // Handle fills less than 8 bytes. Int is handled elsewhere.
    if (t == T_BYTE) {
      __ BIND(L_fill_elements);
      Label L_fill_2, L_fill_4;
      // in delay slot __ andcc(count, 1, G0);
      __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
      __ delayed()->andcc(count, 2, G0);
      __ stb(value, to, 0);
      __ inc(to, 1);
      __ BIND(L_fill_2);
      __ brx(Assembler::zero, false, Assembler::pt, L_fill_4);
      __ delayed()->andcc(count, 4, G0);
      __ stb(value, to, 0);
      __ stb(value, to, 1);
      __ inc(to, 2);
      __ BIND(L_fill_4);
      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
      __ delayed()->nop();
      __ stb(value, to, 0);
      __ stb(value, to, 1);
      __ stb(value, to, 2);
      __ retl();
      __ delayed()->stb(value, to, 3);
    }

    if (t == T_SHORT) {
      Label L_fill_2;
      __ BIND(L_fill_elements);
      // in delay slot __ andcc(count, 1, G0);
      __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
      __ delayed()->andcc(count, 2, G0);
      __ sth(value, to, 0);
      __ inc(to, 2);
      __ BIND(L_fill_2);
      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
      __ delayed()->nop();
      __ sth(value, to, 0);
      __ retl();
      __ delayed()->sth(value, to, 2);
    }
    return start;
  }
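
  // For reference, the fill fast path above amounts to the following C-like
  // sketch (illustrative only; 'store8' stands in for stx, and the unaligned
  // head/tail peeling emitted above is omitted):
  //
  //   // broadcast the element value into all 64 bits
  //   if (t == T_BYTE)                 { value &= 0xff; value |= value << 8; }
  //   if (t == T_BYTE || t == T_SHORT) { value |= value << 16; }
  //   value |= value << 32;
  //
  //   while (byte_count >= 32) { store8(to) x4; to += 32; byte_count -= 32; }
  //   while (byte_count >= 8)  { store8(to);    to += 8;  byte_count -= 8;  }
  //   // then at most one trailing 4-, 2- and 1-byte store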
  //
  // Generate stub for conjoint short copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_short_copy(bool aligned, const char * name) {
    // Do reverse copy.

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jshort_disjoint_arraycopy() :
        disjoint_short_copy_entry;

    Label L_skip_alignment, L_skip_alignment2, L_aligned_copy;
    Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;

    const Register from      = O0;   // source array address
    const Register to        = O1;   // destination array address
    const Register count     = O2;   // elements count
    const Register end_from  = from; // source array end address
    const Register end_to    = to;   // destination array end address

    const Register byte_count = O3;  // bytes count to copy

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (!aligned)  short_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 1);

    __ sllx(count, LogBytesPerShort, byte_count);
    __ add(to, byte_count, end_to);  // offset after last copied element

    // for short arrays, just do single element copy
    __ cmp(count, 11); // 8 + 3  (22 bytes)
    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
    __ delayed()->add(from, byte_count, end_from);

    {
      // Align the ends of the arrays since they could be misaligned
      // even when the arrays themselves are aligned.

      // copy 1 element if necessary to align 'end_to' on a 4-byte boundary
      __ andcc(end_to, 3, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->lduh(end_from, -2, O3);
      __ dec(end_from, 2);
      __ dec(end_to, 2);
      __ dec(count);
      __ sth(O3, end_to, 0);
      __ BIND(L_skip_alignment);

      // copy 2 elements to align 'end_to' on an 8-byte boundary
      __ andcc(end_to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
      __ delayed()->lduh(end_from, -2, O3);
      __ dec(count, 2);
      __ lduh(end_from, -4, O4);
      __ dec(end_from, 4);
      __ dec(end_to, 4);
      __ sth(O3, end_to, 2);
      __ sth(O4, end_to, 0);
      __ BIND(L_skip_alignment2);
    }
#ifdef _LP64
    if (aligned) {
      // Both arrays are aligned to 8 bytes in the 64-bit VM.
      // 'count' is decremented in copy_16_bytes_backward_with_shift()
      // only in the unaligned case.
      __ dec(count, 8);
    } else
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise fall through to the next
      // code for aligned copy (subtracting 8 from 'count' before the jump).
      // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
                                        L_aligned_copy, L_copy_2_bytes);
    }
    // copy 4 elements (16 bytes) at a time
    __ align(OptoLoopAlignment);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 8);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 8);

    // copy 1 element (2 bytes) at a time
    __ BIND(L_copy_2_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ BIND(L_copy_2_bytes_loop);
    __ dec(end_from, 2);
    __ dec(end_to, 2);
    __ lduh(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
    __ delayed()->sth(O4, end_to, 0);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }
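
  // The conjoint short stub above copies backward so that overlapping
  // operands (from < to < from + 2*count) still come out right; a minimal
  // sketch of the element-by-element loop it degenerates to:
  //
  //   jshort* end_from = from + count;
  //   jshort* end_to   = to   + count;
  //   while (count-- > 0)
  //     *--end_to = *--end_from;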
  //
  // Generate core code for disjoint int copy (and oop copy on 32-bit).
  // If "aligned" is true, the "from" and "to" addresses are assumed
  // to be heapword aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_disjoint_int_copy_core(bool aligned) {

    Label L_skip_alignment, L_aligned_copy;
    Label L_copy_16_bytes,  L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;

    const Register from   = O0;   // source array address
    const Register to     = O1;   // destination array address
    const Register count  = O2;   // elements count
    const Register offset = O5;   // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    // 'aligned' == true when it is known statically during compilation
    // of this arraycopy call site that both 'from' and 'to' addresses
    // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
    //
    // Aligned arrays have 4-byte alignment in the 32-bit VM
    // and 8-byte alignment in the 64-bit VM.
    //
#ifdef _LP64
    if (!aligned)
#endif
    {
      // The next check could be put under 'ifndef' since the code in
      // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.

      // for short arrays, just do single element copy
      __ cmp(count, 5); // 4 + 1 (20 bytes)
      __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
      __ delayed()->mov(G0, offset);

      // copy 1 element to align 'to' on an 8 byte boundary
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);

      // if arrays have the same alignment mod 8, do 4 elements copy
      __ andcc(from, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
      __ delayed()->ld(from, 0, O3);

      //
      // Load 2 aligned 8-byte chunks and use one from the previous iteration
      // to form 2 aligned 8-byte chunks to store.
      //
      // copy_16_bytes_forward_with_shift() is not used here since this
      // code is more optimal.

      // copy with shift 4 elements (16 bytes) at a time
      __ dec(count, 4);   // The cmp at the beginning guarantees count >= 4

      __ align(OptoLoopAlignment);
      __ BIND(L_copy_16_bytes);
      __ ldx(from, 4, O4);
      __ deccc(count, 4); // Can we do next iteration after this one?
      __ ldx(from, 12, G4);
      __ inc(to, 16);
      __ inc(from, 16);
      __ sllx(O3, 32, O3);
      __ srlx(O4, 32, G3);
      __ bset(G3, O3);
      __ stx(O3, to, -16);
      __ sllx(O4, 32, O4);
      __ srlx(G4, 32, G3);
      __ bset(G3, O4);
      __ stx(O4, to, -8);
      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
      __ delayed()->mov(G4, O3);

      __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
      __ delayed()->inc(count, 4); // restore 'count'

      __ BIND(L_aligned_copy);
    }
    // copy 4 elements (16 bytes) at a time
    __ and3(count, 1, G4); // Save
    __ srl(count, 1, count);
    generate_disjoint_long_copy_core(aligned);
    __ mov(G4, count);     // Restore

    // copy 1 element at a time
    __ BIND(L_copy_4_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ BIND(L_copy_4_bytes_loop);
    __ ld(from, offset, O3);
    __ deccc(count);
    __ st(O3, to, offset);
    __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
    __ delayed()->inc(offset, 4);
    __ BIND(L_exit);
  }
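
  // The "copy with shift" loop above handles 'from' and 'to' that differ
  // by 4 mod 8: each aligned 8-byte store is merged from the halves of two
  // aligned 8-byte loads (big-endian; 'load8'/'store8' stand in for ldx/stx):
  //
  //   O4 = load8(from + 4);  G4 = load8(from + 12);   // aligned loads
  //   store8(to,     (O3 << 32) | (O4 >> 32));        // O3: from last iteration
  //   store8(to + 8, (O4 << 32) | (G4 >> 32));
  //   O3 = G4;  from += 16;  to += 16;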
  //
  // Generate stub for disjoint int copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_int_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register count = O2;
    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (!aligned)  disjoint_int_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    generate_disjoint_int_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }
  //
  // Generate core code for conjoint int copy (and oop copy on 32-bit).
  // If "aligned" is true, the "from" and "to" addresses are assumed
  // to be heapword aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_conjoint_int_copy_core(bool aligned) {
    // Do reverse copy.

    Label L_skip_alignment, L_aligned_copy;
    Label L_copy_16_bytes,  L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;

    const Register from      = O0;   // source array address
    const Register to        = O1;   // destination array address
    const Register count     = O2;   // elements count
    const Register end_from  = from; // source array end address
    const Register end_to    = to;   // destination array end address
    // O3, O4, O5, G3 are used as temp registers

    const Register byte_count = O3;  // bytes count to copy

    __ sllx(count, LogBytesPerInt, byte_count);
    __ add(to, byte_count, end_to); // offset after last copied element

    __ cmp(count, 5); // for short arrays, just do single element copy
    __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
    __ delayed()->add(from, byte_count, end_from);

    // copy 1 element to align 'to' on an 8 byte boundary
    __ andcc(end_to, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
    __ delayed()->nop();
    __ dec(count);
    __ dec(end_from, 4);
    __ dec(end_to,   4);
    __ ld(end_from, 0, O4);
    __ st(O4, end_to, 0);
    __ BIND(L_skip_alignment);

    // Check if 'end_from' and 'end_to' have the same alignment.
    __ andcc(end_from, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->dec(count, 4); // The cmp at the start guarantees count >= 4

    // copy with shift 4 elements (16 bytes) at a time
    //
    // Load 2 aligned 8-byte chunks and use one from the previous iteration
    // to form 2 aligned 8-byte chunks to store.
    //
    __ ldx(end_from, -4, O3);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_16_bytes);
    __ ldx(end_from, -12, O4);
    __ deccc(count, 4);
    __ ldx(end_from, -20, O5);
    __ dec(end_to, 16);
    __ dec(end_from, 16);
    __ srlx(O3, 32, O3);
    __ sllx(O4, 32, G3);
    __ bset(G3, O3);
    __ stx(O3, end_to, 8);
    __ srlx(O4, 32, O4);
    __ sllx(O5, 32, G3);
    __ bset(O4, G3);
    __ stx(G3, end_to, 0);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->mov(O5, O3);

    __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
    __ delayed()->inc(count, 4);

    // copy 4 elements (16 bytes) at a time
    __ align(OptoLoopAlignment);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 4);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 4);

    // copy 1 element (4 bytes) at a time
    __ BIND(L_copy_4_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ BIND(L_copy_4_bytes_loop);
    __ dec(end_from, 4);
    __ dec(end_to,   4);
    __ ld(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
    __ delayed()->st(O4, end_to, 0);
    __ BIND(L_exit);
  }
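
  // The loop above is the mirror image of the forward shift-merge in
  // generate_disjoint_int_copy_core(): the loads walk backward and the
  // doubleword kept from the previous iteration supplies the other half
  // of each merged store, e.g. store8(end_to + 8, (O3 >> 32) | (O4 << 32)).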
  //
  // Generate stub for conjoint int copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_int_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jint_disjoint_arraycopy() :
        disjoint_int_copy_entry;

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (!aligned)  int_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 2);

    generate_conjoint_int_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }
  //
  // Generate core code for disjoint long copy (and oop copy on 64-bit).
  // "aligned" is ignored, because we must make the stronger
  // assumption that both addresses are always 64-bit aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  //   count -= 2;
  //   if ( count >= 0 ) { // >= 2 elements
  //     if ( count >= 6 ) { // >= 8 elements
  //       count -= 6; // original count - 8
  //       do {
  //         copy_8_elements;
  //         count -= 8;
  //       } while ( count >= 0 );
  //       count += 6;
  //     }
  //     if ( count >= 0 ) { // >= 2 elements
  //       do {
  //         copy_2_elements;
  //       } while ( (count=count-2) >= 0 );
  //     }
  //   }
  //   count += 2;
  //   if ( count != 0 ) { // 1 element left
  //     copy_1_element;
  //   }
  //
  void generate_disjoint_long_copy_core(bool aligned) {
    Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
    const Register from    = O0;  // source array address
    const Register to      = O1;  // destination array address
    const Register count   = O2;  // elements count
    const Register offset0 = O4;  // element offset
    const Register offset8 = O5;  // next element offset

    __ deccc(count, 2);
    __ mov(G0, offset0);   // offset from start of arrays (0)
    __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
    __ delayed()->add(offset0, 8, offset8);

    // Copy by 64-byte chunks
    Label L_copy_64_bytes;
    const Register from64 = O3;  // source address
    const Register to64   = G3;  // destination address
    __ subcc(count, 6, O3);
    __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes );
    __ delayed()->mov(to, to64);
    // Now we can use O4(offset0), O5(offset8) as temps
    __ mov(O3, count);
    __ mov(from, from64);

    __ align(OptoLoopAlignment);
    __ BIND(L_copy_64_bytes);
    for( int off = 0; off < 64; off += 16 ) {
      __ ldx(from64, off+0, O4);
      __ ldx(from64, off+8, O5);
      __ stx(O4, to64, off+0);
      __ stx(O5, to64, off+8);
    }
    __ deccc(count, 8);
    __ inc(from64, 64);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes);
    __ delayed()->inc(to64, 64);

    // Restore O4(offset0), O5(offset8)
    __ sub(from64, from, offset0);
    __ inccc(count, 6);
    __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
    __ delayed()->add(offset0, 8, offset8);

    // Copy by 16-byte chunks
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_16_bytes);
    __ ldx(from, offset0, O3);
    __ ldx(from, offset8, G3);
    __ deccc(count, 2);
    __ stx(O3, to, offset0);
    __ inc(offset0, 16);
    __ stx(G3, to, offset8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->inc(offset8, 16);

    // Copy last 8 bytes
    __ BIND(L_copy_8_bytes);
    __ inccc(count, 2);
    __ brx(Assembler::zero, true, Assembler::pn, L_exit );
    __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs
    __ ldx(from, offset0, O3);
    __ stx(O3, to, offset0);
    __ BIND(L_exit);
  }
  //
  // Generate stub for disjoint long copy.
  // "aligned" is ignored, because we must make the stronger
  // assumption that both addresses are always 64-bit aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_long_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (!aligned)  disjoint_long_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    generate_disjoint_long_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  // Generate core code for conjoint long copy (and oop copy on 64-bit).
  // "aligned" is ignored, because we must make the stronger
  // assumption that both addresses are always 64-bit aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_conjoint_long_copy_core(bool aligned) {
    // Do reverse copy.
    Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
    const Register from    = O0;  // source array address
    const Register to      = O1;  // destination array address
    const Register count   = O2;  // elements count
    const Register offset8 = O4;  // element offset
    const Register offset0 = O5;  // previous element offset

    __ subcc(count, 1, count);
    __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes );
    __ delayed()->sllx(count, LogBytesPerLong, offset8);
    __ sub(offset8, 8, offset0);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_16_bytes);
    __ ldx(from, offset8, O2);
    __ ldx(from, offset0, O3);
    __ stx(O2, to, offset8);
    __ deccc(offset8, 16);      // use offset8 as counter
    __ stx(O3, to, offset0);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->dec(offset0, 16);

    __ BIND(L_copy_8_bytes);
    __ brx(Assembler::negative, false, Assembler::pn, L_exit );
    __ delayed()->nop();
    __ ldx(from, 0, O3);
    __ stx(O3, to, 0);
    __ BIND(L_exit);
  }
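
  // C-like sketch of the reverse long copy above ('load8'/'store8' stand in
  // for ldx/stx; offsets are in bytes):
  //
  //   intptr_t i = (count - 1) * 8;        // offset of the last element
  //   for (; i > 0; i -= 16) {             // two elements per iteration
  //     store8(to + i,     load8(from + i));
  //     store8(to + i - 8, load8(from + i - 8));
  //   }
  //   if (i == 0)                          // odd element count
  //     store8(to, load8(from));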
  //  Generate stub for conjoint long copy.
  //  "aligned" is ignored, because we must make the stronger
  //  assumption that both addresses are always 64-bit aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_long_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert(!aligned, "usage");
    address nooverlap_target = disjoint_long_copy_entry;

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (!aligned)  long_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 3);

    generate_conjoint_long_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //  Generate stub for disjoint oop copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_oop_copy(bool aligned, const char * name) {

    const Register from  = O0;  // source array address
    const Register to    = O1;  // destination array address
    const Register count = O2;  // elements count

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (!aligned)  disjoint_oop_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here
    if (!aligned)  BLOCK_COMMENT("Entry:");

    // save arguments for barrier generation
    __ mov(to, G1);
    __ mov(count, G5);
    gen_write_ref_array_pre_barrier(G1, G5);
#ifdef _LP64
    assert_clean_int(count, O3);     // Make sure 'count' is clean int.
    if (UseCompressedOops) {
      generate_disjoint_int_copy_core(aligned);
    } else {
      generate_disjoint_long_copy_core(aligned);
    }
#else
    generate_disjoint_int_copy_core(aligned);
#endif
    // O0 is used as temp register
    gen_write_ref_array_post_barrier(G1, G5, O0);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }
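
  // Shape of both oop copy stubs: GC write barriers bracket what is
  // otherwise a plain int- or long-sized copy of the (possibly compressed)
  // oops. Roughly:
  //
  //   pre_barrier(to, count);
  //   copy_core(from, to, count);   // int core if UseCompressedOops, else long
  //   post_barrier(to, count);      // e.g. card marks for the stores done
  //   return 0;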
  //  Generate stub for conjoint oop copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_oop_copy(bool aligned, const char * name) {

    const Register from  = O0;  // source array address
    const Register to    = O1;  // destination array address
    const Register count = O2;  // elements count

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (!aligned)  oop_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here
    if (!aligned)  BLOCK_COMMENT("Entry:");

    // save arguments for barrier generation
    __ mov(to, G1);
    __ mov(count, G5);

    gen_write_ref_array_pre_barrier(G1, G5);

    address nooverlap_target = aligned ?
        StubRoutines::arrayof_oop_disjoint_arraycopy() :
        disjoint_oop_copy_entry;

    array_overlap_test(nooverlap_target, LogBytesPerHeapOop);

#ifdef _LP64
    if (UseCompressedOops) {
      generate_conjoint_int_copy_core(aligned);
    } else {
      generate_conjoint_long_copy_core(aligned);
    }
#else
    generate_conjoint_int_copy_core(aligned);
#endif

    // O0 is used as temp register
    gen_write_ref_array_post_barrier(G1, G5, O0);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }


  // Helper for generating a dynamic type check.
  // Smashes only the given temp registers.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Register temp,
                           Label& L_success) {
    assert_different_registers(sub_klass, super_check_offset, super_klass, temp);

    BLOCK_COMMENT("type_check:");

    Label L_miss, L_pop_to_miss;

    assert_clean_int(super_check_offset, temp);

    __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
                                     &L_success, &L_miss, NULL,
                                     super_check_offset);

    BLOCK_COMMENT("type_check_slow_path:");
    __ save_frame(0);
    __ check_klass_subtype_slow_path(sub_klass->after_save(),
                                     super_klass->after_save(),
                                     L0, L1, L2, L4,
                                     NULL, &L_pop_to_miss);
    __ ba(false, L_success);
    __ delayed()->restore();

    __ bind(L_pop_to_miss);
    __ restore();

    // Fall through on failure!
    __ BIND(L_miss);
  }
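
  // Logically the helper above emits (sketch; the named checks are the
  // MacroAssembler fast/slow subtype paths, not separate functions):
  //
  //   if (fast_path_subtype_check(sub_klass, super_klass)) goto L_success;
  //   if (slow_path_subtype_check(sub_klass, super_klass)) goto L_success;
  //   // fall through to L_miss on failure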
  //  Generate stub for checked oop copy.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //      ckoff: O3 (super_check_offset)
  //      ckval: O4 (super_klass)
  //      ret:   O0 zero for success; (-1^K) where K is partial transfer count
  //
  address generate_checkcast_copy(const char* name) {

    const Register O0_from   = O0;      // source array address
    const Register O1_to     = O1;      // destination array address
    const Register O2_count  = O2;      // elements count
    const Register O3_ckoff  = O3;      // super_check_offset
    const Register O4_ckval  = O4;      // super_klass

    const Register O5_offset = O5;      // loop var, with stride wordSize
    const Register G1_remain = G1;      // loop var, with stride -1
    const Register G3_oop    = G3;      // actual oop copied
    const Register G4_klass  = G4;      // oop._klass
    const Register G5_super  = G5;      // oop._klass._primary_supers[ckval]

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    gen_write_ref_array_pre_barrier(O1, O2);

#ifdef ASSERT
    // We sometimes save a frame (see generate_type_check below).
    // If this will cause trouble, let's fail now instead of later.
    __ save_frame(0);
    __ restore();
#endif

#ifdef ASSERT
    // caller guarantees that the arrays really are different
    // otherwise, we would have to make conjoint checks
    { Label L;
      __ mov(O3, G1);           // spill: overlap test smashes O3
      __ mov(O4, G4);           // spill: overlap test smashes O4
      array_overlap_test(L, LogBytesPerHeapOop);
      __ stop("checkcast_copy within a single array");
      __ bind(L);
      __ mov(G1, O3);
      __ mov(G4, O4);
    }
#endif //ASSERT

    assert_clean_int(O2_count, G1);     // Make sure 'count' is clean int.

    checkcast_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from generic stub)
    BLOCK_COMMENT("Entry:");

    Label load_element, store_element, do_card_marks, fail, done;
    __ addcc(O2_count, 0, G1_remain);   // initialize loop index, and test it
    __ brx(Assembler::notZero, false, Assembler::pt, load_element);
    __ delayed()->mov(G0, O5_offset);   // offset from start of arrays

    // Empty array:  Nothing to do.
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->set(0, O0);           // return 0 on (trivial) success

    // ======== begin loop ========
    // (Loop is rotated; its entry is load_element.)
    // Loop variables:
    //   (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
    //   (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
    //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
    __ align(OptoLoopAlignment);

    __ BIND(store_element);
    __ deccc(G1_remain);                // decrement the count
    __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
    __ inc(O5_offset, heapOopSize);     // step to next offset
    __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
    __ delayed()->set(0, O0);           // return 0 on success

    // ======== loop entry is here ========
    __ BIND(load_element);
    __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
    __ br_null(G3_oop, true, Assembler::pt, store_element);
    __ delayed()->nop();

    __ load_klass(G3_oop, G4_klass); // query the object klass

    generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
                        // branch to this on success:
                        store_element);
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register G1 has number of *remaining* oops, O2 number of *total* oops.
    // Emit GC store barriers for the oops we have copied (O2 minus G1),
    // and report their number to the caller.
    __ BIND(fail);
    __ subcc(O2_count, G1_remain, O2_count);
    __ brx(Assembler::zero, false, Assembler::pt, done);
    __ delayed()->not1(O2_count, O0);   // report (-1^K) to caller

    __ BIND(do_card_marks);
    gen_write_ref_array_post_barrier(O1_to, O2_count, O3);  // store check on O1[0..O2]

    __ BIND(done);
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->nop();                // return value in O0

    return start;
  }
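
  // The checkcast stub's return convention in C terms, with K the number of
  // elements copied before the first one that failed the type check:
  //
  //   if (K == count) return 0;    // fully copied
  //   else            return ~K;   // == -1 ^ K, always negative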
  //  Generate 'unsafe' array copy stub
  //  Though just as safe as the other stubs, it takes an unscaled
  //  size_t argument instead of an element count.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 byte count, treated as ssize_t, can be zero
  //
  // Examines the alignment of the operands and dispatches
  // to a long, int, short, or byte copy loop.
  //
  address generate_unsafe_copy(const char* name) {

    const Register O0_from   = O0;      // source array address
    const Register O1_to     = O1;      // destination array address
    const Register O2_count  = O2;      // elements count

    const Register G1_bits   = G1;      // test copy of low bits

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3);

    __ or3(O0_from, O1_to, G1_bits);
    __ or3(O2_count, G1_bits, G1_bits);

    __ btst(BytesPerLong-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          long_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerLong, O2_count);

    __ btst(BytesPerInt-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          int_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerInt, O2_count);

    __ btst(BytesPerShort-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          short_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerShort, O2_count);

    __ br(Assembler::always, false, Assembler::pt,
          byte_copy_entry, relocInfo::runtime_call_type);
    __ delayed()->nop();

    return start;
  }
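
  // The dispatch above, in C terms ('byte_count' is the raw size_t; the
  // count is scaled in the branch delay slot on the way out):
  //
  //   bits = (uintptr_t)from | (uintptr_t)to | byte_count;
  //   if ((bits & 7) == 0) goto long_copy_entry;   // count = byte_count >> 3
  //   if ((bits & 3) == 0) goto int_copy_entry;    // count = byte_count >> 2
  //   if ((bits & 1) == 0) goto short_copy_entry;  // count = byte_count >> 1
  //   goto byte_copy_entry;                        // count = byte_count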
  // Perform range checks on the proposed arraycopy.
  // Kills the two temps, but nothing else.
  // Also, clean the sign bits of src_pos and dst_pos.
  void arraycopy_range_checks(Register src,     // source array oop (O0)
                              Register src_pos, // source position (O1)
                              Register dst,     // destination array oop (O2)
                              Register dst_pos, // destination position (O3)
                              Register length,  // length of copy (O4)
                              Register temp1, Register temp2,
                              Label& L_failed) {
    BLOCK_COMMENT("arraycopy_range_checks:");

    //  if (src_pos + length > arrayOop(src)->length() ) FAIL;

    const Register array_length = temp1;  // scratch
    const Register end_pos      = temp2;  // scratch

    // Note:  This next instruction may be in the delay slot of a branch:
    __ add(length, src_pos, end_pos);  // src_pos + length
    __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length);
    __ cmp(end_pos, array_length);
    __ br(Assembler::greater, false, Assembler::pn, L_failed);

    //  if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
    __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length
    __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length);
    __ cmp(end_pos, array_length);
    __ br(Assembler::greater, false, Assembler::pn, L_failed);

    // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
    // Move with sign extension can be used since they are positive.
    __ delayed()->signx(src_pos, src_pos);
    __ signx(dst_pos, dst_pos);

    BLOCK_COMMENT("arraycopy_range_checks done");
  }
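
  // The same checks without the branch-delay scheduling:
  //
  //   if (src_pos + length > src->length()) goto L_failed;
  //   if (dst_pos + length > dst->length()) goto L_failed;
  //   src_pos = sign_extend32(src_pos);   // known non-negative, so a plain
  //   dst_pos = sign_extend32(dst_pos);   // signx cleans the high 32 bits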
  //
  //  Generate generic array copy stubs
  //
  //  Input:
  //    O0    -  src oop
  //    O1    -  src_pos
  //    O2    -  dst oop
  //    O3    -  dst_pos
  //    O4    -  element count
  //
  //  Output:
  //    O0 ==  0  -  success
  //    O0 == -1  -  need to call System.arraycopy
  //
  address generate_generic_copy(const char *name) {

    Label L_failed, L_objArray;

    // Input registers
    const Register src      = O0;  // source array oop
    const Register src_pos  = O1;  // source position
    const Register dst      = O2;  // destination array oop
    const Register dst_pos  = O3;  // destination position
    const Register length   = O4;  // elements count

    // registers used as temp
    const Register G3_src_klass = G3; // source array klass
    const Register G4_dst_klass = G4; // destination array klass
    const Register G5_lh        = G5; // layout handler
    const Register O5_temp      = O5;

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3);

    // In principle, the int arguments could be dirty.
    //assert_clean_int(src_pos, G1);
    //assert_clean_int(dst_pos, G1);
    //assert_clean_int(length, G1);

    //-----------------------------------------------------------------------
    // Assembler stubs will be used for this call to arraycopy
    // if the following conditions are met:
    //
    // (1) src and dst must not be null.
    // (2) src_pos must not be negative.
    // (3) dst_pos must not be negative.
    // (4) length  must not be negative.
    // (5) src klass and dst klass should be the same and not NULL.
    // (6) src and dst should be arrays.
    // (7) src_pos + length must not exceed length of src.
    // (8) dst_pos + length must not exceed length of dst.
    BLOCK_COMMENT("arraycopy initial argument checks");

    //  if (src == NULL) return -1;
    __ br_null(src, false, Assembler::pn, L_failed);

    //  if (src_pos < 0) return -1;
    __ delayed()->tst(src_pos);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);
    __ delayed()->nop();

    //  if (dst == NULL) return -1;
    __ br_null(dst, false, Assembler::pn, L_failed);

    //  if (dst_pos < 0) return -1;
    __ delayed()->tst(dst_pos);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);

    //  if (length < 0) return -1;
    __ delayed()->tst(length);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);

    BLOCK_COMMENT("arraycopy argument klass checks");
    //  get src->klass()
    if (UseCompressedOops) {
      __ delayed()->nop(); // the delay slot can hold only one instruction;
                           // load_klass expands to several
      __ load_klass(src, G3_src_klass);
    } else {
      __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
    }

#ifdef ASSERT
    //  assert(src->klass() != NULL);
    BLOCK_COMMENT("assert klasses not null");
    { Label L_a, L_b;
      __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
      __ delayed()->nop();
      __ bind(L_a);
      __ stop("broken null klass");
      __ bind(L_b);
      __ load_klass(dst, G4_dst_klass);
      __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
      __ delayed()->mov(G0, G4_dst_klass);      // scribble the temp
      BLOCK_COMMENT("assert done");
    }
#endif

    // Load layout helper
    //
    //  |array_tag|     | header_size | element_type |     |log2_element_size|
    //  32        30    24            16              8     2                0
    //
    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
    //
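    // In C terms, the fields of the layout helper decoded further below:
    //
    //   int header_size    = (lh >> Klass::_lh_header_size_shift)
    //                        & Klass::_lh_header_size_mask;           // in bytes
    //   int log2_elem_size = lh & Klass::_lh_log2_element_size_mask;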
    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
                    Klass::layout_helper_offset_in_bytes();

    // Load 32-bit signed value. Use br() instruction with it to check icc.
    __ lduw(G3_src_klass, lh_offset, G5_lh);

    if (UseCompressedOops) {
      __ load_klass(dst, G4_dst_klass);
    }
    // Handle objArrays completely differently...
    juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
    __ set(objArray_lh, O5_temp);
    __ cmp(G5_lh,       O5_temp);
    __ br(Assembler::equal, false, Assembler::pt, L_objArray);
    if (UseCompressedOops) {
      __ delayed()->nop();
    } else {
      __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
    }

    //  if (src->klass() != dst->klass()) return -1;
    __ cmp(G3_src_klass, G4_dst_klass);
    __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
    __ delayed()->nop();

    //  if (!src->is_Array()) return -1;
    __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
    __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);

    // At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
    __ delayed()->nop();
    { Label L;
      jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
      __ set(lh_prim_tag_in_place, O5_temp);
      __ cmp(G5_lh,                O5_temp);
      __ br(Assembler::greaterEqual, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("must be a primitive array");
      __ bind(L);
    }
#else
    __ delayed();                               // match next insn to prev branch
#endif

    arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                           O5_temp, G4_dst_klass, L_failed);

    // typeArrayKlass
    //
    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
    //

    const Register G4_offset = G4_dst_klass; // array offset
    const Register G3_elsize = G3_src_klass; // log2 element size

    __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset);
    __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset); // array_offset
    __ add(src, G4_offset, src);       // src array offset
    __ add(dst, G4_offset, dst);       // dst array offset
    __ and3(G5_lh, Klass::_lh_log2_element_size_mask, G3_elsize); // log2 element size

    // next registers should be set before the jump to corresponding stub
    const Register from  = O0;  // source array address
    const Register to    = O1;  // destination array address
    const Register count = O2;  // elements count

    // 'from', 'to', 'count' registers should be set in this order
    // since they are the same as 'src', 'src_pos', 'dst'.

    BLOCK_COMMENT("scale indexes to element size");
    __ sll_ptr(src_pos, G3_elsize, src_pos);
    __ sll_ptr(dst_pos, G3_elsize, dst_pos);
    __ add(src, src_pos, from);       // src_addr
    __ add(dst, dst_pos, to);         // dst_addr

    BLOCK_COMMENT("choose copy loop based on element size");
    __ cmp(G3_elsize, 0);
    __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jbyte_arraycopy);
    __ delayed()->signx(length, count); // length

    __ cmp(G3_elsize, LogBytesPerShort);
    __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jshort_arraycopy);
    __ delayed()->signx(length, count); // length

    __ cmp(G3_elsize, LogBytesPerInt);
    __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jint_arraycopy);
    __ delayed()->signx(length, count); // length
#ifdef ASSERT
    { Label L;
      __ cmp(G3_elsize, LogBytesPerLong);
      __ br(Assembler::equal, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("must be long copy, but elsize is wrong");
      __ bind(L);
    }
#endif
    __ br(Assembler::always, false, Assembler::pt, StubRoutines::_jlong_arraycopy);
    __ delayed()->signx(length, count); // length

    // objArrayKlass
    __ BIND(L_objArray);
    // live at this point:  G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length

    Label L_plain_copy, L_checkcast_copy;
    //  test array classes for subtyping
    __ cmp(G3_src_klass, G4_dst_klass);         // usual case is exact equality
    __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
    __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below

    // Identically typed arrays can be copied without element-wise checks.
    arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                           O5_temp, G5_lh, L_failed);

    __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
    __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
    __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
    __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
    __ add(src, src_pos, from);       // src_addr
    __ add(dst, dst_pos, to);         // dst_addr
    __ BIND(L_plain_copy);
    __ br(Assembler::always, false, Assembler::pt, StubRoutines::_oop_arraycopy);
    __ delayed()->signx(length, count); // length

    __ BIND(L_checkcast_copy);
    // live at this point:  G3_src_klass, G4_dst_klass
    {
      // Before looking at dst.length, make sure dst is also an objArray.
      // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot
      __ cmp(G5_lh,                    O5_temp);
      __ br(Assembler::notEqual, false, Assembler::pn, L_failed);

      // It is safe to examine both src.length and dst.length.
      __ delayed();                             // match next insn to prev branch
      arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                             O5_temp, G5_lh, L_failed);

      // Marshal the base address arguments now, freeing registers.
      __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
      __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
      __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
      __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
      __ add(src, src_pos, from);               // src_addr
      __ add(dst, dst_pos, to);                 // dst_addr
      __ signx(length, count);                  // length (reloaded)

      Register sco_temp = O3;                   // this register is free now
      assert_different_registers(from, to, count, sco_temp,
                                 G4_dst_klass, G3_src_klass);

      // Generate the type check.
      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                        Klass::super_check_offset_offset_in_bytes());
      __ lduw(G4_dst_klass, sco_offset, sco_temp);
      generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
                          O5_temp, L_plain_copy);

      // Fetch destination element klass from the objArrayKlass header.
      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
                       objArrayKlass::element_klass_offset_in_bytes());

      // the checkcast_copy loop needs two extra arguments:
      __ ld_ptr(G4_dst_klass, ek_offset, O4);   // dest elem klass
      // lduw(O4, sco_offset, O3);              // sco of elem klass

      __ br(Assembler::always, false, Assembler::pt, checkcast_copy_entry);
      __ delayed()->lduw(O4, sco_offset, O3);
    }

    __ BIND(L_failed);
    __ retl();
    __ delayed()->sub(G0, 1, O0); // return -1
    return start;
  }
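
  // How a caller is expected to use the generic stub (sketch): a zero result
  // means the copy was done, anything negative means it must fall back to the
  // full System.arraycopy path (the checkcast tail may also report a partial
  // copy as ~K):
  //
  //   int r = generic_arraycopy(src, src_pos, dst, dst_pos, length);
  //   if (r != 0) { /* bail out to the generic runtime path */ }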
  void generate_arraycopy_stubs() {

    // Note:  the disjoint stubs must be generated first, some of
    //        the conjoint stubs use them.
    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
    StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy");
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jint_disjoint_arraycopy   = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
    StubRoutines::_arrayof_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
    StubRoutines::_arrayof_oop_disjoint_arraycopy    = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy");

    StubRoutines::_jbyte_arraycopy  = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
    StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
    StubRoutines::_jint_arraycopy   = generate_conjoint_int_copy(false, "jint_arraycopy");
    StubRoutines::_jlong_arraycopy  = generate_conjoint_long_copy(false, "jlong_arraycopy");
    StubRoutines::_oop_arraycopy    = generate_conjoint_oop_copy(false, "oop_arraycopy");
    StubRoutines::_arrayof_jbyte_arraycopy  = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
#ifdef _LP64
    // since sizeof(jint) < sizeof(HeapWord), there's a different flavor:
    StubRoutines::_arrayof_jint_arraycopy   = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy");
#else
    StubRoutines::_arrayof_jint_arraycopy   = StubRoutines::_jint_arraycopy;
#endif
    StubRoutines::_arrayof_jlong_arraycopy  = StubRoutines::_jlong_arraycopy;
    StubRoutines::_arrayof_oop_arraycopy    = StubRoutines::_oop_arraycopy;

    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");

    StubRoutines::_jbyte_fill  = generate_fill(T_BYTE,  false, "jbyte_fill");
    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
    StubRoutines::_jint_fill   = generate_fill(T_INT,   false, "jint_fill");
    StubRoutines::_arrayof_jbyte_fill  = generate_fill(T_BYTE,  true, "arrayof_jbyte_fill");
    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
    StubRoutines::_arrayof_jint_fill   = generate_fill(T_INT,   true, "arrayof_jint_fill");
  }

  void generate_initial() {
    // Generates all stubs and initializes the entry points

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that exist in all platforms
    // Note: This is code that could be shared among different platforms - however the
    // benefit seems to be smaller than the disadvantage of having a much more
    // complicated generator structure. See also comment in stubRoutines.hpp.

    StubRoutines::_forward_exception_entry = generate_forward_exception();

    StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
    StubRoutines::_catch_exception_entry = generate_catch_exception();

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that are platform specific
    StubRoutines::Sparc::_test_stop_entry = generate_test_stop();

    StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine();
    StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();

#if !defined(COMPILER2) && !defined(_LP64)
    StubRoutines::_atomic_xchg_entry         = generate_atomic_xchg();
    StubRoutines::_atomic_cmpxchg_entry      = generate_atomic_cmpxchg();
    StubRoutines::_atomic_add_entry          = generate_atomic_add();
    StubRoutines::_atomic_xchg_ptr_entry     = StubRoutines::_atomic_xchg_entry;
    StubRoutines::_atomic_cmpxchg_ptr_entry  = StubRoutines::_atomic_cmpxchg_entry;
    StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
    StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
#endif  // COMPILER2 !=> _LP64
  }
  void generate_all() {
    // Generates all stubs and initializes the entry points

    // Generate partial_subtype_check first here since its code depends on
    // UseZeroBaseCompressedOops which is defined after heap initialization.
    StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check();
    // These entry points require SharedInfo::stack0 to be set up in non-core builds
    StubRoutines::_throw_AbstractMethodError_entry          = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
    StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
    StubRoutines::_throw_ArithmeticException_entry          = generate_throw_exception("ArithmeticException throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException),  true);
    StubRoutines::_throw_NullPointerException_entry         = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
    StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
    StubRoutines::_throw_StackOverflowError_entry           = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),   false);

    StubRoutines::_handler_for_unsafe_access_entry =
      generate_handler_for_unsafe_access();

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    // Don't initialize the platform math functions since sparc
    // doesn't have intrinsics for these operations.
  }


 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    // replace the standard masm with a special one:
    _masm = new MacroAssembler(code);

    _stub_count = !all ? 0x100 : 0x200;
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }

    // make sure this stub is available for all local calls
    if (_atomic_add_stub.is_unbound()) {
      // generate a second time, if necessary
      (void) generate_atomic_add();
    }
  }


 private:
  int _stub_count;
  void stub_prolog(StubCodeDesc* cdesc) {
# ifdef ASSERT
    // put extra information in the stub code, to make it more readable
#ifdef _LP64
    // Write the high part of the address
    // [RGV] Check if there is a dependency on the size of this prolog
    __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
#endif
    __ emit_data((intptr_t)cdesc, relocInfo::none);
    __ emit_data(++_stub_count, relocInfo::none);
# endif
    align(true);
  }

  void align(bool at_header = false) {
    // %%%%% move this constant somewhere else
    // UltraSPARC cache line size is 8 instructions:
    const unsigned int icache_line_size = 32;
    const unsigned int icache_half_line_size = 16;

    if (at_header) {
      while ((intptr_t)(__ pc()) % icache_line_size != 0) {
        __ emit_data(0, relocInfo::none);
      }
    } else {
      while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
        __ nop();
      }
    }
  }

}; // end class declaration


address StubGenerator::disjoint_byte_copy_entry  = NULL;
address StubGenerator::disjoint_short_copy_entry = NULL;
address StubGenerator::disjoint_int_copy_entry   = NULL;
address StubGenerator::disjoint_long_copy_entry  = NULL;
address StubGenerator::disjoint_oop_copy_entry   = NULL;

address StubGenerator::byte_copy_entry  = NULL;
address StubGenerator::short_copy_entry = NULL;
address StubGenerator::int_copy_entry   = NULL;
address StubGenerator::long_copy_entry  = NULL;
address StubGenerator::oop_copy_entry   = NULL;

address StubGenerator::checkcast_copy_entry = NULL;

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}