/*
 * Copyright 1997-2010 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_stubGenerator_sparc.cpp.incl"

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#define __ _masm->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Note:  The register L7 is used as L7_thread_cache, and may not be used
//        any other way within this module.


static const Register& Lstub_temp = L2;

// -------------------------------------------------------------------------------------------------------------------------
// Stub Code definitions

static address handle_unsafe_access() {
  JavaThread* thread = JavaThread::current();
  address pc  = thread->saved_exception_pc();
  address npc = thread->saved_exception_npc();
  // pc is the instruction which we must emulate
  // doing a no-op is fine:  return garbage from the load

  // request an async exception
  thread->set_pending_unsafe_access_error();

  // return address of next instruction to execute
  return npc;
}

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(a,b,c) (0)
#else
#define inc_counter_np(counter, t1, t2) \
  BLOCK_COMMENT("inc_counter " #counter); \
  __ inc_counter(&counter, t1, t2);
#endif

  //----------------------------------------------------------------------------------------------------
  // Call stubs are used to call Java from C

  address generate_call_stub(address& return_pc) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // Incoming arguments:
    //
    // o0         : call wrapper address
    // o1         : result (address)
    // o2         : result type
    // o3         : method
    // o4         : (interpreter) entry point
    // o5         : parameters (address)
    // [sp + 0x5c]: parameter size (in words)
    // [sp + 0x60]: thread
    //
    // +---------------+ <--- sp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- sp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- sp + 0x5c
    // |  param. size  |
    // +---------------+ <--- sp + 0x60
    // |    thread     |
    // +---------------+
    // |               |

    // note: if the link argument position changes, adjust
    //       the code in frame::entry_frame_call_wrapper()

    const Argument link           = Argument(0, false); // used only for GC
    const Argument result         = Argument(1, false);
    const Argument result_type    = Argument(2, false);
    const Argument method         = Argument(3, false);
    const Argument entry_point    = Argument(4, false);
    const Argument parameters     = Argument(5, false);
    const Argument parameter_size = Argument(6, false);
    const Argument thread         = Argument(7, false);

    // setup thread register
    __ ld_ptr(thread.as_address(), G2_thread);
    __ reinit_heapbase();

#ifdef ASSERT
    // make sure we have no pending exceptions
    { const Register t = G3_scratch;
      Label L;
      __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
      __ br_null(t, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // create activation frame & allocate space for parameters
    { const Register t = G3_scratch;
      __ ld_ptr(parameter_size.as_address(), t);            // get parameter size (in words)
      __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
      __ round_to(t, WordsPerLong);                         // make sure it is multiple of 2 (in words)
      __ sll(t, Interpreter::logStackElementSize(), t);     // compute number of bytes
      __ neg(t);                                            // negate so it can be used with save
      __ save(SP, t, SP);                                   // setup new frame
    }

    // +---------------+ <--- sp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- sp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- sp + 0x5c
    // |  empty slot   |      (only if parameter size is even)
    // +---------------+
    // |               |
    // .  parameters   .
    // |               |
    // +---------------+ <--- fp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- fp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- fp + 0x5c
    // |  param. size  |
    // +---------------+ <--- fp + 0x60
    // |    thread     |
    // +---------------+
    // |               |

    // pass parameters if any
    BLOCK_COMMENT("pass parameters if any");
    { const Register src = parameters.as_in().as_register();
      const Register dst = Lentry_args;
      const Register tmp = G3_scratch;
      const Register cnt = G4_scratch;

      // test if any parameters & setup of Lentry_args
      Label exit;
      __ ld_ptr(parameter_size.as_in().as_address(), cnt);  // parameter counter
      __ add( FP, STACK_BIAS, dst );
      __ tst(cnt);
      __ br(Assembler::zero, false, Assembler::pn, exit);
      __ delayed()->sub(dst, BytesPerWord, dst);            // setup Lentry_args

      // copy parameters if any
      Label loop;
      __ BIND(loop);
      // Store tag first.
      if (TaggedStackInterpreter) {
        __ ld_ptr(src, 0, tmp);
        __ add(src, BytesPerWord, src);  // get next
        __ st_ptr(tmp, dst, Interpreter::tag_offset_in_bytes());
      }
      // Store parameter value
      __ ld_ptr(src, 0, tmp);
      __ add(src, BytesPerWord, src);
      __ st_ptr(tmp, dst, Interpreter::value_offset_in_bytes());
      __ deccc(cnt);
      __ br(Assembler::greater, false, Assembler::pt, loop);
      __ delayed()->sub(dst, Interpreter::stackElementSize(), dst);

      // done
      __ BIND(exit);
    }

    // setup parameters, method & call Java function
#ifdef ASSERT
    // layout_activation_impl checks its notion of saved SP against
    // this register, so if this changes update it as well.
    const Register saved_SP = Lscratch;
    __ mov(SP, saved_SP);                              // keep track of SP before call
#endif

    // setup parameters
    const Register t = G3_scratch;
    __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
    __ sll(t, Interpreter::logStackElementSize(), t);  // compute number of bytes
    __ sub(FP, t, Gargs);                              // setup parameter pointer
#ifdef _LP64
    __ add( Gargs, STACK_BIAS, Gargs );                // Account for LP64 stack bias
#endif
    __ mov(SP, O5_savedSP);


    // do the call
    //
    // the following registers must be set up:
    //
    // G2_thread
    // G5_method
    // Gargs
    BLOCK_COMMENT("call Java function");
    __ jmpl(entry_point.as_in().as_register(), G0, O7);
    __ delayed()->mov(method.as_in().as_register(), G5_method);   // setup method

    BLOCK_COMMENT("call_stub_return_address:");
    return_pc = __ pc();

    // The callee, if it wasn't interpreted, can return with SP changed so
    // we can no longer assert on the change of SP.

    // store result depending on type
    // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
    //  is treated as T_INT)
    { const Register addr = result     .as_in().as_register();
      const Register type = result_type.as_in().as_register();
      Label is_long, is_float, is_double, is_object, exit;
      __            cmp(type, T_OBJECT);  __ br(Assembler::equal, false, Assembler::pn, is_object);
      __ delayed()->cmp(type, T_FLOAT);   __ br(Assembler::equal, false, Assembler::pn, is_float);
      __ delayed()->cmp(type, T_DOUBLE);  __ br(Assembler::equal, false, Assembler::pn, is_double);
      __ delayed()->cmp(type, T_LONG);    __ br(Assembler::equal, false, Assembler::pn, is_long);
      __ delayed()->nop();

      // store int result
      __ st(O0, addr, G0);

      __ BIND(exit);
      __ ret();
      __ delayed()->restore();

      __ BIND(is_object);
      __ ba(false, exit);
      __ delayed()->st_ptr(O0, addr, G0);

      __ BIND(is_float);
      __ ba(false, exit);
      __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);

      __ BIND(is_double);
      __ ba(false, exit);
      __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);

      __ BIND(is_long);
#ifdef _LP64
      __ ba(false, exit);
      __ delayed()->st_long(O0, addr, G0);  // store entire long
#else
#if defined(COMPILER2)
      // All return values are where we want them, except for Longs.  C2 returns
      // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
      // Since the interpreter will return longs in G1 and O0/O1 in the 32-bit
      // build we simply always use G1.
      // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
      // do this here.
      // Unfortunately, if we did a rethrow we'd see a MachEpilog node first,
      // which would move G1 into O0/O1 and destroy the exception we were throwing.

      __ ba(false, exit);
      __ delayed()->stx(G1, addr, G0);  // store entire long
#else
      __ st(O1, addr, BytesPerInt);
      __ ba(false, exit);
      __ delayed()->st(O0, addr, G0);
#endif /* COMPILER2 */
#endif /* _LP64 */
    }
    return start;
  }
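
  // For reference, a hedged sketch of the C-side view of this stub. In
  // stubRoutines.hpp the call stub is invoked through a function-pointer
  // typedef of roughly this shape (treat details as an approximation of
  // that file, not a definition):
  //
  //   typedef void (*CallStub)(
  //     address        link,               // call wrapper (o0)
  //     intptr_t*      result,             // where to store the result (o1)
  //     BasicType      result_type,        // how to interpret the result (o2)
  //     methodOopDesc* method,             // method to invoke (o3)
  //     address        entry_point,        // interpreter entry point (o4)
  //     intptr_t*      parameters,         // argument block (o5)
  //     int            size_of_parameters, // [sp + 0x5c]
  //     TRAPS);                            // thread, [sp + 0x60]
  //
  // JavaCalls::call_helper() obtains this pointer via StubRoutines::call_stub().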

  //----------------------------------------------------------------------------------------------------
  // Return point for a Java call if there's an exception thrown in Java code.
  // The exception is caught and transformed into a pending exception stored in
  // JavaThread that can be tested from within the VM.
  //
  // Oexception: exception oop

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");

    address start = __ pc();
    // verify that thread corresponds
    __ verify_thread();

    const Register& temp_reg = Gtemp;
    Address pending_exception_addr    (G2_thread, Thread::pending_exception_offset());
    Address exception_file_offset_addr(G2_thread, Thread::exception_file_offset   ());
    Address exception_line_offset_addr(G2_thread, Thread::exception_line_offset   ());

    // set pending exception
    __ verify_oop(Oexception);
    __ st_ptr(Oexception, pending_exception_addr);
    __ set((intptr_t)__FILE__, temp_reg);
    __ st_ptr(temp_reg, exception_file_offset_addr);
    __ set((intptr_t)__LINE__, temp_reg);
    __ st(temp_reg, exception_line_offset_addr);

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");

    AddressLiteral stub_ret(StubRoutines::_call_stub_return_address);
    __ jump_to(stub_ret, temp_reg);
    __ delayed()->nop();

    return start;
  }
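
  // A hedged sketch (not part of this file) of how the VM consumes the
  // pending exception recorded above: after the call stub returns,
  // JavaCalls::call_helper() in javaCalls.cpp does roughly
  //
  //   StubRoutines::call_stub()(link, result_val, result_type, method,
  //                             entry_point, parameters, size_of_parameters,
  //                             CHECK);
  //
  // where CHECK expands to the thread argument plus a pending-exception
  // test, so a throw caught by generate_catch_exception() propagates as a
  // normal VM pending exception.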

  //----------------------------------------------------------------------------------------------------
  // Continuation point for runtime calls returning with a pending exception
  // The pending exception check happened in the runtime or native call stub
  // The pending exception in Thread is converted into a Java-level exception
  //
  // Contract with Java-level exception handler: O0 = exception
  //                                             O1 = throwing pc

  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    // Upon entry, O7 has the return address returning into Java
    // (interpreted or compiled) code; i.e. the return address
    // becomes the throwing pc.

    const Register& handler_reg = Gtemp;

    Address exception_addr(G2_thread, Thread::pending_exception_offset());

#ifdef ASSERT
    // make sure that this code is only executed if there is a pending exception
    { Label L;
      __ ld_ptr(exception_addr, Gtemp);
      __ br_notnull(Gtemp, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into handler_reg
    __ get_thread();
    __ ld_ptr(exception_addr, Oexception);
    __ verify_oop(Oexception);
    __ save_frame(0);             // compensates for compiler weakness
    __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), Lscratch);
    __ mov(O0, handler_reg);
    __ restore();                 // compensates for compiler weakness

    __ ld_ptr(exception_addr, Oexception);
    __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ br_notnull(Oexception, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // jump to exception handler
    __ jmp(handler_reg, 0);
    // clear pending exception
    __ delayed()->st_ptr(G0, exception_addr);

    return start;
  }


  //------------------------------------------------------------------------------------------------------------------------
  // Continuation point for throwing of implicit exceptions that are not handled in
  // the current activation. Fabricates an exception oop and initiates normal
  // exception dispatching in this frame. Only callee-saved registers are preserved
  // (through the normal register window / RegisterMap handling).
  // If the compiler needs all registers to be preserved between the fault
  // point and the exception handler then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other implicit
  // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
  // either at call sites or otherwise assume that stack unwinding will be initiated,
  // so caller saved registers were assumed volatile in the compiler.

  // Note that we generate only this stub into a RuntimeStub, because it needs to be
  // properly traversed and ignored during GC, so we change the meaning of the "__"
  // macro within this method.
#undef __
#define __ masm->

  address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc) {
#ifdef ASSERT
    int insts_size = VerifyThread ? 1 * K : 600;
#else
    int insts_size = VerifyThread ? 1 * K : 256;
#endif /* ASSERT */
    int locs_size  = 32;

    CodeBuffer code(name, insts_size, locs_size);
    MacroAssembler* masm = new MacroAssembler(&code);

    __ verify_thread();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of thread-local storage
    __ assert_not_delayed();

    // Note that we always push a frame because on the SPARC
    // architecture, for all of our implicit exception kinds at call
    // sites, the implicit exception is taken before the callee frame
    // is pushed.
    __ save_frame(0);

    int frame_complete = __ offset();

    if (restore_saved_exception_pc) {
      __ ld_ptr(G2_thread, JavaThread::saved_exception_pc_offset(), I7);
      __ sub(I7, frame::pc_return_offset, I7);
    }

    // Note that we always have a runtime stub frame on the top of stack by this point
    Register last_java_sp = SP;
    // 64-bit last_java_sp is biased!
    __ set_last_Java_frame(last_java_sp, G0);
    if (VerifyThread)  __ mov(G2_thread, O0); // about to be smashed; pass early
    __ save_thread(noreg);
    // do the call
    BLOCK_COMMENT("call runtime_entry");
    __ call(runtime_entry, relocInfo::runtime_call_type);
    if (!VerifyThread)
      __ delayed()->mov(G2_thread, O0);  // pass thread as first argument
    else
      __ delayed()->nop();               // (thread already passed)
    __ restore_thread(noreg);
    __ reset_last_Java_frame();

    // check for pending exceptions. use Gtemp as scratch register.
#ifdef ASSERT
    Label L;

    Address exception_addr(G2_thread, Thread::pending_exception_offset());
    Register scratch_reg = Gtemp;
    __ ld_ptr(exception_addr, scratch_reg);
    __ br_notnull(scratch_reg, false, Assembler::pt, L);
    __ delayed()->nop();
    __ should_not_reach_here();
    __ bind(L);
#endif // ASSERT
    BLOCK_COMMENT("call forward_exception_entry");
    __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    // we use O7 linkage so that forward_exception_entry has the issuing PC
    __ delayed()->restore();

    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
    return stub->entry_point();
  }

#undef __
#define __ _masm->
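
  // A hedged sketch of how these throw stubs are typically published (the
  // pattern used by the stub generators elsewhere in HotSpot; the entry
  // name below is illustrative, not verified against this exact revision):
  //
  //   StubRoutines::_throw_StackOverflowError_entry =
  //       generate_throw_exception("StackOverflowError throw_exception",
  //           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),
  //           false);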

  // Generate a routine that sets all the registers so we
  // can tell if the stop routine prints them correctly.
  address generate_test_stop() {
    StubCodeMark mark(this, "StubRoutines", "test_stop");
    address start = __ pc();

    int i;

    __ save_frame(0);

    static jfloat zero = 0.0, one = 1.0;

    // put addr in L0, then load through L0 to F0
    __ set((intptr_t)&zero, L0);  __ ldf( FloatRegisterImpl::S, L0, 0, F0);
    __ set((intptr_t)&one,  L0);  __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1

    // use add to put 2..18 in F2..F18
    for ( i = 2;  i <= 18;  ++i ) {
      __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1), as_FloatRegister(i));
    }

    // Now put double 2 in F16, double 18 in F18
    __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2,  F16 );
    __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 );

    // use add to put 20..32 in F20..F32
    for (i = 20; i < 32; i += 2) {
      __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2), as_FloatRegister(i));
    }

    // put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's
    for ( i = 0; i < 8; ++i ) {
      if (i < 6) {
        __ set(     i, as_iRegister(i));
        __ set(16 + i, as_oRegister(i));
        __ set(24 + i, as_gRegister(i));
      }
      __ set( 8 + i, as_lRegister(i));
    }

    __ stop("testing stop");


    __ ret();
    __ delayed()->restore();

    return start;
  }


  address generate_stop_subroutine() {
    StubCodeMark mark(this, "StubRoutines", "stop_subroutine");
    address start = __ pc();

    __ stop_subroutine();

    return start;
  }

  address generate_flush_callers_register_windows() {
    StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows");
    address start = __ pc();

    __ flush_windows();
    __ retl(false);
    __ delayed()->add( FP, STACK_BIAS, O0 );
    // The returned value must be a stack pointer whose register save area
    // is flushed, and will stay flushed while the caller executes.

    return start;
  }

  // Helper functions for v8 atomic operations.
  //
  void get_v8_oop_lock_ptr(Register lock_ptr_reg, Register mark_oop_reg, Register scratch_reg) {
    if (mark_oop_reg == noreg) {
      address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
      __ set((intptr_t)lock_ptr, lock_ptr_reg);
    } else {
      assert(scratch_reg != noreg, "just checking");
      address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
      __ set((intptr_t)lock_ptr, lock_ptr_reg);
      __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
      __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
    }
  }

  void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {

    get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
    __ set(StubRoutines::Sparc::locked, lock_reg);
    // Initialize yield counter
    __ mov(G0, yield_reg);

    __ BIND(retry);
    __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
    __ br(Assembler::less, false, Assembler::pt, dontyield);
    __ delayed()->nop();

    // This code can only be called from inside the VM; this
    // stub is only invoked from Atomic::add().  We do not
    // want to use call_VM, because _last_java_sp and such
    // must already be set.
    //
    // Save the regs and make space for a C call
    __ save(SP, -96, SP);
    __ save_all_globals_into_locals();
    BLOCK_COMMENT("call os::naked_sleep");
    __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
    __ delayed()->nop();
    __ restore_globals_from_locals();
    __ restore();
    // reset the counter
    __ mov(G0, yield_reg);

    __ BIND(dontyield);

    // try to get lock
    __ swap(lock_ptr_reg, 0, lock_reg);

    // did we get the lock?
    __ cmp(lock_reg, StubRoutines::Sparc::unlocked);
    __ br(Assembler::notEqual, true, Assembler::pn, retry);
    __ delayed()->add(yield_reg, 1, yield_reg);

    // yes, got lock. do the operation here.
  }

  void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
    __ st(lock_reg, lock_ptr_reg, 0); // unlock
  }

  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
  //
  // Arguments :
  //
  //      exchange_value: O0
  //      dest:           O1
  //
  // Results:
  //
  //     O0: the value previously stored in dest
  //
  address generate_atomic_xchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
    address start = __ pc();

    if (UseCASForSwap) {
      // Use CAS instead of swap, just in case the MP hardware
      // prefers to work with just one kind of synch. instruction.
      Label retry;
      __ BIND(retry);
      __ mov(O0, O3);       // scratch copy of exchange value
      __ ld(O1, 0, O2);     // observe the previous value
      // try to replace O2 with O3
      __ cas_under_lock(O1, O2, O3,
          (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(), false);
      __ cmp(O2, O3);
      __ br(Assembler::notEqual, false, Assembler::pn, retry);
      __ delayed()->nop();

      __ retl(false);
      __ delayed()->mov(O2, O0);  // report previous value to caller

    } else {
      if (VM_Version::v9_instructions_work()) {
        __ retl(false);
        __ delayed()->swap(O1, 0, O0);
      } else {
        const Register& lock_reg     = O2;
        const Register& lock_ptr_reg = O3;
        const Register& yield_reg    = O4;

        Label retry;
        Label dontyield;

        generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
        // got the lock, do the swap
        __ swap(O1, 0, O0);

        generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
        __ retl(false);
        __ delayed()->nop();
      }
    }

    return start;
  }
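
  // A hedged sketch of the caller side: on platforms that route Atomic::xchg
  // through a generated stub, the published entry is invoked through a plain
  // function pointer with the matching C signature, roughly
  //
  //   typedef jint (*xchg_func_t)(jint exchange_value, volatile jint* dest);
  //   jint old = ((xchg_func_t)StubRoutines::atomic_xchg_entry())(1, &flag);
  //
  // (xchg_func_t and the call site are illustrative; the real plumbing
  // lives in atomic.cpp / stubRoutines.cpp.)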


  // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
  //
  // Arguments :
  //
  //      exchange_value: O0
  //      dest:           O1
  //      compare_value:  O2
  //
  // Results:
  //
  //     O0: the value previously stored in dest
  //
  // Overwrites (v8): O3,O4,O5
  //
  address generate_atomic_cmpxchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
    address start = __ pc();

    // cmpxchg(dest, compare_value, exchange_value)
    __ cas_under_lock(O1, O2, O0,
        (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(), false);
    __ retl(false);
    __ delayed()->nop();

    return start;
  }

  // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
  //
  // Arguments :
  //
  //      exchange_value: O1:O0
  //      dest:           O2
  //      compare_value:  O4:O3
  //
  // Results:
  //
  //     O1:O0: the value previously stored in dest
  //
  // This only works on V9; on V8 we don't generate any
  // code and just return NULL.
  //
  // Overwrites: G1,G2,G3
  //
  address generate_atomic_cmpxchg_long() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
    address start = __ pc();

    if (!VM_Version::supports_cx8())
      return NULL;
    __ sllx(O0, 32, O0);
    __ srl(O1, 0, O1);
    __ or3(O0, O1, O0);   // O0 holds 64-bit value from exchange_value
    __ sllx(O3, 32, O3);
    __ srl(O4, 0, O4);
    __ or3(O3, O4, O3);   // O3 holds 64-bit value from compare_value
    __ casx(O2, O3, O0);
    __ srl(O0, 0, O1);    // unpacked return value in O1:O0
    __ retl(false);
    __ delayed()->srlx(O0, 32, O0);

    return start;
  }
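
  // Editorial worked example of the packing above (values illustrative):
  // with exchange_value == 0x1111111122222222 arriving as O0 = 0x11111111
  // (msw) and O1 = 0x22222222 (lsw),
  //
  //   sllx(O0, 32, O0)  ->  O0 = 0x1111111100000000
  //   srl (O1,  0, O1)  ->  O1 = 0x0000000022222222   (clears upper half)
  //   or3 (O0, O1, O0)  ->  O0 = 0x1111111122222222
  //
  // casx then compares the packed O3 against *O2 and, on success, stores
  // the packed O0; the final srl/srlx pair splits the old value back into
  // the O1:O0 register pair for the caller.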


  // Support for jint Atomic::add(jint add_value, volatile jint* dest).
  //
  // Arguments :
  //
  //      add_value: O0   (e.g., +1 or -1)
  //      dest:      O1
  //
  // Results:
  //
  //     O0: the new value stored in dest
  //
  // Overwrites (v9): O3
  // Overwrites (v8): O3,O4,O5
  //
  address generate_atomic_add() {
    StubCodeMark mark(this, "StubRoutines", "atomic_add");
    address start = __ pc();
    __ BIND(_atomic_add_stub);

    if (VM_Version::v9_instructions_work()) {
      Label retry;
      __ BIND(retry);

      __ lduw(O1, 0, O2);
      __ add(O0, O2, O3);
      __ cas(O1, O2, O3);
      __ cmp(O2, O3);
      __ br(Assembler::notEqual, false, Assembler::pn, retry);
      __ delayed()->nop();
      __ retl(false);
      __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
    } else {
      const Register& lock_reg     = O2;
      const Register& lock_ptr_reg = O3;
      const Register& value_reg    = O4;
      const Register& yield_reg    = O5;

      Label retry;
      Label dontyield;

      generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
      // got lock, do the increment
      __ ld(O1, 0, value_reg);
      __ add(O0, value_reg, value_reg);
      __ st(value_reg, O1, 0);

      // %%% only for RMO and PSO
      __ membar(Assembler::StoreStore);

      generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);

      __ retl(false);
      __ delayed()->mov(value_reg, O0);
    }

    return start;
  }
  Label _atomic_add_stub;  // called from other stubs


  //------------------------------------------------------------------------------------------------------------------------
  // The following routine generates a subroutine to throw an asynchronous
  // UnknownError when an unsafe access gets a fault that could not be
  // reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
  //
  // Arguments :
  //
  //      trapping PC: O7
  //
  // Results:
  //     posts an asynchronous exception, skips the trapping instruction
  //

  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();

    const int preserve_register_words = (64 * 2);
    Address preserve_addr(FP, (-preserve_register_words * wordSize) + STACK_BIAS);

    Register Lthread = L7_thread_cache;
    int i;

    __ save_frame(0);
    __ mov(G1, L1);
    __ mov(G2, L2);
    __ mov(G3, L3);
    __ mov(G4, L4);
    __ mov(G5, L5);
    for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
      __ stf(FloatRegisterImpl::D, as_FloatRegister(i), preserve_addr, i * wordSize);
    }

    address entry_point = CAST_FROM_FN_PTR(address, handle_unsafe_access);
    BLOCK_COMMENT("call handle_unsafe_access");
    __ call(entry_point, relocInfo::runtime_call_type);
    __ delayed()->nop();

    __ mov(L1, G1);
    __ mov(L2, G2);
    __ mov(L3, G3);
    __ mov(L4, G4);
    __ mov(L5, G5);
    for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
      __ ldf(FloatRegisterImpl::D, preserve_addr, as_FloatRegister(i), i * wordSize);
    }

    __ verify_thread();

    __ jmp(O0, 0);
    __ delayed()->restore();

    return start;
  }
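
  // A hedged sketch of the other half of this mechanism: the platform
  // signal handler (os_*_sparc.cpp) detects a fault inside an unsafe
  // access and redirects execution to the stub above, roughly
  //
  //   if (thread->doing_unsafe_access()) {
  //     // pc/npc were saved so handle_unsafe_access() can resume past the fault
  //     stub = StubRoutines::handler_for_unsafe_access();
  //   }
  //
  // (Condensed and paraphrased; consult the actual signal handler for the
  // exact bookkeeping.)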


  // Support for uint StubRoutines::Sparc::partial_subtype_check( Klass sub, Klass super );
  // Arguments :
  //
  //      ret  : O0, returned
  //      icc/xcc: set as O0 (depending on wordSize)
  //      sub  : O1, argument, not changed
  //      super: O2, argument, not changed
  //      raddr: O7, blown by call
  address generate_partial_subtype_check() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    address start = __ pc();
    Label miss;

#if defined(COMPILER2) && !defined(_LP64)
    // Do not use a 'save' because it blows the 64-bit O registers.
    __ add(SP, -4*wordSize, SP);  // Make space for 4 temps (stack must be 2 words aligned)
    __ st_ptr(L0, SP, (frame::register_save_words+0)*wordSize);
    __ st_ptr(L1, SP, (frame::register_save_words+1)*wordSize);
    __ st_ptr(L2, SP, (frame::register_save_words+2)*wordSize);
    __ st_ptr(L3, SP, (frame::register_save_words+3)*wordSize);
    Register Rret   = O0;
    Register Rsub   = O1;
    Register Rsuper = O2;
#else
    __ save_frame(0);
    Register Rret   = I0;
    Register Rsub   = I1;
    Register Rsuper = I2;
#endif

    Register L0_ary_len = L0;
    Register L1_ary_ptr = L1;
    Register L2_super   = L2;
    Register L3_index   = L3;

    __ check_klass_subtype_slow_path(Rsub, Rsuper,
                                     L0, L1, L2, L3,
                                     NULL, &miss);

    // Match falls through here.
    __ addcc(G0, 0, Rret);       // set Z flags, Z result

#if defined(COMPILER2) && !defined(_LP64)
    __ ld_ptr(SP, (frame::register_save_words+0)*wordSize, L0);
    __ ld_ptr(SP, (frame::register_save_words+1)*wordSize, L1);
    __ ld_ptr(SP, (frame::register_save_words+2)*wordSize, L2);
    __ ld_ptr(SP, (frame::register_save_words+3)*wordSize, L3);
    __ retl();                   // Result in Rret is zero; flags set to Z
    __ delayed()->add(SP, 4*wordSize, SP);
#else
    __ ret();                    // Result in Rret is zero; flags set to Z
    __ delayed()->restore();
#endif

    __ BIND(miss);
    __ addcc(G0, 1, Rret);       // set NZ flags, NZ result

#if defined(COMPILER2) && !defined(_LP64)
    __ ld_ptr(SP, (frame::register_save_words+0)*wordSize, L0);
    __ ld_ptr(SP, (frame::register_save_words+1)*wordSize, L1);
    __ ld_ptr(SP, (frame::register_save_words+2)*wordSize, L2);
    __ ld_ptr(SP, (frame::register_save_words+3)*wordSize, L3);
    __ retl();                   // Result in Rret is != 0; flags set to NZ
    __ delayed()->add(SP, 4*wordSize, SP);
#else
    __ ret();                    // Result in Rret is != 0; flags set to NZ
    __ delayed()->restore();
#endif

    return start;
  }


  // Called from MacroAssembler::verify_oop
  //
  address generate_verify_oop_subroutine() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");

    address start = __ pc();

    __ verify_oop_subroutine();

    return start;
  }

  static address disjoint_byte_copy_entry;
  static address disjoint_short_copy_entry;
  static address disjoint_int_copy_entry;
  static address disjoint_long_copy_entry;
  static address disjoint_oop_copy_entry;

  static address byte_copy_entry;
  static address short_copy_entry;
  static address int_copy_entry;
  static address long_copy_entry;
  static address oop_copy_entry;

  static address checkcast_copy_entry;

  //
  // Verify that a register contains a clean 32-bit positive value
  // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
  //
  //  Input:
  //    Rint  -  32-bit value
  //    Rtmp  -  scratch
  //
  void assert_clean_int(Register Rint, Register Rtmp) {
#if defined(ASSERT) && defined(_LP64)
    __ signx(Rint, Rtmp);
    __ cmp(Rint, Rtmp);
    __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
#endif
  }

  //
  //  Generate overlap test for array copy stubs
  //
  //  Input:
  //    O0    -  array1
  //    O1    -  array2
  //    O2    -  element count
  //
  //  Kills temps:  O3, O4
  //
  void array_overlap_test(address no_overlap_target, int log2_elem_size) {
    assert(no_overlap_target != NULL, "must be generated");
    array_overlap_test(no_overlap_target, NULL, log2_elem_size);
  }
  void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
    array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
  }
  void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
    const Register from       = O0;
    const Register to         = O1;
    const Register count      = O2;
    const Register to_from    = O3; // to - from
    const Register byte_count = O4; // count << log2_elem_size

    __ subcc(to, from, to_from);
    __ sll_ptr(count, log2_elem_size, byte_count);
    if (NOLp == NULL)
      __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target);
    else
      __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp));
    __ delayed()->cmp(to_from, byte_count);
    if (NOLp == NULL)
      __ brx(Assembler::greaterEqual, false, Assembler::pt, no_overlap_target);
    else
      __ brx(Assembler::greaterEqual, false, Assembler::pt, (*NOLp));
    __ delayed()->nop();
  }
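
  // Editorial note on the test above: a simple forward (disjoint) copy is
  // safe unless the destination starts strictly inside the source range,
  // i.e. unless 0 < (to - from) < (count << log2_elem_size).
  // The first branch (lessEqualUnsigned on the subcc flags) accepts
  // to <= from; the second (signed greaterEqual on the delay-slot cmp)
  // accepts to - from >= byte_count. Everything else falls through into
  // the backward-copying conjoint code. Example with log2_elem_size == 0:
  // from = 0x1000, to = 0x1004, count = 16 gives to_from = 4, which lies
  // inside (0, 16), so the copy must run backward.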

  //
  //  Generate pre-write barrier for array.
  //
  //  Input:
  //     addr     -  register containing starting address
  //     count    -  register containing element count
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_pre_barrier(Register addr, Register count) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    if (bs->has_write_ref_pre_barrier()) {
      assert(bs->has_write_ref_array_pre_opt(),
             "Else unsupported barrier set.");

      __ save_frame(0);
      // Save the necessary global regs... will be used after.
      if (addr->is_global()) {
        __ mov(addr, L0);
      }
      if (count->is_global()) {
        __ mov(count, L1);
      }
      __ mov(addr->after_save(), O0);
      // Get the count into O1
      __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
      __ delayed()->mov(count->after_save(), O1);
      if (addr->is_global()) {
        __ mov(L0, addr);
      }
      if (count->is_global()) {
        __ mov(L1, count);
      }
      __ restore();
    }
  }
  //
  //  Generate post-write barrier for array.
  //
  //  Input:
  //     addr     -  register containing starting address
  //     count    -  register containing element count
  //     tmp      -  scratch register
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_post_barrier(Register addr, Register count,
                                        Register tmp) {
    BarrierSet* bs = Universe::heap()->barrier_set();

    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        {
          // Get some new fresh output registers.
          __ save_frame(0);
          __ mov(addr->after_save(), O0);
          __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
          __ delayed()->mov(count->after_save(), O1);
          __ restore();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
          assert_different_registers(addr, count, tmp);

          Label L_loop;

          __ sll_ptr(count, LogBytesPerHeapOop, count);
          __ sub(count, BytesPerHeapOop, count);
          __ add(count, addr, count);
          // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
          __ srl_ptr(addr, CardTableModRefBS::card_shift, addr);
          __ srl_ptr(count, CardTableModRefBS::card_shift, count);
          __ sub(count, addr, count);
          AddressLiteral rs(ct->byte_map_base);
          __ set(rs, tmp);
          __ BIND(L_loop);
          __ stb(G0, tmp, addr);
          __ subcc(count, 1, count);
          __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
          __ delayed()->add(addr, 1, addr);
        }
        break;
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();
    }
  }
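
  // Editorial worked example for the card-table case (values illustrative,
  // assuming card_shift == 9, i.e. 512-byte cards, and 8-byte heap oops):
  // an oop store range starting at 0x1000 covering 72 oops gives
  //
  //   last oop addr = 0x1000 + 72*8 - 8      = 0x1238
  //   first card    = 0x1000 >> 9            = 0x8
  //   last card     = 0x1238 >> 9            = 0x9
  //   count         = last card - first card = 1
  //
  // so the loop runs count+1 times, dirtying cards 0x8 and 0x9 by storing
  // a zero byte at byte_map_base + card index.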


  // Copy big chunks forward with shift
  //
  // Inputs:
  //   from      - source array address
  //   to        - destination array address, aligned to 8 bytes
  //   count     - elements count to copy, >= the count equivalent to 16 bytes
  //   count_dec - elements count decrement equivalent to 16 bytes
  //   L_copy_bytes - copy exit label
  //
  void copy_16_bytes_forward_with_shift(Register from, Register to,
                     Register count, int count_dec, Label& L_copy_bytes) {
    Label L_loop, L_aligned_copy, L_copy_last_bytes;

    // if both arrays have the same alignment mod 8, do 8-byte aligned copy
    __ andcc(from, 7, G1); // misaligned bytes
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->nop();

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    //
    // Load 2 aligned 8-byte chunks and use one from previous iteration
    // to form 2 aligned 8-byte chunks to store.
    //
    __ deccc(count, count_dec); // Pre-decrement 'count'
    __ andn(from, 7, from);     // Align address
    __ ldx(from, 0, O3);
    __ inc(from, 8);
    __ align(16);
    __ BIND(L_loop);
    __ ldx(from, 0, O4);
    __ deccc(count, count_dec); // Can we do next iteration after this one?
    __ ldx(from, 8, G4);
    __ inc(to, 16);
    __ inc(from, 16);
    __ sllx(O3, left_shift,  O3);
    __ srlx(O4, right_shift, G3);
    __ bset(G3, O3);
    __ stx(O3, to, -16);
    __ sllx(O4, left_shift,  O4);
    __ srlx(G4, right_shift, G3);
    __ bset(G3, O4);
    __ stx(O4, to, -8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->mov(G4, O3);

    __ inccc(count, count_dec>>1); // + 8 bytes
    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
    __ delayed()->inc(count, count_dec>>1); // restore 'count'

    // copy 8 bytes, part of them already loaded in O3
    __ ldx(from, 0, O4);
    __ inc(to, 8);
    __ inc(from, 8);
    __ sllx(O3, left_shift,  O3);
    __ srlx(O4, right_shift, G3);
    __ bset(O3, G3);
    __ stx(G3, to, -8);

    __ BIND(L_copy_last_bytes);
    __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
    __ delayed()->sub(from, right_shift, from);       // restore address

    __ BIND(L_aligned_copy);
  }
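
  // Editorial worked example of the merge above: suppose 'from' is
  // misaligned by 3 bytes, so left_shift = 24 and right_shift = 40. On
  // big-endian SPARC, each aligned 8-byte load then holds 5 wanted bytes
  // (the low-order end) and 3 unwanted leading bytes. For adjacent aligned
  // words A and B,
  //
  //   (A << 24) | (B >> 40)
  //
  // concatenates the last 5 bytes of A with the first 3 bytes of B, which
  // is exactly the 8 source bytes starting at the original misaligned
  // address.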

  // Copy big chunks backward with shift
  //
  // Inputs:
  //   end_from  - source array end address
  //   end_to    - destination array end address, aligned to 8 bytes
  //   count     - elements count to copy, >= the count equivalent to 16 bytes
  //   count_dec - elements count decrement equivalent to 16 bytes
  //   L_aligned_copy - aligned copy exit label
  //   L_copy_bytes   - copy exit label
  //
  void copy_16_bytes_backward_with_shift(Register end_from, Register end_to,
                     Register count, int count_dec,
                     Label& L_aligned_copy, Label& L_copy_bytes) {
    Label L_loop, L_copy_last_bytes;

    // if both arrays have the same alignment mod 8, do 8-byte aligned copy
    __ andcc(end_from, 7, G1); // misaligned bytes
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->deccc(count, count_dec); // Pre-decrement 'count'

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    //
    // Load 2 aligned 8-byte chunks and use one from previous iteration
    // to form 2 aligned 8-byte chunks to store.
    //
    __ andn(end_from, 7, end_from); // Align address
    __ ldx(end_from, 0, O3);
    __ align(16);
    __ BIND(L_loop);
    __ ldx(end_from, -8, O4);
    __ deccc(count, count_dec); // Can we do next iteration after this one?
    __ ldx(end_from, -16, G4);
    __ dec(end_to, 16);
    __ dec(end_from, 16);
    __ srlx(O3, right_shift, O3);
    __ sllx(O4, left_shift,  G3);
    __ bset(G3, O3);
    __ stx(O3, end_to, 8);
    __ srlx(O4, right_shift, O4);
    __ sllx(G4, left_shift,  G3);
    __ bset(G3, O4);
    __ stx(O4, end_to, 0);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->mov(G4, O3);

    __ inccc(count, count_dec>>1); // + 8 bytes
    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
    __ delayed()->inc(count, count_dec>>1); // restore 'count'

    // copy 8 bytes, part of them already loaded in O3
    __ ldx(end_from, -8, O4);
    __ dec(end_to, 8);
    __ dec(end_from, 8);
    __ srlx(O3, right_shift, O3);
    __ sllx(O4, left_shift,  G3);
    __ bset(O3, G3);
    __ stx(G3, end_to, 0);

    __ BIND(L_copy_last_bytes);
    __ srl(left_shift, LogBitsPerByte, left_shift); // misaligned bytes
    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
    __ delayed()->add(end_from, left_shift, end_from); // restore address
  }

  //
  //  Generate stub for disjoint byte copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_byte_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_align;
    Label L_copy_byte, L_copy_byte_loop, L_exit;

    const Register from   = O0;  // source array address
    const Register to     = O1;  // destination array address
    const Register count  = O2;  // elements count
    const Register offset = O5;  // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    assert_clean_int(count, O3); // Make sure 'count' is clean int.

    if (!aligned)  disjoint_byte_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    // for short arrays, just do single element copy
    __ cmp(count, 23); // 16 + 7
    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
    __ delayed()->mov(G0, offset);

    if (aligned) {
      // 'aligned' == true when it is known statically during compilation
      // of this arraycopy call site that both 'from' and 'to' addresses
      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
      //
      // Aligned arrays have 4-byte alignment in the 32-bit VM and 8-byte
      // alignment in the 64-bit VM, so we do this only for the 32-bit VM.
      //
#ifndef _LP64
      // copy a 4-byte word if necessary to align 'to' to 8 bytes
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count, 4);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);
#endif
    } else {
      // copy bytes to align 'to' on an 8-byte boundary
      __ andcc(to, 7, G1); // misaligned bytes
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->neg(G1);
      __ inc(G1, 8);       // bytes needed to reach the next 8-byte alignment
      __ sub(count, G1, count);
      __ BIND(L_align);
      __ ldub(from, 0, O3);
      __ deccc(G1);
      __ inc(from);
      __ stb(O3, to, 0);
      __ br(Assembler::notZero, false, Assembler::pt, L_align);
      __ delayed()->inc(to);
      __ BIND(L_skip_alignment);
    }
#ifdef _LP64
    if (!aligned)
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise fall through to the next
      // code for aligned copy.
      // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
    }

    // Both arrays are 8 bytes aligned, copy 16 bytes at a time
    __ and3(count, 7, G4); // Save count
    __ srl(count, 3, count);
    generate_disjoint_long_copy_core(aligned);
    __ mov(G4, count);     // Restore count

    // copy trailing bytes
    __ BIND(L_copy_byte);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ align(16);
    __ BIND(L_copy_byte_loop);
    __ ldub(from, offset, O3);
    __ deccc(count);
    __ stb(O3, to, offset);
    __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
    __ delayed()->inc(offset);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }
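
  // A hedged sketch of how these copy stubs get published (the pattern used
  // by generate_arraycopy_stubs() later in this generator; treat the lines
  // below as illustrative):
  //
  //   StubRoutines::_jbyte_disjoint_arraycopy =
  //       generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
  //   StubRoutines::_jbyte_arraycopy =
  //       generate_conjoint_byte_copy(false, "jbyte_arraycopy");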

  //
  //  Generate stub for conjoint byte copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_byte_copy(bool aligned, const char * name) {
    // Do reverse copy.

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
        disjoint_byte_copy_entry;

    Label L_skip_alignment, L_align, L_aligned_copy;
    Label L_copy_byte, L_copy_byte_loop, L_exit;

    const Register from     = O0;   // source array address
    const Register to       = O1;   // destination array address
    const Register count    = O2;   // elements count
    const Register end_from = from; // source array end address
    const Register end_to   = to;   // destination array end address

    assert_clean_int(count, O3);    // Make sure 'count' is clean int.

    if (!aligned)  byte_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 0);

    __ add(to, count, end_to); // offset after last copied element

    // for short arrays, just do single element copy
    __ cmp(count, 23); // 16 + 7
    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
    __ delayed()->add(from, count, end_from);

    {
      // Align the ends of the arrays, since they could be misaligned
      // even when the arrays themselves are aligned.

      // copy bytes to align 'end_to' on an 8-byte boundary
      __ andcc(end_to, 7, G1); // misaligned bytes
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->nop();
      __ sub(count, G1, count);
      __ BIND(L_align);
      __ dec(end_from);
      __ dec(end_to);
      __ ldub(end_from, 0, O3);
      __ deccc(G1);
      __ brx(Assembler::notZero, false, Assembler::pt, L_align);
      __ delayed()->stb(O3, end_to, 0);
      __ BIND(L_skip_alignment);
    }
#ifdef _LP64
    if (aligned) {
      // Both arrays are aligned to 8 bytes in the 64-bit VM.
      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
      // only in the unaligned case.
      __ dec(count, 16);
    } else
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise jump to the next code for
      // aligned copy (subtracting 16 from 'count' before the jump).
      // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
                                        L_aligned_copy, L_copy_byte);
    }
    // copy 16 elements (16 bytes) at a time
    __ align(16);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 16);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 16);

    // copy 1 element (1 byte) at a time
    __ BIND(L_copy_byte);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ align(16);
    __ BIND(L_copy_byte_loop);
    __ dec(end_from);
    __ dec(end_to);
    __ ldub(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
    __ delayed()->stb(O4, end_to, 0);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate stub for disjoint short copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_short_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_skip_alignment2;
    Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;

    const Register from   = O0;  // source array address
    const Register to     = O1;  // destination array address
    const Register count  = O2;  // elements count
    const Register offset = O5;  // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    assert_clean_int(count, O3); // Make sure 'count' is clean int.

    if (!aligned)  disjoint_short_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    // for short arrays, just do single element copy
    __ cmp(count, 11); // 8 + 3  (22 bytes)
    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
    __ delayed()->mov(G0, offset);

    if (aligned) {
      // 'aligned' == true when it is known statically during compilation
      // of this arraycopy call site that both 'from' and 'to' addresses
      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
      //
      // Aligned arrays have 4-byte alignment in the 32-bit VM
      // and 8-byte alignment in the 64-bit VM.
      //
#ifndef _LP64
      // copy a 2-element word if necessary to align 'to' to 8 bytes
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count, 2);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);
#endif
    } else {
      // copy 1 element if necessary to align 'to' on a 4-byte boundary
      __ andcc(to, 3, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->lduh(from, 0, O3);
      __ inc(from, 2);
      __ inc(to, 2);
      __ dec(count);
      __ sth(O3, to, -2);
      __ BIND(L_skip_alignment);

      // copy 2 elements to align 'to' on an 8-byte boundary
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
      __ delayed()->lduh(from, 0, O3);
      __ dec(count, 2);
      __ lduh(from, 2, O4);
      __ inc(from, 4);
      __ inc(to, 4);
      __ sth(O3, to, -4);
      __ sth(O4, to, -2);
      __ BIND(L_skip_alignment2);
    }
#ifdef _LP64
    if (!aligned)
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise fall through to the next
      // code for aligned copy.
      // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
    }

    // Both arrays are 8 bytes aligned, copy 16 bytes at a time
    __ and3(count, 3, G4); // Save
    __ srl(count, 2, count);
    generate_disjoint_long_copy_core(aligned);
    __ mov(G4, count);     // restore

    // copy 1 element at a time
    __ BIND(L_copy_2_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ align(16);
    __ BIND(L_copy_2_bytes_loop);
    __ lduh(from, offset, O3);
    __ deccc(count);
    __ sth(O3, to, offset);
    __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
    __ delayed()->inc(offset, 2);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate stub for conjoint short copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_short_copy(bool aligned, const char * name) {
    // Do reverse copy.

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jshort_disjoint_arraycopy() :
        disjoint_short_copy_entry;

    Label L_skip_alignment, L_skip_alignment2, L_aligned_copy;
    Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;

    const Register from     = O0;   // source array address
    const Register to       = O1;   // destination array address
    const Register count    = O2;   // elements count
    const Register end_from = from; // source array end address
    const Register end_to   = to;   // destination array end address

    const Register byte_count = O3; // bytes count to copy

    assert_clean_int(count, O3);    // Make sure 'count' is clean int.

    if (!aligned)  short_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 1);

    __ sllx(count, LogBytesPerShort, byte_count);
    __ add(to, byte_count, end_to); // offset after last copied element

    // for short arrays, just do single element copy
    __ cmp(count, 11); // 8 + 3  (22 bytes)
    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
    __ delayed()->add(from, byte_count, end_from);

    {
      // Align the ends of the arrays, since they could be misaligned
      // even when the arrays themselves are aligned.

      // copy 1 element if necessary to align 'end_to' on a 4-byte boundary
      __ andcc(end_to, 3, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->lduh(end_from, -2, O3);
      __ dec(end_from, 2);
      __ dec(end_to, 2);
      __ dec(count);
      __ sth(O3, end_to, 0);
      __ BIND(L_skip_alignment);

      // copy 2 elements to align 'end_to' on an 8-byte boundary
      __ andcc(end_to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
      __ delayed()->lduh(end_from, -2, O3);
      __ dec(count, 2);
      __ lduh(end_from, -4, O4);
      __ dec(end_from, 4);
      __ dec(end_to, 4);
      __ sth(O3, end_to, 2);
      __ sth(O4, end_to, 0);
      __ BIND(L_skip_alignment2);
    }
#ifdef _LP64
    if (aligned) {
      // Both arrays are aligned to 8 bytes in the 64-bit VM.
      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
      // only in the unaligned case.
      __ dec(count, 8);
    } else
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise jump to the next code for
      // aligned copy (subtracting 8 from 'count' before the jump).
      // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
                                        L_aligned_copy, L_copy_2_bytes);
    }
    // copy 8 elements (16 bytes) at a time
    __ align(16);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 8);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 8);

    // copy 1 element (2 bytes) at a time
    __ BIND(L_copy_2_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ BIND(L_copy_2_bytes_loop);
    __ dec(end_from, 2);
    __ dec(end_to, 2);
    __ lduh(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
    __ delayed()->sth(O4, end_to, 0);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate core code for disjoint int copy (and oop copy on 32-bit).
  //  If "aligned" is true, the "from" and "to" addresses are assumed
  //  to be heapword aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_disjoint_int_copy_core(bool aligned) {

    Label L_skip_alignment, L_aligned_copy;
    Label L_copy_16_bytes,  L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;

    const Register from   = O0;  // source array address
    const Register to     = O1;  // destination array address
    const Register count  = O2;  // elements count
    const Register offset = O5;  // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    // 'aligned' == true when it is known statically during compilation
    // of this arraycopy call site that both 'from' and 'to' addresses
    // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
    //
    // Aligned arrays have 4-byte alignment in the 32-bit VM
    // and 8-byte alignment in the 64-bit VM.
    //
#ifdef _LP64
    if (!aligned)
#endif
    {
      // The next check could be put under 'ifndef' since the code in
      // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.

      // for short arrays, just do single element copy
      __ cmp(count, 5); // 4 + 1 (20 bytes)
      __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
      __ delayed()->mov(G0, offset);

      // copy 1 element to align 'to' on an 8-byte boundary
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);

      // if arrays have same alignment mod 8, do 4 elements copy
      __ andcc(from, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
      __ delayed()->ld(from, 0, O3);

      //
      // Load 2 aligned 8-byte chunks and use one from the previous iteration
      // to form 2 aligned 8-byte chunks to store.
      //
      // copy_16_bytes_forward_with_shift() is not used here since this
      // specialized code is faster.

      // copy with shift 4 elements (16 bytes) at a time
      __ dec(count, 4);   // The cmp at the beginning guarantees count >= 4

      __ align(16);
      __ BIND(L_copy_16_bytes);
      __ ldx(from, 4, O4);
      __ deccc(count, 4); // Can we do next iteration after this one?
      __ ldx(from, 12, G4);
      __ inc(to, 16);
      __ inc(from, 16);
      __ sllx(O3, 32, O3);
      __ srlx(O4, 32, G3);
      __ bset(G3, O3);
      __ stx(O3, to, -16);
      __ sllx(O4, 32, O4);
      __ srlx(G4, 32, G3);
      __ bset(G3, O4);
      __ stx(O4, to, -8);
      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
      __ delayed()->mov(G4, O3);

      __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
      __ delayed()->inc(count, 4); // restore 'count'

      __ BIND(L_aligned_copy);
    }
    // copy 4 elements (16 bytes) at a time
    __ and3(count, 1, G4); // Save
    __ srl(count, 1, count);
    generate_disjoint_long_copy_core(aligned);
    __ mov(G4, count);     // Restore

    // copy 1 element at a time
    __ BIND(L_copy_4_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ BIND(L_copy_4_bytes_loop);
    __ ld(from, offset, O3);
    __ deccc(count);
    __ st(O3, to, offset);
    __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
    __ delayed()->inc(offset, 4);
    __ BIND(L_exit);
  }
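
  // Rough, illustrative C sketch of the shift-merge loop in
  // generate_disjoint_int_copy_core above (not the emitted code), for the
  // case where 'to' is 8-byte aligned but 'from' is offset by 4. On
  // big-endian SPARC the lower-addressed 4 bytes are the high half of a
  // 64-bit load:
  //
  //   uint32_t  prev = *(uint32_t*)from;   // __ ld(from, 0, O3)
  //   uint64_t* q = (uint64_t*)(from + 4); // 8-byte aligned source words
  //   uint64_t* p = (uint64_t*)to;         // 8-byte aligned destination
  //   while (4 or more elements remain) {
  //     uint64_t a = *q++, b = *q++;
  //     *p++ = ((uint64_t)prev << 32) | (a >> 32); // sllx/srlx/bset
  //     *p++ = (a << 32) | (b >> 32);
  //     prev = (uint32_t)b;                // carried by delayed()->mov(G4, O3)
  //   }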

  //
  // Generate stub for disjoint int copy.  If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_int_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register count = O2;
    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (!aligned)  disjoint_int_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    generate_disjoint_int_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  // Generate core code for conjoint int copy (and oop copy on 32-bit).
  // If "aligned" is true, the "from" and "to" addresses are assumed
  // to be heapword aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_conjoint_int_copy_core(bool aligned) {
    // Do reverse copy.

    Label L_skip_alignment, L_aligned_copy;
    Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;

    const Register from      = O0;   // source array address
    const Register to        = O1;   // destination array address
    const Register count     = O2;   // elements count
    const Register end_from  = from; // source array end address
    const Register end_to    = to;   // destination array end address
    // O3, O4, O5, G3 are used as temp registers

    const Register byte_count = O3;  // bytes count to copy

    __ sllx(count, LogBytesPerInt, byte_count);
    __ add(to, byte_count, end_to);  // offset after last copied element

    __ cmp(count, 5); // for short arrays, just do single element copy
    __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
    __ delayed()->add(from, byte_count, end_from);

    // copy 1 element to align 'end_to' on an 8-byte boundary
    __ andcc(end_to, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
    __ delayed()->nop();
    __ dec(count);
    __ dec(end_from, 4);
    __ dec(end_to, 4);
    __ ld(end_from, 0, O4);
    __ st(O4, end_to, 0);
    __ BIND(L_skip_alignment);

    // Check if 'end_from' and 'end_to' have the same alignment.
    __ andcc(end_from, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->dec(count, 4); // The cmp at the start guarantees count >= 4

    // copy with shift 4 elements (16 bytes) at a time
    //
    // Load 2 aligned 8-byte chunks and use one from the previous iteration
    // to form 2 aligned 8-byte chunks to store.
    //
    __ ldx(end_from, -4, O3);
    __ align(16);
    __ BIND(L_copy_16_bytes);
    __ ldx(end_from, -12, O4);
    __ deccc(count, 4);
    __ ldx(end_from, -20, O5);
    __ dec(end_to, 16);
    __ dec(end_from, 16);
    __ srlx(O3, 32, O3);
    __ sllx(O4, 32, G3);
    __ bset(G3, O3);
    __ stx(O3, end_to, 8);
    __ srlx(O4, 32, O4);
    __ sllx(O5, 32, G3);
    __ bset(O4, G3);
    __ stx(G3, end_to, 0);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->mov(O5, O3);

    __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
    __ delayed()->inc(count, 4);

    // copy 4 elements (16 bytes) at a time
    __ align(16);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 4);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 4);

    // copy 1 element (4 bytes) at a time
    __ BIND(L_copy_4_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ BIND(L_copy_4_bytes_loop);
    __ dec(end_from, 4);
    __ dec(end_to, 4);
    __ ld(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
    __ delayed()->st(O4, end_to, 0);
    __ BIND(L_exit);
  }

  //
  // Generate stub for conjoint int copy.  If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_int_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jint_disjoint_arraycopy() :
        disjoint_int_copy_entry;

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (!aligned)  int_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 2);

    generate_conjoint_int_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  // Generate core code for disjoint long copy (and oop copy on 64-bit).
  // "aligned" is ignored, because we must make the stronger
  // assumption that both addresses are always 64-bit aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_disjoint_long_copy_core(bool aligned) {
    Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
    const Register from    = O0;  // source array address
    const Register to      = O1;  // destination array address
    const Register count   = O2;  // elements count
    const Register offset0 = O4;  // element offset
    const Register offset8 = O5;  // next element offset

    __ deccc(count, 2);
    __ mov(G0, offset0);   // offset from start of arrays (0)
    __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes);
    __ delayed()->add(offset0, 8, offset8);
    __ align(16);
    __ BIND(L_copy_16_bytes);
    __ ldx(from, offset0, O3);
    __ ldx(from, offset8, G3);
    __ deccc(count, 2);
    __ stx(O3, to, offset0);
    __ inc(offset0, 16);
    __ stx(G3, to, offset8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->inc(offset8, 16);

    __ BIND(L_copy_8_bytes);
    __ inccc(count, 2);
    __ brx(Assembler::zero, true, Assembler::pn, L_exit);
    __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs
    __ ldx(from, offset0, O3);
    __ stx(O3, to, offset0);
    __ BIND(L_exit);
  }

  //
  // Generate stub for disjoint long copy.
  // "aligned" is ignored, because we must make the stronger
  // assumption that both addresses are always 64-bit aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_long_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (!aligned)  disjoint_long_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    generate_disjoint_long_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  // Generate core code for conjoint long copy (and oop copy on 64-bit).
  // "aligned" is ignored, because we must make the stronger
  // assumption that both addresses are always 64-bit aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_conjoint_long_copy_core(bool aligned) {
    // Do reverse copy.
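    //
    // Rough, illustrative C sketch of the backward copy emitted below (not
    // the generated code): two 8-byte elements per iteration, starting at
    // the last element, with a one-element epilogue for an odd count:
    //
    //   long i = count - 1;                  // index of the last element
    //   for (; i >= 1; i -= 2) {             // two elements per pass
    //     to[i]   = from[i];
    //     to[i-1] = from[i-1];
    //   }
    //   if (i == 0)  to[0] = from[0];        // odd leftover element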
    Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
    const Register from    = O0;  // source array address
    const Register to      = O1;  // destination array address
    const Register count   = O2;  // elements count
    const Register offset8 = O4;  // element offset
    const Register offset0 = O5;  // previous element offset

    __ subcc(count, 1, count);
    __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes);
    __ delayed()->sllx(count, LogBytesPerLong, offset8);
    __ sub(offset8, 8, offset0);
    __ align(16);
    __ BIND(L_copy_16_bytes);
    __ ldx(from, offset8, O2);
    __ ldx(from, offset0, O3);
    __ stx(O2, to, offset8);
    __ deccc(offset8, 16);      // use offset8 as counter
    __ stx(O3, to, offset0);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->dec(offset0, 16);

    __ BIND(L_copy_8_bytes);
    __ brx(Assembler::negative, false, Assembler::pn, L_exit);
    __ delayed()->nop();
    __ ldx(from, 0, O3);
    __ stx(O3, to, 0);
    __ BIND(L_exit);
  }

  // Generate stub for conjoint long copy.
  // "aligned" is ignored, because we must make the stronger
  // assumption that both addresses are always 64-bit aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_long_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert(!aligned, "usage");
    address nooverlap_target = disjoint_long_copy_entry;

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (!aligned)  long_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 3);

    generate_conjoint_long_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  // Generate stub for disjoint oop copy.  If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_oop_copy(bool aligned, const char * name) {

    const Register from  = O0;  // source array address
    const Register to    = O1;  // destination array address
    const Register count = O2;  // elements count

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(count, O3);  // Make sure 'count' is clean int.

    if (!aligned)  disjoint_oop_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here
    if (!aligned)  BLOCK_COMMENT("Entry:");

    // save arguments for barrier generation
    __ mov(to, G1);
    __ mov(count, G5);
    gen_write_ref_array_pre_barrier(G1, G5);
#ifdef _LP64
    assert_clean_int(count, O3);     // Make sure 'count' is clean int.
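    // With compressed oops each element is a 32-bit narrowOop, so the int
    // copy core moves the right amount of data per element; full-width oops
    // are moved by the long copy core. Roughly (illustrative only):
    //
    //   size_t elem_size = UseCompressedOops ? sizeof(narrowOop) : sizeof(oop);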
    if (UseCompressedOops) {
      generate_disjoint_int_copy_core(aligned);
    } else {
      generate_disjoint_long_copy_core(aligned);
    }
#else
    generate_disjoint_int_copy_core(aligned);
#endif
    // O0 is used as temp register
    gen_write_ref_array_post_barrier(G1, G5, O0);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  // Generate stub for conjoint oop copy.  If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_oop_copy(bool aligned, const char * name) {

    const Register from  = O0;  // source array address
    const Register to    = O1;  // destination array address
    const Register count = O2;  // elements count

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(count, O3);  // Make sure 'count' is clean int.

    if (!aligned)  oop_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here
    if (!aligned)  BLOCK_COMMENT("Entry:");

    // save arguments for barrier generation
    __ mov(to, G1);
    __ mov(count, G5);

    gen_write_ref_array_pre_barrier(G1, G5);

    address nooverlap_target = aligned ?
        StubRoutines::arrayof_oop_disjoint_arraycopy() :
        disjoint_oop_copy_entry;

    array_overlap_test(nooverlap_target, LogBytesPerHeapOop);

#ifdef _LP64
    if (UseCompressedOops) {
      generate_conjoint_int_copy_core(aligned);
    } else {
      generate_conjoint_long_copy_core(aligned);
    }
#else
    generate_conjoint_int_copy_core(aligned);
#endif

    // O0 is used as temp register
    gen_write_ref_array_post_barrier(G1, G5, O0);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }


  // Helper for generating a dynamic type check.
  // Smashes only the given temp registers.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Register temp,
                           Label& L_success) {
    assert_different_registers(sub_klass, super_check_offset, super_klass, temp);

    BLOCK_COMMENT("type_check:");

    Label L_miss, L_pop_to_miss;

    assert_clean_int(super_check_offset, temp);

    __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
                                     &L_success, &L_miss, NULL,
                                     super_check_offset);

    BLOCK_COMMENT("type_check_slow_path:");
    __ save_frame(0);
    __ check_klass_subtype_slow_path(sub_klass->after_save(),
                                     super_klass->after_save(),
                                     L0, L1, L2, L4,
                                     NULL, &L_pop_to_miss);
    __ ba(false, L_success);
    __ delayed()->restore();

    __ bind(L_pop_to_miss);
    __ restore();

    // Fall through on failure!
    __ BIND(L_miss);
  }

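  // Rough sketch of the fast path generated above (illustrative only): a
  // klass stores, at 'super_check_offset' within itself, either a cached
  // supertype or a slot in its display of primary supers, so a single load
  // and compare usually decides the answer:
  //
  //   if (sub_klass == super_klass)  goto success;        // trivial case
  //   if (*(Klass**)((address)sub_klass + super_check_offset) == super_klass)
  //     goto success;                                     // cache/display hit
  //   // otherwise the out-of-line slow path scans the secondary supers
  //   // list (or the miss is already definite and we fall through to fail)
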
  // Generate stub for checked oop copy.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //      ckoff: O3 (super_check_offset)
  //      ckval: O4 (super_klass)
  //      ret:   O0 zero for success; (-1^K) where K is partial transfer count
  //
  address generate_checkcast_copy(const char* name) {

    const Register O0_from   = O0;      // source array address
    const Register O1_to     = O1;      // destination array address
    const Register O2_count  = O2;      // elements count
    const Register O3_ckoff  = O3;      // super_check_offset
    const Register O4_ckval  = O4;      // super_klass

    const Register O5_offset = O5;      // loop var, with stride wordSize
    const Register G1_remain = G1;      // loop var, with stride -1
    const Register G3_oop    = G3;      // actual oop copied
    const Register G4_klass  = G4;      // oop._klass
    const Register G5_super  = G5;      // oop._klass._primary_supers[ckval]

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    gen_write_ref_array_pre_barrier(O1, O2);

#ifdef ASSERT
    // We sometimes save a frame (see the generate_type_check call below).
    // If this will cause trouble, let's fail now instead of later.
    __ save_frame(0);
    __ restore();
#endif

#ifdef ASSERT
    // caller guarantees that the arrays really are different
    // otherwise, we would have to make conjoint checks
    { Label L;
      __ mov(O3, G1);  // spill: overlap test smashes O3
      __ mov(O4, G4);  // spill: overlap test smashes O4
      array_overlap_test(L, LogBytesPerHeapOop);
      __ stop("checkcast_copy within a single array");
      __ bind(L);
      __ mov(G1, O3);
      __ mov(G4, O4);
    }
#endif //ASSERT

    assert_clean_int(O2_count, G1);     // Make sure 'count' is clean int.

    checkcast_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from generic stub)
    BLOCK_COMMENT("Entry:");

    Label load_element, store_element, do_card_marks, fail, done;
    __ addcc(O2_count, 0, G1_remain);   // initialize loop index, and test it
    __ brx(Assembler::notZero, false, Assembler::pt, load_element);
    __ delayed()->mov(G0, O5_offset);   // offset from start of arrays

    // Empty array:  Nothing to do.
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->set(0, O0);           // return 0 on (trivial) success

    // ======== begin loop ========
    // (Loop is rotated; its entry is load_element.)
    // Loop variables:
    //   (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
    //   (O2 = len; O2 != 0; O2--)  --- number of oops *remaining*
    //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
    __ align(16);

    __ BIND(store_element);
    __ deccc(G1_remain);                // decrement the count
    __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
    __ inc(O5_offset, heapOopSize);     // step to next offset
    __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
    __ delayed()->set(0, O0);           // return 0 on success

    // ======== loop entry is here ========
    __ BIND(load_element);
    __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
    __ br_null(G3_oop, true, Assembler::pt, store_element);
    __ delayed()->nop();

    __ load_klass(G3_oop, G4_klass);    // query the object klass

    generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
                        // branch to this on success:
                        store_element);
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register G1 has number of *remaining* oops, O2 number of *total* oops.
    // Emit GC store barriers for the oops we have copied (O2 minus G1),
    // and report their number to the caller.
    __ BIND(fail);
    __ subcc(O2_count, G1_remain, O2_count);
    __ brx(Assembler::zero, false, Assembler::pt, done);
    __ delayed()->not1(O2_count, O0);   // report (-1^K) to caller

    __ BIND(do_card_marks);
    gen_write_ref_array_post_barrier(O1_to, O2_count, O3);  // store check on O1[0..O2]

    __ BIND(done);
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->nop();                // return value in O0
    return start;
  }

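  // Rough C sketch of the rotated loop above (illustrative only):
  //
  //   for (i = 0, remain = count; remain != 0; i++, remain--) {
  //     oop o = from[i];                        // load_element
  //     if (o != NULL && !subtype_of(o->klass(), ckval))
  //       break;                                // fail
  //     to[i] = o;                              // store_element
  //   }
  //   // full transfer: O0 = 0; early exit: O0 = ~(count - remain) = -1^K
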
  // Generate 'unsafe' array copy stub
  // Though just as safe as the other stubs, it takes an unscaled
  // size_t argument instead of an element count.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 byte count, treated as ssize_t, can be zero
  //
  // Examines the alignment of the operands and dispatches
  // to a long, int, short, or byte copy loop.
  //
  address generate_unsafe_copy(const char* name) {

    const Register O0_from  = O0;     // source array address
    const Register O1_to    = O1;     // destination array address
    const Register O2_count = O2;     // elements count

    const Register G1_bits  = G1;     // test copy of low bits

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3);

    __ or3(O0_from, O1_to, G1_bits);
    __ or3(O2_count, G1_bits, G1_bits);

    __ btst(BytesPerLong-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          long_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerLong, O2_count);

    __ btst(BytesPerInt-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          int_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerInt, O2_count);

    __ btst(BytesPerShort-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          short_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerShort, O2_count);

    __ br(Assembler::always, false, Assembler::pt,
          byte_copy_entry, relocInfo::runtime_call_type);
    __ delayed()->nop();

    return start;
  }
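
  // The dispatch above, roughly in C (illustrative only): the low bits of
  // the two addresses and the byte count are OR'ed together, so one test
  // per element size decides which copy loop can be used:
  //
  //   uintptr_t bits = from | to | byte_count;
  //   if ((bits & (BytesPerLong-1))  == 0)  jlong_copy (from, to, byte_count >> 3);
  //   else if ((bits & (BytesPerInt-1))   == 0)  jint_copy  (from, to, byte_count >> 2);
  //   else if ((bits & (BytesPerShort-1)) == 0)  jshort_copy(from, to, byte_count >> 1);
  //   else                                       jbyte_copy (from, to, byte_count);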

  // Perform range checks on the proposed arraycopy.
  // Kills the two temps, but nothing else.
  // Also, clean the sign bits of src_pos and dst_pos.
  void arraycopy_range_checks(Register src,     // source array oop (O0)
                              Register src_pos, // source position (O1)
                              Register dst,     // destination array oop (O2)
                              Register dst_pos, // destination position (O3)
                              Register length,  // length of copy (O4)
                              Register temp1, Register temp2,
                              Label& L_failed) {
    BLOCK_COMMENT("arraycopy_range_checks:");

    //  if (src_pos + length > arrayOop(src)->length() ) FAIL;

    const Register array_length = temp1;  // scratch
    const Register end_pos      = temp2;  // scratch

    // Note:  This next instruction may be in the delay slot of a branch:
    __ add(length, src_pos, end_pos);  // src_pos + length
    __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length);
    __ cmp(end_pos, array_length);
    __ br(Assembler::greater, false, Assembler::pn, L_failed);

    //  if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
    __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length
    __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length);
    __ cmp(end_pos, array_length);
    __ br(Assembler::greater, false, Assembler::pn, L_failed);

    // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
    // Move with sign extension can be used since they are positive.
    __ delayed()->signx(src_pos, src_pos);
    __ signx(dst_pos, dst_pos);

    BLOCK_COMMENT("arraycopy_range_checks done");
  }


  //
  //  Generate generic array copy stubs
  //
  //  Input:
  //    O0    -  src oop
  //    O1    -  src_pos
  //    O2    -  dst oop
  //    O3    -  dst_pos
  //    O4    -  element count
  //
  //  Output:
  //    O0 ==  0  -  success
  //    O0 == -1  -  need to call System.arraycopy
  //
  address generate_generic_copy(const char *name) {

    Label L_failed, L_objArray;

    // Input registers
    const Register src      = O0;  // source array oop
    const Register src_pos  = O1;  // source position
    const Register dst      = O2;  // destination array oop
    const Register dst_pos  = O3;  // destination position
    const Register length   = O4;  // elements count

    // registers used as temp
    const Register G3_src_klass = G3; // source array klass
    const Register G4_dst_klass = G4; // destination array klass
    const Register G5_lh        = G5; // layout helper
    const Register O5_temp      = O5;

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3);

    // In principle, the int arguments could be dirty.
    //assert_clean_int(src_pos, G1);
    //assert_clean_int(dst_pos, G1);
    //assert_clean_int(length, G1);

    //-----------------------------------------------------------------------
    // Assembler stubs will be used for this call to arraycopy
    // if the following conditions are met:
    //
    // (1) src and dst must not be null.
    // (2) src_pos must not be negative.
    // (3) dst_pos must not be negative.
    // (4) length  must not be negative.
    // (5) src klass and dst klass should be the same and not NULL.
    // (6) src and dst should be arrays.
    // (7) src_pos + length must not exceed length of src.
    // (8) dst_pos + length must not exceed length of dst.
    BLOCK_COMMENT("arraycopy initial argument checks");

    //  if (src == NULL) return -1;
    __ br_null(src, false, Assembler::pn, L_failed);

    //  if (src_pos < 0) return -1;
    __ delayed()->tst(src_pos);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);
    __ delayed()->nop();

    //  if (dst == NULL) return -1;
    __ br_null(dst, false, Assembler::pn, L_failed);

    //  if (dst_pos < 0) return -1;
    __ delayed()->tst(dst_pos);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);

    //  if (length < 0) return -1;
    __ delayed()->tst(length);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);

    BLOCK_COMMENT("arraycopy argument klass checks");
    // get src->klass()
    if (UseCompressedOops) {
      // load_klass expands to several instructions, so it cannot
      // go in the delay slot; fill it with a nop instead.
      __ delayed()->nop();
      __ load_klass(src, G3_src_klass);
    } else {
      __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
    }

#ifdef ASSERT
    //  assert(src->klass() != NULL);
    BLOCK_COMMENT("assert klasses not null");
    { Label L_a, L_b;
      __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
      __ delayed()->nop();
      __ bind(L_a);
      __ stop("broken null klass");
      __ bind(L_b);
      __ load_klass(dst, G4_dst_klass);
      __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
      __ delayed()->mov(G0, G4_dst_klass);      // scribble the temp
      BLOCK_COMMENT("assert done");
    }
#endif

    // Load layout helper
    //
    //  |array_tag|     | header_size | element_type |     |log2_element_size|
    // 32        30    24            16              8     2                 0
    //
    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
    //

    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
                    Klass::layout_helper_offset_in_bytes();

    // Load 32-bit signed value. Use br() instruction with it to check icc.
    __ lduw(G3_src_klass, lh_offset, G5_lh);

    if (UseCompressedOops) {
      __ load_klass(dst, G4_dst_klass);
    }
    // Handle objArrays completely differently...
    juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
    __ set(objArray_lh, O5_temp);
    __ cmp(G5_lh, O5_temp);
    __ br(Assembler::equal, false, Assembler::pt, L_objArray);
    if (UseCompressedOops) {
      __ delayed()->nop();
    } else {
      __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
    }

    //  if (src->klass() != dst->klass()) return -1;
    __ cmp(G3_src_klass, G4_dst_klass);
    __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
    __ delayed()->nop();

    //  if (!src->is_Array()) return -1;
    __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
    __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);

    // At this point, it is known to be a typeArray (array_tag 0x3).
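    //
    // For reference, the layout helper fields loaded above decode roughly
    // as follows in C (illustrative only; the code below does the same with
    // srl/and3 using these same Klass constants):
    //
    //   int tag        = lh >> Klass::_lh_array_tag_shift;
    //   int hdr_size   = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
    //   int l2_elem_sz = lh & Klass::_lh_log2_element_size_mask;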
#ifdef ASSERT
    __ delayed()->nop();
    { Label L;
      jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
      __ set(lh_prim_tag_in_place, O5_temp);
      __ cmp(G5_lh, O5_temp);
      __ br(Assembler::greaterEqual, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("must be a primitive array");
      __ bind(L);
    }
#else
    __ delayed();                               // match next insn to prev branch
#endif

    arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                           O5_temp, G4_dst_klass, L_failed);

    // typeArrayKlass
    //
    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
    //

    const Register G4_offset = G4_dst_klass;    // array offset
    const Register G3_elsize = G3_src_klass;    // log2 element size

    __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset);
    __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset); // array_offset
    __ add(src, G4_offset, src);       // src array offset
    __ add(dst, G4_offset, dst);       // dst array offset
    __ and3(G5_lh, Klass::_lh_log2_element_size_mask, G3_elsize); // log2 element size

    // next registers should be set before the jump to corresponding stub
    const Register from  = O0;  // source array address
    const Register to    = O1;  // destination array address
    const Register count = O2;  // elements count

    // 'from', 'to', 'count' registers should be set in this order
    // since they are the same as 'src', 'src_pos', 'dst'.

    BLOCK_COMMENT("scale indexes to element size");
    __ sll_ptr(src_pos, G3_elsize, src_pos);
    __ sll_ptr(dst_pos, G3_elsize, dst_pos);
    __ add(src, src_pos, from);       // src_addr
    __ add(dst, dst_pos, to);         // dst_addr

    BLOCK_COMMENT("choose copy loop based on element size");
    __ cmp(G3_elsize, 0);
    __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jbyte_arraycopy);
    __ delayed()->signx(length, count); // length

    __ cmp(G3_elsize, LogBytesPerShort);
    __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jshort_arraycopy);
    __ delayed()->signx(length, count); // length

    __ cmp(G3_elsize, LogBytesPerInt);
    __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jint_arraycopy);
    __ delayed()->signx(length, count); // length
#ifdef ASSERT
    { Label L;
      __ cmp(G3_elsize, LogBytesPerLong);
      __ br(Assembler::equal, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("must be long copy, but elsize is wrong");
      __ bind(L);
    }
#endif
    __ br(Assembler::always, false, Assembler::pt, StubRoutines::_jlong_arraycopy);
    __ delayed()->signx(length, count); // length

    // objArrayKlass
    __ BIND(L_objArray);
    // live at this point:  G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length

    Label L_plain_copy, L_checkcast_copy;
    // test array classes for subtyping
    __ cmp(G3_src_klass, G4_dst_klass);         // usual case is exact equality
    __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
    __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below

    // Identically typed arrays can be copied without element-wise checks.
    arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                           O5_temp, G5_lh, L_failed);

    __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); // src offset
    __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); // dst offset
    __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
    __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
    __ add(src, src_pos, from);       // src_addr
    __ add(dst, dst_pos, to);         // dst_addr
    __ BIND(L_plain_copy);
    __ br(Assembler::always, false, Assembler::pt, StubRoutines::_oop_arraycopy);
    __ delayed()->signx(length, count); // length

    __ BIND(L_checkcast_copy);
    // live at this point:  G3_src_klass, G4_dst_klass
    {
      // Before looking at dst.length, make sure dst is also an objArray.
      // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot
      __ cmp(G5_lh, O5_temp);
      __ br(Assembler::notEqual, false, Assembler::pn, L_failed);

      // It is safe to examine both src.length and dst.length.
      __ delayed();                             // match next insn to prev branch
      arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                             O5_temp, G5_lh, L_failed);

      // Marshal the base address arguments now, freeing registers.
      __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); // src offset
      __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); // dst offset
      __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
      __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
      __ add(src, src_pos, from);               // src_addr
      __ add(dst, dst_pos, to);                 // dst_addr
      __ signx(length, count);                  // length (reloaded)

      Register sco_temp = O3;                   // this register is free now
      assert_different_registers(from, to, count, sco_temp,
                                 G4_dst_klass, G3_src_klass);

      // Generate the type check.
      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                        Klass::super_check_offset_offset_in_bytes());
      __ lduw(G4_dst_klass, sco_offset, sco_temp);
      generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
                          O5_temp, L_plain_copy);

      // Fetch destination element klass from the objArrayKlass header.
      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
                       objArrayKlass::element_klass_offset_in_bytes());

      // the checkcast_copy loop needs two extra arguments:
      __ ld_ptr(G4_dst_klass, ek_offset, O4);   // dest elem klass
      // lduw(O4, sco_offset, O3); // sco of elem klass

      __ br(Assembler::always, false, Assembler::pt, checkcast_copy_entry);
      __ delayed()->lduw(O4, sco_offset, O3);
    }

    __ BIND(L_failed);
    __ retl();
    __ delayed()->sub(G0, 1, O0); // return -1
    return start;
  }

  void generate_arraycopy_stubs() {

    // Note: the disjoint stubs must be generated first, some of
    //       the conjoint stubs use them.
    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
    StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy");
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jint_disjoint_arraycopy   = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
    StubRoutines::_arrayof_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
    StubRoutines::_arrayof_oop_disjoint_arraycopy    = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy");

    StubRoutines::_jbyte_arraycopy  = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
    StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
    StubRoutines::_jint_arraycopy   = generate_conjoint_int_copy(false, "jint_arraycopy");
    StubRoutines::_jlong_arraycopy  = generate_conjoint_long_copy(false, "jlong_arraycopy");
    StubRoutines::_oop_arraycopy    = generate_conjoint_oop_copy(false, "oop_arraycopy");
    StubRoutines::_arrayof_jbyte_arraycopy  = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
#ifdef _LP64
    // since sizeof(jint) < sizeof(HeapWord), there's a different flavor:
    StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy");
#else
    StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
#endif
    StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
    StubRoutines::_arrayof_oop_arraycopy   = StubRoutines::_oop_arraycopy;

    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
  }

  void generate_initial() {
    // Generates all stubs and initializes the entry points

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that exist in all platforms
    // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
    //       the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
    StubRoutines::_forward_exception_entry                 = generate_forward_exception();

    StubRoutines::_call_stub_entry                         = generate_call_stub(StubRoutines::_call_stub_return_address);
    StubRoutines::_catch_exception_entry                   = generate_catch_exception();

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that are platform specific
    StubRoutines::Sparc::_test_stop_entry                  = generate_test_stop();

    StubRoutines::Sparc::_stop_subroutine_entry            = generate_stop_subroutine();
    StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();

#if !defined(COMPILER2) && !defined(_LP64)
    StubRoutines::_atomic_xchg_entry         = generate_atomic_xchg();
    StubRoutines::_atomic_cmpxchg_entry      = generate_atomic_cmpxchg();
    StubRoutines::_atomic_add_entry          = generate_atomic_add();
    StubRoutines::_atomic_xchg_ptr_entry     = StubRoutines::_atomic_xchg_entry;
    StubRoutines::_atomic_cmpxchg_ptr_entry  = StubRoutines::_atomic_cmpxchg_entry;
    StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
    StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
#endif  // COMPILER2 !=> _LP64
  }


  void generate_all() {
    // Generates all stubs and initializes the entry points

    // Generate partial_subtype_check first here since its code depends on
    // UseZeroBaseCompressedOops which is defined after heap initialization.
    StubRoutines::Sparc::_partial_subtype_check            = generate_partial_subtype_check();
    // These entry points require SharedInfo::stack0 to be set up in non-core builds
    StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
    StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
    StubRoutines::_throw_ArithmeticException_entry         = generate_throw_exception("ArithmeticException throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException),  true);
    StubRoutines::_throw_NullPointerException_entry        = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
    StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),   false);

    StubRoutines::_handler_for_unsafe_access_entry =
      generate_handler_for_unsafe_access();

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop_subroutine();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    // Don't initialize the platform math functions since sparc
    // doesn't have intrinsics for these operations.
  }


 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    // replace the standard masm with a special one:
    _masm = new MacroAssembler(code);

    _stub_count = !all ? 0x100 : 0x200;
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }

    // make sure this stub is available for all local calls
    if (_atomic_add_stub.is_unbound()) {
      // generate a second time, if necessary
      (void) generate_atomic_add();
    }
  }


 private:
  int _stub_count;
  void stub_prolog(StubCodeDesc* cdesc) {
# ifdef ASSERT
    // put extra information in the stub code, to make it more readable
#ifdef _LP64
    // Write the high part of the address
    // [RGV] Check if there is a dependency on the size of this prolog
    __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
#endif
    __ emit_data((intptr_t)cdesc, relocInfo::none);
    __ emit_data(++_stub_count, relocInfo::none);
# endif
    align(true);
  }

  void align(bool at_header = false) {
    // %%%%% move this constant somewhere else
    // UltraSPARC cache line size is 8 instructions:
    const unsigned int icache_line_size = 32;
    const unsigned int icache_half_line_size = 16;

    if (at_header) {
      while ((intptr_t)(__ pc()) % icache_line_size != 0) {
        __ emit_data(0, relocInfo::none);
      }
    } else {
      while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
        __ nop();
      }
    }
  }

}; // end class declaration


address StubGenerator::disjoint_byte_copy_entry  = NULL;
address StubGenerator::disjoint_short_copy_entry = NULL;
address StubGenerator::disjoint_int_copy_entry   = NULL;
address StubGenerator::disjoint_long_copy_entry  = NULL;
address StubGenerator::disjoint_oop_copy_entry   = NULL;

address StubGenerator::byte_copy_entry  = NULL;
address StubGenerator::short_copy_entry = NULL;
address StubGenerator::int_copy_entry   = NULL;
address StubGenerator::long_copy_entry  = NULL;
address StubGenerator::oop_copy_entry   = NULL;

address StubGenerator::checkcast_copy_entry = NULL;

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}
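
// Note: StubGenerator_generate is expected to run twice during VM startup:
// once with all == false for the initial entry points (e.g. the call stub),
// and later with all == true for the remaining stubs, some of which depend
// on universe and heap initialization (see StubRoutines::initialize1() and
// StubRoutines::initialize2() in stubRoutines.cpp).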