/*
 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "assembler_sparc.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_sparc.hpp"
#include "oops/instanceOop.hpp"
#include "oops/methodOop.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/top.hpp"
#ifdef TARGET_OS_FAMILY_linux
# include "thread_linux.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_solaris
# include "thread_solaris.inline.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#define __ _masm->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Note: The register L7 is used as L7_thread_cache, and may not be used
//       any other way within this module.
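// The "__" shorthand above simply forwards to the MacroAssembler held in
// _masm, and BIND() additionally emits a block comment in non-PRODUCT builds.
// As an illustrative (non-normative) example of what the preprocessor
// produces for a typical use in this file:
//
//   __ BIND(exit);
//   // expands, outside PRODUCT builds, to roughly:
//   _masm->bind(exit); _masm->block_comment("exit" ":");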


static const Register& Lstub_temp = L2;

// -------------------------------------------------------------------------------------------------------------------------
// Stub Code definitions

static address handle_unsafe_access() {
  JavaThread* thread = JavaThread::current();
  address pc  = thread->saved_exception_pc();
  address npc = thread->saved_exception_npc();
  // pc is the instruction which we must emulate
  // doing a no-op is fine: return garbage from the load

  // request an async exception
  thread->set_pending_unsafe_access_error();

  // return address of next instruction to execute
  return npc;
}

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(a,b,c) (0)
#else
#define inc_counter_np(counter, t1, t2) \
  BLOCK_COMMENT("inc_counter " #counter); \
  __ inc_counter(&counter, t1, t2);
#endif

  //----------------------------------------------------------------------------------------------------
  // Call stubs are used to call Java from C

  address generate_call_stub(address& return_pc) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // Incoming arguments:
    //
    // o0         : call wrapper address
    // o1         : result (address)
    // o2         : result type
    // o3         : method
    // o4         : (interpreter) entry point
    // o5         : parameters (address)
    // [sp + 0x5c]: parameter size (in words)
    // [sp + 0x60]: thread
    //
    // +---------------+ <--- sp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- sp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- sp + 0x5c
    // |  param. size  |
    // +---------------+ <--- sp + 0x60
    // |    thread     |
    // +---------------+
    // |               |

    // note: if the link argument position changes, adjust
    //       the code in frame::entry_frame_call_wrapper()

    const Argument link           = Argument(0, false); // used only for GC
    const Argument result         = Argument(1, false);
    const Argument result_type    = Argument(2, false);
    const Argument method         = Argument(3, false);
    const Argument entry_point    = Argument(4, false);
    const Argument parameters     = Argument(5, false);
    const Argument parameter_size = Argument(6, false);
    const Argument thread         = Argument(7, false);

    // setup thread register
    __ ld_ptr(thread.as_address(), G2_thread);
    __ reinit_heapbase();

#ifdef ASSERT
    // make sure we have no pending exceptions
    { const Register t = G3_scratch;
      Label L;
      __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
      __ br_null_short(t, Assembler::pt, L);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // create activation frame & allocate space for parameters
    { const Register t = G3_scratch;
      __ ld_ptr(parameter_size.as_address(), t);            // get parameter size (in words)
      __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
      __ round_to(t, WordsPerLong);                         // make sure it is multiple of 2 (in words)
      __ sll(t, Interpreter::logStackElementSize, t);       // compute number of bytes
      __ neg(t);                                            // negate so it can be used with save
      __ save(SP, t, SP);                                   // setup new frame
    }

    // +---------------+ <--- sp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- sp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- sp + 0x5c
    // |  empty slot   |      (only if parameter size is even)
    // +---------------+
    // |               |
    // .  parameters   .
    // |               |
    // +---------------+ <--- fp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- fp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- fp + 0x5c
    // |  param. size  |
    // +---------------+ <--- fp + 0x60
    // |    thread     |
    // +---------------+
    // |               |

    // pass parameters if any
    BLOCK_COMMENT("pass parameters if any");
    { const Register src = parameters.as_in().as_register();
      const Register dst = Lentry_args;
      const Register tmp = G3_scratch;
      const Register cnt = G4_scratch;

      // test if any parameters & setup of Lentry_args
      Label exit;
      __ ld_ptr(parameter_size.as_in().as_address(), cnt);  // parameter counter
      __ add( FP, STACK_BIAS, dst );
      __ cmp_zero_and_br(Assembler::zero, cnt, exit);
      __ delayed()->sub(dst, BytesPerWord, dst);             // setup Lentry_args

      // copy parameters if any
      Label loop;
      __ BIND(loop);
      // Store parameter value
      __ ld_ptr(src, 0, tmp);
      __ add(src, BytesPerWord, src);
      __ st_ptr(tmp, dst, 0);
      __ deccc(cnt);
      __ br(Assembler::greater, false, Assembler::pt, loop);
      __ delayed()->sub(dst, Interpreter::stackElementSize, dst);

      // done
      __ BIND(exit);
    }

    // setup parameters, method & call Java function
#ifdef ASSERT
    // layout_activation_impl checks its notion of saved SP against
    // this register, so if this changes update it as well.
    const Register saved_SP = Lscratch;
    __ mov(SP, saved_SP);                               // keep track of SP before call
#endif

    // setup parameters
    const Register t = G3_scratch;
    __ ld_ptr(parameter_size.as_in().as_address(), t);  // get parameter size (in words)
    __ sll(t, Interpreter::logStackElementSize, t);      // compute number of bytes
    __ sub(FP, t, Gargs);                                // setup parameter pointer
#ifdef _LP64
    __ add( Gargs, STACK_BIAS, Gargs );                  // Account for LP64 stack bias
#endif
    __ mov(SP, O5_savedSP);


    // do the call
    //
    // the following registers must be set up:
    //
    // G2_thread
    // G5_method
    // Gargs
    BLOCK_COMMENT("call Java function");
    __ jmpl(entry_point.as_in().as_register(), G0, O7);
    __ delayed()->mov(method.as_in().as_register(), G5_method);   // setup method

    BLOCK_COMMENT("call_stub_return_address:");
    return_pc = __ pc();

    // The callee, if it wasn't interpreted, can return with SP changed so
    // we can no longer assert on the change of SP.

    // store result depending on type
    //   (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
    //    is treated as T_INT)
    { const Register addr = result     .as_in().as_register();
      const Register type = result_type.as_in().as_register();
      Label is_long, is_float, is_double, is_object, exit;
      __ cmp(type, T_OBJECT);            __ br(Assembler::equal, false, Assembler::pn, is_object);
      __ delayed()->cmp(type, T_FLOAT);  __ br(Assembler::equal, false, Assembler::pn, is_float);
      __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
      __ delayed()->cmp(type, T_LONG);   __ br(Assembler::equal, false, Assembler::pn, is_long);
      __ delayed()->nop();

      // store int result
      __ st(O0, addr, G0);

      __ BIND(exit);
      __ ret();
      __ delayed()->restore();

      __ BIND(is_object);
      __ ba(exit);
      __ delayed()->st_ptr(O0, addr, G0);

      __ BIND(is_float);
      __ ba(exit);
      __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);

      __ BIND(is_double);
      __ ba(exit);
      __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);

      __ BIND(is_long);
#ifdef _LP64
      __ ba(exit);
      __ delayed()->st_long(O0, addr, G0);      // store entire long
#else
#if defined(COMPILER2)
      // All return values are where we want them, except for Longs.  C2 returns
      // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
      // Since the interpreter will return longs in G1 and O0/O1 in the 32-bit
      // build we simply always use G1.
      // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
      // do this here. Unfortunately if we did a rethrow we'd see a MachEpilog node
      // first which would move g1 -> O0/O1 and destroy the exception we were throwing.

      __ ba(exit);
      __ delayed()->stx(G1, addr, G0);  // store entire long
#else
      __ st(O1, addr, BytesPerInt);
      __ ba(exit);
      __ delayed()->st(O0, addr, G0);
#endif /* COMPILER2 */
#endif /* _LP64 */
    }
    return start;
  }

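  // For orientation (this is an illustrative sketch, not code from this file):
  // the generated call stub is reached from C++ through the CallStub function
  // pointer type declared in stubRoutines.hpp, whose parameters mirror the
  // incoming-argument list documented above, roughly:
  //
  //   StubRoutines::call_stub()(
  //     (address)&link,             // call wrapper (o0)
  //     result_val_address,         // result address (o1)
  //     result_type,                // result BasicType (o2)
  //     method(),                   // methodOop (o3)
  //     entry_point,                // interpreter entry point (o4)
  //     args->parameters(),         // parameter area (o5)
  //     args->size_of_parameters(), // parameter size in words
  //     CHECK);                     // current thread
  //
  // The actual call site lives in JavaCalls::call_helper(); the names above
  // are from memory and may differ slightly between releases.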

  //----------------------------------------------------------------------------------------------------
  // Return point for a Java call if there's an exception thrown in Java code.
  // The exception is caught and transformed into a pending exception stored in
  // JavaThread that can be tested from within the VM.
  //
  // Oexception: exception oop

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");

    address start = __ pc();
    // verify that thread corresponds
    __ verify_thread();

    const Register& temp_reg = Gtemp;
    Address pending_exception_addr    (G2_thread, Thread::pending_exception_offset());
    Address exception_file_offset_addr(G2_thread, Thread::exception_file_offset   ());
    Address exception_line_offset_addr(G2_thread, Thread::exception_line_offset   ());

    // set pending exception
    __ verify_oop(Oexception);
    __ st_ptr(Oexception, pending_exception_addr);
    __ set((intptr_t)__FILE__, temp_reg);
    __ st_ptr(temp_reg, exception_file_offset_addr);
    __ set((intptr_t)__LINE__, temp_reg);
    __ st(temp_reg, exception_line_offset_addr);

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");

    AddressLiteral stub_ret(StubRoutines::_call_stub_return_address);
    __ jump_to(stub_ret, temp_reg);
    __ delayed()->nop();

    return start;
  }


  //----------------------------------------------------------------------------------------------------
  // Continuation point for runtime calls returning with a pending exception
  // The pending exception check happened in the runtime or native call stub
  // The pending exception in Thread is converted into a Java-level exception
  //
  // Contract with Java-level exception handler: O0 = exception
  //                                             O1 = throwing pc

  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    // Upon entry, O7 has the return address returning into Java
    // (interpreted or compiled) code; i.e. the return address
    // becomes the throwing pc.

    const Register& handler_reg = Gtemp;

    Address exception_addr(G2_thread, Thread::pending_exception_offset());

#ifdef ASSERT
    // make sure that this code is only executed if there is a pending exception
    { Label L;
      __ ld_ptr(exception_addr, Gtemp);
      __ br_notnull_short(Gtemp, Assembler::pt, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into handler_reg
    __ get_thread();
    __ ld_ptr(exception_addr, Oexception);
    __ verify_oop(Oexception);
    __ save_frame(0);             // compensates for compiler weakness
    __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
    __ mov(O0, handler_reg);
    __ restore();                 // compensates for compiler weakness

    __ ld_ptr(exception_addr, Oexception);
    __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ br_notnull_short(Oexception, Assembler::pt, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // jump to exception handler
    __ jmp(handler_reg, 0);
    // clear pending exception
    __ delayed()->st_ptr(G0, exception_addr);

    return start;
  }


  //------------------------------------------------------------------------------------------------------------------------
  // Continuation point for throwing of implicit exceptions that are not handled in
  // the current activation. Fabricates an exception oop and initiates normal
  // exception dispatching in this frame. Only callee-saved registers are preserved
  // (through the normal register window / RegisterMap handling).
  // If the compiler needs all registers to be preserved between the fault
  // point and the exception handler then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other implicit
  // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
  // either at call sites or otherwise assume that stack unwinding will be initiated,
  // so caller saved registers were assumed volatile in the compiler.

  // Note that we generate only this stub into a RuntimeStub, because it needs to be
  // properly traversed and ignored during GC, so we change the meaning of the "__"
  // macro within this method.
#undef __
#define __ masm->

  address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc,
                                   Register arg1 = noreg, Register arg2 = noreg) {
#ifdef ASSERT
    int insts_size = VerifyThread ? 1 * K : 600;
#else
    int insts_size = VerifyThread ? 1 * K : 256;
#endif /* ASSERT */
    int locs_size  = 32;

    CodeBuffer      code(name, insts_size, locs_size);
    MacroAssembler* masm = new MacroAssembler(&code);

    __ verify_thread();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of thread-local storage
    __ assert_not_delayed();

    // Note that we always push a frame because on the SPARC
    // architecture, for all of our implicit exception kinds at call
    // sites, the implicit exception is taken before the callee frame
    // is pushed.
    __ save_frame(0);

    int frame_complete = __ offset();

    if (restore_saved_exception_pc) {
      __ ld_ptr(G2_thread, JavaThread::saved_exception_pc_offset(), I7);
      __ sub(I7, frame::pc_return_offset, I7);
    }

    // Note that we always have a runtime stub frame on the top of stack by this point
    Register last_java_sp = SP;
    // 64-bit last_java_sp is biased!
    __ set_last_Java_frame(last_java_sp, G0);
    if (VerifyThread)  __ mov(G2_thread, O0); // about to be smashed; pass early
    __ save_thread(noreg);
    if (arg1 != noreg) {
      assert(arg2 != O1, "clobbered");
      __ mov(arg1, O1);
    }
    if (arg2 != noreg) {
      __ mov(arg2, O2);
    }
    // do the call
    BLOCK_COMMENT("call runtime_entry");
    __ call(runtime_entry, relocInfo::runtime_call_type);
    if (!VerifyThread)
      __ delayed()->mov(G2_thread, O0);  // pass thread as first argument
    else
      __ delayed()->nop();               // (thread already passed)
    __ restore_thread(noreg);
    __ reset_last_Java_frame();

    // check for pending exceptions. use Gtemp as scratch register.
#ifdef ASSERT
    Label L;

    Address exception_addr(G2_thread, Thread::pending_exception_offset());
    Register scratch_reg = Gtemp;
    __ ld_ptr(exception_addr, scratch_reg);
    __ br_notnull_short(scratch_reg, Assembler::pt, L);
    __ should_not_reach_here();
    __ bind(L);
#endif // ASSERT
    BLOCK_COMMENT("call forward_exception_entry");
    __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    // we use O7 linkage so that forward_exception_entry has the issuing PC
    __ delayed()->restore();

    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
    return stub->entry_point();
  }

#undef __
#define __ _masm->

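  // How these throw stubs are consumed (illustrative sketch only; the actual
  // wiring happens in the generate_* methods further down in this file, which
  // are not shown here, and the exact argument lists may differ):
  //
  //   StubRoutines::_throw_StackOverflowError_entry =
  //     generate_throw_exception("StackOverflowError throw_exception",
  //                              CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),
  //                              false /* restore_saved_exception_pc */);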

  // Generate a routine that sets all the registers so we
  // can tell if the stop routine prints them correctly.
  address generate_test_stop() {
    StubCodeMark mark(this, "StubRoutines", "test_stop");
    address start = __ pc();

    int i;

    __ save_frame(0);

    static jfloat zero = 0.0, one = 1.0;

    // put addr in L0, then load through L0 to F0
    __ set((intptr_t)&zero, L0);  __ ldf( FloatRegisterImpl::S, L0, 0, F0);
    __ set((intptr_t)&one,  L0);  __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1

    // use add to put 2..18 in F2..F18
    for ( i = 2;  i <= 18;  ++i ) {
      __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1),  as_FloatRegister(i));
    }

    // Now put double 2 in F16, double 18 in F18
    __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2,  F16 );
    __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 );

    // use add to put 20..32 in F20..F32
    for (i = 20;  i < 32;  i += 2) {
      __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2),  as_FloatRegister(i));
    }

    // put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's
    for ( i = 0; i < 8; ++i ) {
      if (i < 6) {
        __ set(     i, as_iRegister(i));
        __ set(16 + i, as_oRegister(i));
        __ set(24 + i, as_gRegister(i));
      }
      __ set( 8 + i, as_lRegister(i));
    }

    __ stop("testing stop");


    __ ret();
    __ delayed()->restore();

    return start;
  }


  address generate_stop_subroutine() {
    StubCodeMark mark(this, "StubRoutines", "stop_subroutine");
    address start = __ pc();

    __ stop_subroutine();

    return start;
  }

  address generate_flush_callers_register_windows() {
    StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows");
    address start = __ pc();

    __ flush_windows();
    __ retl(false);
    __ delayed()->add( FP, STACK_BIAS, O0 );
    // The returned value must be a stack pointer whose register save area
    // is flushed, and will stay flushed while the caller executes.

    return start;
  }

  // Helper functions for v8 atomic operations.
  //
  void get_v8_oop_lock_ptr(Register lock_ptr_reg, Register mark_oop_reg, Register scratch_reg) {
    if (mark_oop_reg == noreg) {
      address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
      __ set((intptr_t)lock_ptr, lock_ptr_reg);
    } else {
      assert(scratch_reg != noreg, "just checking");
      address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
      __ set((intptr_t)lock_ptr, lock_ptr_reg);
      __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
      __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
    }
  }

  void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {

    get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
    __ set(StubRoutines::Sparc::locked, lock_reg);
    // Initialize yield counter
    __ mov(G0,yield_reg);

    __ BIND(retry);
    __ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield);

    // This code can only be called from inside the VM; this
    // stub is only invoked from Atomic::add().  We do not
    // want to use call_VM, because _last_java_sp and such
    // must already be set.
    //
    // Save the regs and make space for a C call
    __ save(SP, -96, SP);
    __ save_all_globals_into_locals();
    BLOCK_COMMENT("call os::naked_sleep");
    __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
    __ delayed()->nop();
    __ restore_globals_from_locals();
    __ restore();
    // reset the counter
    __ mov(G0,yield_reg);

    __ BIND(dontyield);

    // try to get lock
    __ swap(lock_ptr_reg, 0, lock_reg);

    // did we get the lock?
    __ cmp(lock_reg, StubRoutines::Sparc::unlocked);
    __ br(Assembler::notEqual, true, Assembler::pn, retry);
    __ delayed()->add(yield_reg,1,yield_reg);

    // yes, got lock. do the operation here.
  }

  void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
    __ st(lock_reg, lock_ptr_reg, 0); // unlock
  }

  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
  //
  // Arguments :
  //
  //      exchange_value: O0
  //      dest:           O1
  //
  // Results:
  //
  //     O0: the value previously stored in dest
  //
  address generate_atomic_xchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
    address start = __ pc();

    if (UseCASForSwap) {
      // Use CAS instead of swap, just in case the MP hardware
      // prefers to work with just one kind of synch. instruction.
      Label retry;
      __ BIND(retry);
      __ mov(O0, O3);       // scratch copy of exchange value
      __ ld(O1, 0, O2);     // observe the previous value
      // try to replace O2 with O3
      __ cas_under_lock(O1, O2, O3,
            (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
      __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);

      __ retl(false);
      __ delayed()->mov(O2, O0);  // report previous value to caller

    } else {
      if (VM_Version::v9_instructions_work()) {
        __ retl(false);
        __ delayed()->swap(O1, 0, O0);
      } else {
        const Register& lock_reg     = O2;
        const Register& lock_ptr_reg = O3;
        const Register& yield_reg    = O4;

        Label retry;
        Label dontyield;

        generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
        // got the lock, do the swap
        __ swap(O1, 0, O0);

        generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
        __ retl(false);
        __ delayed()->nop();
      }
    }

    return start;
  }


  // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
  //
  // Arguments :
  //
  //      exchange_value: O0
  //      dest:           O1
  //      compare_value:  O2
  //
  // Results:
  //
  //     O0: the value previously stored in dest
  //
  // Overwrites (v8): O3,O4,O5
  //
  address generate_atomic_cmpxchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
    address start = __ pc();

    // cmpxchg(dest, compare_value, exchange_value)
    __ cas_under_lock(O1, O2, O0,
          (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
    __ retl(false);
    __ delayed()->nop();

    return start;
  }

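  // For reference, a typical caller-side pattern built on the cmpxchg
  // contract documented above (illustrative only, not code from this file):
  // an atomic increment retries until the compare-and-swap observes an
  // unchanged value.
  //
  //   jint old;
  //   do {
  //     old = *dest;
  //   } while (Atomic::cmpxchg(old + 1, dest, old) != old);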
  // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
  //
  // Arguments :
  //
  //      exchange_value: O1:O0
  //      dest:           O2
  //      compare_value:  O4:O3
  //
  // Results:
  //
  //     O1:O0: the value previously stored in dest
  //
  // This only works on V9, on V8 we don't generate any
  // code and just return NULL.
  //
  // Overwrites: G1,G2,G3
  //
  address generate_atomic_cmpxchg_long() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
    address start = __ pc();

    if (!VM_Version::supports_cx8())
      return NULL;
    __ sllx(O0, 32, O0);
    __ srl(O1, 0, O1);
    __ or3(O0,O1,O0);      // O0 holds 64-bit value from compare_value
    __ sllx(O3, 32, O3);
    __ srl(O4, 0, O4);
    __ or3(O3,O4,O3);      // O3 holds 64-bit value from exchange_value
    __ casx(O2, O3, O0);
    __ srl(O0, 0, O1);     // unpacked return value in O1:O0
    __ retl(false);
    __ delayed()->srlx(O0, 32, O0);

    return start;
  }


  // Support for jint Atomic::add(jint add_value, volatile jint* dest).
  //
  // Arguments :
  //
  //      add_value: O0   (e.g., +1 or -1)
  //      dest:      O1
  //
  // Results:
  //
  //     O0: the new value stored in dest
  //
  // Overwrites (v9): O3
  // Overwrites (v8): O3,O4,O5
  //
  address generate_atomic_add() {
    StubCodeMark mark(this, "StubRoutines", "atomic_add");
    address start = __ pc();
    __ BIND(_atomic_add_stub);

    if (VM_Version::v9_instructions_work()) {
      Label(retry);
      __ BIND(retry);

      __ lduw(O1, 0, O2);
      __ add(O0, O2, O3);
      __ cas(O1, O2, O3);
      __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
      __ retl(false);
      __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
    } else {
      const Register& lock_reg     = O2;
      const Register& lock_ptr_reg = O3;
      const Register& value_reg    = O4;
      const Register& yield_reg    = O5;

      Label(retry);
      Label(dontyield);

      generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
      // got lock, do the increment
      __ ld(O1, 0, value_reg);
      __ add(O0, value_reg, value_reg);
      __ st(value_reg, O1, 0);

      // %%% only for RMO and PSO
      __ membar(Assembler::StoreStore);

      generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);

      __ retl(false);
      __ delayed()->mov(value_reg, O0);
    }

    return start;
  }
  Label _atomic_add_stub;  // called from other stubs


  //------------------------------------------------------------------------------------------------------------------------
  // The following routine generates a subroutine to throw an asynchronous
  // UnknownError when an unsafe access gets a fault that could not be
  // reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
  //
  // Arguments :
  //
  //      trapping PC:    O7
  //
  // Results:
  //     posts an asynchronous exception, skips the trapping instruction
  //

  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();

    const int preserve_register_words = (64 * 2);
    Address preserve_addr(FP, (-preserve_register_words * wordSize) + STACK_BIAS);

    Register Lthread = L7_thread_cache;
    int i;

    __ save_frame(0);
    __ mov(G1, L1);
    __ mov(G2, L2);
    __ mov(G3, L3);
    __ mov(G4, L4);
    __ mov(G5, L5);
    for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
      __ stf(FloatRegisterImpl::D, as_FloatRegister(i), preserve_addr, i * wordSize);
    }

    address entry_point = CAST_FROM_FN_PTR(address, handle_unsafe_access);
    BLOCK_COMMENT("call handle_unsafe_access");
    __ call(entry_point, relocInfo::runtime_call_type);
    __ delayed()->nop();

    __ mov(L1, G1);
    __ mov(L2, G2);
    __ mov(L3, G3);
    __ mov(L4, G4);
    __ mov(L5, G5);
    for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
      __ ldf(FloatRegisterImpl::D, preserve_addr, as_FloatRegister(i), i * wordSize);
    }

    __ verify_thread();

    __ jmp(O0, 0);
    __ delayed()->restore();

    return start;
  }


  // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super );
  // Arguments :
  //
  //      ret  : O0, returned
  //      icc/xcc: set as O0 (depending on wordSize)
  //      sub  : O1, argument, not changed
  //      super: O2, argument, not changed
  //      raddr: O7, blown by call
  address generate_partial_subtype_check() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    address start = __ pc();
    Label miss;

#if defined(COMPILER2) && !defined(_LP64)
    // Do not use a 'save' because it blows the 64-bit O registers.
    __ add(SP,-4*wordSize,SP);  // Make space for 4 temps (stack must be 2 words aligned)
    __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
    __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
    __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
    __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
    Register Rret   = O0;
    Register Rsub   = O1;
    Register Rsuper = O2;
#else
    __ save_frame(0);
    Register Rret   = I0;
    Register Rsub   = I1;
    Register Rsuper = I2;
#endif

    Register L0_ary_len = L0;
    Register L1_ary_ptr = L1;
    Register L2_super   = L2;
    Register L3_index   = L3;

    __ check_klass_subtype_slow_path(Rsub, Rsuper,
                                     L0, L1, L2, L3,
                                     NULL, &miss);

    // Match falls through here.
    __ addcc(G0,0,Rret);        // set Z flags, Z result

#if defined(COMPILER2) && !defined(_LP64)
    __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
    __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
    __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
    __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
    __ retl();                  // Result in Rret is zero; flags set to Z
    __ delayed()->add(SP,4*wordSize,SP);
#else
    __ ret();                   // Result in Rret is zero; flags set to Z
    __ delayed()->restore();
#endif

    __ BIND(miss);
    __ addcc(G0,1,Rret);        // set NZ flags, NZ result

#if defined(COMPILER2) && !defined(_LP64)
    __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
    __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
    __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
    __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
    __ retl();                  // Result in Rret is != 0; flags set to NZ
    __ delayed()->add(SP,4*wordSize,SP);
#else
    __ ret();                   // Result in Rret is != 0; flags set to NZ
    __ delayed()->restore();
#endif

    return start;
  }


  // Called from MacroAssembler::verify_oop
  //
  address generate_verify_oop_subroutine() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");

    address start = __ pc();

    __ verify_oop_subroutine();

    return start;
  }


  //
  // Verify that a register contains clean 32-bits positive value
  // (high 32-bits are 0) so it could be used in 64-bits shifts (sllx, srax).
  //
  //  Input:
  //    Rint  -  32-bits value
  //    Rtmp  -  scratch
  //
  void assert_clean_int(Register Rint, Register Rtmp) {
#if defined(ASSERT) && defined(_LP64)
    __ signx(Rint, Rtmp);
    __ cmp(Rint, Rtmp);
    __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
#endif
  }

  //
  //  Generate overlap test for array copy stubs
  //
  //  Input:
  //    O0    -  array1
  //    O1    -  array2
  //    O2    -  element count
  //
  //  Kills temps:  O3, O4
  //
  void array_overlap_test(address no_overlap_target, int log2_elem_size) {
    assert(no_overlap_target != NULL, "must be generated");
    array_overlap_test(no_overlap_target, NULL, log2_elem_size);
  }
  void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
    array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
  }
  void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
    const Register from       = O0;
    const Register to         = O1;
    const Register count      = O2;
    const Register to_from    = O3; // to - from
    const Register byte_count = O4; // count << log2_elem_size

    __ subcc(to, from, to_from);
    __ sll_ptr(count, log2_elem_size, byte_count);
    if (NOLp == NULL)
      __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target);
    else
      __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp));
    __ delayed()->cmp(to_from, byte_count);
    if (NOLp == NULL)
      __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, no_overlap_target);
    else
      __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, (*NOLp));
    __ delayed()->nop();
  }

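  // Worked example of the overlap test above (illustrative comment only):
  // with from = 0x1000, to = 0x1008 and a byte_count of 16, "to - from" is 8.
  // The first branch is not taken (to > from, unsigned), the second compare
  // sees 8 < 16, so neither branch to the no-overlap path is taken and the
  // caller falls through to the backward (conjoint) copy. The forward path is
  // only chosen when to <= from or when the regions are at least byte_count
  // apart, i.e. when a forward copy cannot clobber unread source elements.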
  //
  //  Generate pre-write barrier for array.
  //
  //  Input:
  //     addr     - register containing starting address
  //     count    - register containing element count
  //     tmp      - scratch register
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        // With G1, don't generate the call if we statically know that the target is uninitialized
        if (!dest_uninitialized) {
          __ save_frame(0);
          // Save the necessary global regs... will be used after.
          if (addr->is_global()) {
            __ mov(addr, L0);
          }
          if (count->is_global()) {
            __ mov(count, L1);
          }
          __ mov(addr->after_save(), O0);
          // Get the count into O1
          __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
          __ delayed()->mov(count->after_save(), O1);
          if (addr->is_global()) {
            __ mov(L0, addr);
          }
          if (count->is_global()) {
            __ mov(L1, count);
          }
          __ restore();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();
    }
  }
  //
  //  Generate post-write barrier for array.
  //
  //  Input:
  //     addr     - register containing starting address
  //     count    - register containing element count
  //     tmp      - scratch register
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_post_barrier(Register addr, Register count,
                                        Register tmp) {
    BarrierSet* bs = Universe::heap()->barrier_set();

    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        {
          // Get some new fresh output registers.
          __ save_frame(0);
          __ mov(addr->after_save(), O0);
          __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
          __ delayed()->mov(count->after_save(), O1);
          __ restore();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
          assert_different_registers(addr, count, tmp);

          Label L_loop;

          __ sll_ptr(count, LogBytesPerHeapOop, count);
          __ sub(count, BytesPerHeapOop, count);
          __ add(count, addr, count);
          // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
          __ srl_ptr(addr, CardTableModRefBS::card_shift, addr);
          __ srl_ptr(count, CardTableModRefBS::card_shift, count);
          __ sub(count, addr, count);
          AddressLiteral rs(ct->byte_map_base);
          __ set(rs, tmp);
          __ BIND(L_loop);
          __ stb(G0, tmp, addr);
          __ subcc(count, 1, count);
          __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
          __ delayed()->add(addr, 1, addr);
        }
        break;
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();
    }
  }

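  // Worked example of the card-marking arithmetic above (illustrative only;
  // the concrete card size comes from CardTableModRefBS, typically 512 bytes,
  // i.e. card_shift == 9). For 96 heap oops of 8 bytes starting at 0x1000,
  // the last element lives at 0x12f8. After the shifts, addr == 0x1000 >> 9
  // == 8 and count == (0x12f8 >> 9) - 8 == 1, so the loop runs while
  // count >= 0 and dirties exactly the two cards (indices 8 and 9) spanned
  // by the destination range.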
  //
  // Generate main code for disjoint arraycopy
  //
  typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec,
                                              Label& L_loop, bool use_prefetch, bool use_bis);

  void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size,
                          int iter_size, CopyLoopFunc copy_loop_func) {
    Label L_copy;

    assert(log2_elem_size <= 3, "the following code should be changed");
    int count_dec = 16>>log2_elem_size;

    int prefetch_dist = MAX2(ArraycopySrcPrefetchDistance, ArraycopyDstPrefetchDistance);
    assert(prefetch_dist < 4096, "invalid value");
    prefetch_dist = (prefetch_dist + (iter_size-1)) & (-iter_size); // round up to one iteration copy size
    int prefetch_count = (prefetch_dist >> log2_elem_size); // elements count

    if (UseBlockCopy) {
      Label L_block_copy, L_block_copy_prefetch, L_skip_block_copy;

      // 64 bytes tail + bytes copied in one loop iteration
      int tail_size = 64 + iter_size;
      int block_copy_count = (MAX2(tail_size, (int)BlockCopyLowLimit)) >> log2_elem_size;
      // Use BIS copy only for big arrays since it requires membar.
      __ set(block_copy_count, O4);
      __ cmp_and_br_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_skip_block_copy);
      // This code is for disjoint source and destination:
      //   to <= from || to >= from+count
      // but BIS will stomp over 'from' if (to > from-tail_size && to <= from)
      __ sub(from, to, O4);
      __ srax(O4, 4, O4); // divide by 16 since the following short branch has only 5 bits for imm.
      __ cmp_and_br_short(O4, (tail_size>>4), Assembler::lessEqualUnsigned, Assembler::pn, L_skip_block_copy);

      __ wrasi(G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
      // BIS should not be used to copy tail (64 bytes+iter_size)
      // to avoid zeroing of following values.
      __ sub(count, (tail_size>>log2_elem_size), count); // count is still positive >= 0

      if (prefetch_count > 0) { // rounded up to one iteration count
        // Do prefetching only if copy size is bigger
        // than prefetch distance.
        __ set(prefetch_count, O4);
        __ cmp_and_brx_short(count, O4, Assembler::less, Assembler::pt, L_block_copy);
        __ sub(count, prefetch_count, count);

        (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy_prefetch, true, true);
        __ add(count, prefetch_count, count); // restore count

      } // prefetch_count > 0

      (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy, false, true);
      __ add(count, (tail_size>>log2_elem_size), count); // restore count

      __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT);
      // BIS needs membar.
      __ membar(Assembler::StoreLoad);
      // Copy tail
      __ ba_short(L_copy);

      __ BIND(L_skip_block_copy);
    } // UseBlockCopy

    if (prefetch_count > 0) { // rounded up to one iteration count
      // Do prefetching only if copy size is bigger
      // than prefetch distance.
      __ set(prefetch_count, O4);
      __ cmp_and_brx_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_copy);
      __ sub(count, prefetch_count, count);

      Label L_copy_prefetch;
      (this->*copy_loop_func)(from, to, count, count_dec, L_copy_prefetch, true, false);
      __ add(count, prefetch_count, count); // restore count

    } // prefetch_count > 0

    (this->*copy_loop_func)(from, to, count, count_dec, L_copy, false, false);
  }


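  // Worked example of the prefetch-distance rounding above (comment only):
  // with iter_size == 16 and MAX2(ArraycopySrcPrefetchDistance,
  // ArraycopyDstPrefetchDistance) == 100, the expression (100 + 15) & (-16)
  // yields 112, i.e. the distance rounded up to a whole number of 16-byte
  // copy iterations; for log2_elem_size == 0 that becomes a prefetch_count
  // of 112 elements.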
  //
  // Helper methods for copy_16_bytes_forward_with_shift()
  //
  void copy_16_bytes_shift_loop(Register from, Register to, Register count, int count_dec,
                                Label& L_loop, bool use_prefetch, bool use_bis) {

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ align(OptoLoopAlignment);
    __ BIND(L_loop);
    if (use_prefetch) {
      if (ArraycopySrcPrefetchDistance > 0) {
        __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
      }
      if (ArraycopyDstPrefetchDistance > 0) {
        __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
      }
    }
    __ ldx(from, 0, O4);
    __ ldx(from, 8, G4);
    __ inc(to, 16);
    __ inc(from, 16);
    __ deccc(count, count_dec); // Can we do next iteration after this one?
    __ srlx(O4, right_shift, G3);
    __ bset(G3, O3);
    __ sllx(O4, left_shift,  O4);
    __ srlx(G4, right_shift, G3);
    __ bset(G3, O4);
    if (use_bis) {
      __ stxa(O3, to, -16);
      __ stxa(O4, to, -8);
    } else {
      __ stx(O3, to, -16);
      __ stx(O4, to, -8);
    }
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->sllx(G4, left_shift,  O3);
  }

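  // Worked example of the shift arithmetic used by the copy-with-shift
  // helpers above and below (illustrative only): if 'from' is misaligned by
  // 3 bytes, the callers set left_shift = 3 * 8 = 24 and
  // right_shift = 64 - 24 = 40. Each stored 8-byte word is then assembled as
  //   (previous_aligned_word << 24) | (next_aligned_word >> 40)
  // which, on big-endian SPARC, drops the 3 bytes that precede the copy
  // region (keeping the remaining 5 bytes of the first word) and splices in
  // the first 3 bytes of the following aligned word.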
  // Copy big chunks forward with shift
  //
  // Inputs:
  //   from      - source arrays
  //   to        - destination array aligned to 8-bytes
  //   count     - elements count to copy >= the count equivalent to 16 bytes
  //   count_dec - elements count's decrement equivalent to 16 bytes
  //   L_copy_bytes - copy exit label
  //
  void copy_16_bytes_forward_with_shift(Register from, Register to,
                     Register count, int log2_elem_size, Label& L_copy_bytes) {
    Label L_aligned_copy, L_copy_last_bytes;
    assert(log2_elem_size <= 3, "the following code should be changed");
    int count_dec = 16>>log2_elem_size;

    // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
    __ andcc(from, 7, G1); // misaligned bytes
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->nop();

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    //
    // Load 2 aligned 8-bytes chunks and use one from previous iteration
    // to form 2 aligned 8-bytes chunks to store.
    //
    __ dec(count, count_dec);   // Pre-decrement 'count'
    __ andn(from, 7, from);     // Align address
    __ ldx(from, 0, O3);
    __ inc(from, 8);
    __ sllx(O3, left_shift,  O3);

    disjoint_copy_core(from, to, count, log2_elem_size, 16, copy_16_bytes_shift_loop);

    __ inccc(count, count_dec>>1 ); // + 8 bytes
    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
    __ delayed()->inc(count, count_dec>>1); // restore 'count'

    // copy 8 bytes, part of them already loaded in O3
    __ ldx(from, 0, O4);
    __ inc(to, 8);
    __ inc(from, 8);
    __ srlx(O4, right_shift, G3);
    __ bset(O3, G3);
    __ stx(G3, to, -8);

    __ BIND(L_copy_last_bytes);
    __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
    __ delayed()->sub(from, right_shift, from);       // restore address

    __ BIND(L_aligned_copy);
  }

  // Copy big chunks backward with shift
  //
  // Inputs:
  //   end_from  - source arrays end address
  //   end_to    - destination array end address aligned to 8-bytes
  //   count     - elements count to copy >= the count equivalent to 16 bytes
  //   count_dec - elements count's decrement equivalent to 16 bytes
  //   L_aligned_copy - aligned copy exit label
  //   L_copy_bytes   - copy exit label
  //
  void copy_16_bytes_backward_with_shift(Register end_from, Register end_to,
                     Register count, int count_dec,
                     Label& L_aligned_copy, Label& L_copy_bytes) {
    Label L_loop, L_copy_last_bytes;

    // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
    __ andcc(end_from, 7, G1); // misaligned bytes
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->deccc(count, count_dec); // Pre-decrement 'count'

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    //
    // Load 2 aligned 8-bytes chunks and use one from previous iteration
    // to form 2 aligned 8-bytes chunks to store.
    //
    __ andn(end_from, 7, end_from);     // Align address
    __ ldx(end_from, 0, O3);
    __ align(OptoLoopAlignment);
    __ BIND(L_loop);
    __ ldx(end_from, -8, O4);
    __ deccc(count, count_dec); // Can we do next iteration after this one?
    __ ldx(end_from, -16, G4);
    __ dec(end_to, 16);
    __ dec(end_from, 16);
    __ srlx(O3, right_shift, O3);
    __ sllx(O4, left_shift,  G3);
    __ bset(G3, O3);
    __ stx(O3, end_to, 8);
    __ srlx(O4, right_shift, O4);
    __ sllx(G4, left_shift,  G3);
    __ bset(G3, O4);
    __ stx(O4, end_to, 0);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->mov(G4, O3);

    __ inccc(count, count_dec>>1 ); // + 8 bytes
    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
    __ delayed()->inc(count, count_dec>>1); // restore 'count'

    // copy 8 bytes, part of them already loaded in O3
    __ ldx(end_from, -8, O4);
    __ dec(end_to, 8);
    __ dec(end_from, 8);
    __ srlx(O3, right_shift, O3);
    __ sllx(O4, left_shift,  G3);
    __ bset(O3, G3);
    __ stx(G3, end_to, 0);

    __ BIND(L_copy_last_bytes);
    __ srl(left_shift, LogBitsPerByte, left_shift);    // misaligned bytes
    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
    __ delayed()->add(end_from, left_shift, end_from); // restore address
  }

  //
  //  Generate stub for disjoint byte copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_byte_copy(bool aligned, address *entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_align;
    Label L_copy_byte, L_copy_byte_loop, L_exit;

    const Register from      = O0;   // source array address
    const Register to        = O1;   // destination array address
    const Register count     = O2;   // elements count
    const Register offset    = O5;   // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    // for short arrays, just do single element copy
    __ cmp(count, 23); // 16 + 7
    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
    __ delayed()->mov(G0, offset);

    if (aligned) {
      // 'aligned' == true when it is known statically during compilation
      // of this arraycopy call site that both 'from' and 'to' addresses
      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
      //
      // Aligned arrays have 4 bytes alignment in 32-bits VM
      // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM
      //
#ifndef _LP64
      // copy a 4-bytes word if necessary to align 'to' to 8 bytes
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count, 4);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);
#endif
    } else {
      // copy bytes to align 'to' on 8 byte boundary
      __ andcc(to, 7, G1); // misaligned bytes
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->neg(G1);
      __ inc(G1, 8);       // bytes need to copy to next 8-bytes alignment
      __ sub(count, G1, count);
      __ BIND(L_align);
      __ ldub(from, 0, O3);
      __ deccc(G1);
      __ inc(from);
      __ stb(O3, to, 0);
      __ br(Assembler::notZero, false, Assembler::pt, L_align);
      __ delayed()->inc(to);
      __ BIND(L_skip_alignment);
    }
#ifdef _LP64
    if (!aligned)
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise fall through to the next
      // code for aligned copy.
      // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
    }

    // Both arrays are 8 bytes aligned, copy 16 bytes at a time
    __ and3(count, 7, G4); // Save count
    __ srl(count, 3, count);
    generate_disjoint_long_copy_core(aligned);
    __ mov(G4, count);     // Restore count

    // copy trailing bytes
    __ BIND(L_copy_byte);
    __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_byte_loop);
    __ ldub(from, offset, O3);
    __ deccc(count);
    __ stb(O3, to, offset);
    __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
    __ delayed()->inc(offset);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate stub for conjoint byte copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
                                      address *entry, const char *name) {
    // Do reverse copy.

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_align, L_aligned_copy;
    Label L_copy_byte, L_copy_byte_loop, L_exit;

    const Register from      = O0;   // source array address
    const Register to        = O1;   // destination array address
    const Register count     = O2;   // elements count
    const Register end_from  = from; // source array end address
    const Register end_to    = to;   // destination array end address

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, 0);

    __ add(to, count, end_to);       // offset after last copied element

    // for short arrays, just do single element copy
    __ cmp(count, 23); // 16 + 7
    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
    __ delayed()->add(from, count, end_from);

    {
      // Align end of arrays since they could be not aligned even
      // when the arrays themselves are aligned.

      // copy bytes to align 'end_to' on 8 byte boundary
      __ andcc(end_to, 7, G1); // misaligned bytes
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->nop();
      __ sub(count, G1, count);
      __ BIND(L_align);
      __ dec(end_from);
      __ dec(end_to);
      __ ldub(end_from, 0, O3);
      __ deccc(G1);
      __ brx(Assembler::notZero, false, Assembler::pt, L_align);
      __ delayed()->stb(O3, end_to, 0);
      __ BIND(L_skip_alignment);
    }
#ifdef _LP64
    if (aligned) {
      // Both arrays are aligned to 8-bytes in 64-bits VM.
      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
      // in unaligned case.
      __ dec(count, 16);
    } else
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise jump to the next
      // code for aligned copy (and subtracting 16 from 'count' before jump).
      // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
                                        L_aligned_copy, L_copy_byte);
    }
    // copy 4 elements (16 bytes) at a time
    __ align(OptoLoopAlignment);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 16);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 16);

    // copy 1 element (1 byte) at a time
    __ BIND(L_copy_byte);
    __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_byte_loop);
    __ dec(end_from);
    __ dec(end_to);
    __ ldub(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
    __ delayed()->stb(O4, end_to, 0);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

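  // For orientation (illustrative sketch only; the real registration happens
  // in the arraycopy stub setup further down in this file, and the argument
  // details may differ): the byte-copy generators above are typically wired
  // into StubRoutines with the disjoint entry captured first, so the conjoint
  // stub can branch to it when the overlap test says a forward copy is safe:
  //
  //   address entry;
  //   StubRoutines::_jbyte_disjoint_arraycopy =
  //       generate_disjoint_byte_copy(false, &entry, "jbyte_disjoint_arraycopy");
  //   StubRoutines::_jbyte_arraycopy =
  //       generate_conjoint_byte_copy(false, entry, NULL, "jbyte_arraycopy");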
  //
  //  Generate stub for disjoint short copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_skip_alignment2;
    Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;

    const Register from      = O0;   // source array address
    const Register to        = O1;   // destination array address
    const Register count     = O2;   // elements count
    const Register offset    = O5;   // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    // for short arrays, just do single element copy
    __ cmp(count, 11); // 8 + 3  (22 bytes)
    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
    __ delayed()->mov(G0, offset);

    if (aligned) {
      // 'aligned' == true when it is known statically during compilation
      // of this arraycopy call site that both 'from' and 'to' addresses
      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
      //
      // Aligned arrays have 4 bytes alignment in 32-bits VM
      // and 8 bytes - in 64-bits VM.
      //
#ifndef _LP64
      // copy a 2-elements word if necessary to align 'to' to 8 bytes
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count, 2);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);
#endif
    } else {
      // copy 1 element if necessary to align 'to' on a 4-byte boundary
      __ andcc(to, 3, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->lduh(from, 0, O3);
      __ inc(from, 2);
      __ inc(to, 2);
      __ dec(count);
      __ sth(O3, to, -2);
      __ BIND(L_skip_alignment);

      // copy 2 elements to align 'to' on an 8 byte boundary
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
      __ delayed()->lduh(from, 0, O3);
      __ dec(count, 2);
      __ lduh(from, 2, O4);
      __ inc(from, 4);
      __ inc(to, 4);
      __ sth(O3, to, -4);
      __ sth(O4, to, -2);
      __ BIND(L_skip_alignment2);
    }
#ifdef _LP64
    if (!aligned)
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise fall through to the next
      // code for aligned copy.
      // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.
1685 1686 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); 1687 } 1688 1689 // Both array are 8 bytes aligned, copy 16 bytes at a time 1690 __ and3(count, 3, G4); // Save 1691 __ srl(count, 2, count); 1692 generate_disjoint_long_copy_core(aligned); 1693 __ mov(G4, count); // restore 1694 1695 // copy 1 element at a time 1696 __ BIND(L_copy_2_bytes); 1697 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); 1698 __ align(OptoLoopAlignment); 1699 __ BIND(L_copy_2_bytes_loop); 1700 __ lduh(from, offset, O3); 1701 __ deccc(count); 1702 __ sth(O3, to, offset); 1703 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop); 1704 __ delayed()->inc(offset, 2); 1705 1706 __ BIND(L_exit); 1707 // O3, O4 are used as temp registers 1708 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); 1709 __ retl(); 1710 __ delayed()->mov(G0, O0); // return 0 1711 return start; 1712 } 1713 1714 // 1715 // Generate stub for disjoint short fill. If "aligned" is true, the 1716 // "to" address is assumed to be heapword aligned. 1717 // 1718 // Arguments for generated stub: 1719 // to: O0 1720 // value: O1 1721 // count: O2 treated as signed 1722 // 1723 address generate_fill(BasicType t, bool aligned, const char* name) { 1724 __ align(CodeEntryAlignment); 1725 StubCodeMark mark(this, "StubRoutines", name); 1726 address start = __ pc(); 1727 1728 const Register to = O0; // source array address 1729 const Register value = O1; // fill value 1730 const Register count = O2; // elements count 1731 // O3 is used as a temp register 1732 1733 assert_clean_int(count, O3); // Make sure 'count' is clean int. 1734 1735 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; 1736 Label L_fill_2_bytes, L_fill_elements, L_fill_32_bytes; 1737 1738 int shift = -1; 1739 switch (t) { 1740 case T_BYTE: 1741 shift = 2; 1742 break; 1743 case T_SHORT: 1744 shift = 1; 1745 break; 1746 case T_INT: 1747 shift = 0; 1748 break; 1749 default: ShouldNotReachHere(); 1750 } 1751 1752 BLOCK_COMMENT("Entry:"); 1753 1754 if (t == T_BYTE) { 1755 // Zero extend value 1756 __ and3(value, 0xff, value); 1757 __ sllx(value, 8, O3); 1758 __ or3(value, O3, value); 1759 } 1760 if (t == T_SHORT) { 1761 // Zero extend value 1762 __ sllx(value, 48, value); 1763 __ srlx(value, 48, value); 1764 } 1765 if (t == T_BYTE || t == T_SHORT) { 1766 __ sllx(value, 16, O3); 1767 __ or3(value, O3, value); 1768 } 1769 1770 __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element 1771 __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp 1772 __ delayed()->andcc(count, 1, G0); 1773 1774 if (!aligned && (t == T_BYTE || t == T_SHORT)) { 1775 // align source address at 4 bytes address boundary 1776 if (t == T_BYTE) { 1777 // One byte misalignment happens only for byte arrays 1778 __ andcc(to, 1, G0); 1779 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1); 1780 __ delayed()->nop(); 1781 __ stb(value, to, 0); 1782 __ inc(to, 1); 1783 __ dec(count, 1); 1784 __ BIND(L_skip_align1); 1785 } 1786 // Two bytes misalignment happens only for byte and short (char) arrays 1787 __ andcc(to, 2, G0); 1788 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2); 1789 __ delayed()->nop(); 1790 __ sth(value, to, 0); 1791 __ inc(to, 2); 1792 __ dec(count, 1 << (shift - 1)); 1793 __ BIND(L_skip_align2); 1794 } 1795 #ifdef _LP64 1796 if (!aligned) { 1797 #endif 1798 // align to 8 bytes, we know we are 4 byte aligned to start 1799 __ andcc(to, 7, G0); 1800 __ br(Assembler::zero, 
false, Assembler::pt, L_fill_32_bytes); 1801 __ delayed()->nop(); 1802 __ stw(value, to, 0); 1803 __ inc(to, 4); 1804 __ dec(count, 1 << shift); 1805 __ BIND(L_fill_32_bytes); 1806 #ifdef _LP64 1807 } 1808 #endif 1809 1810 if (t == T_INT) { 1811 // Zero extend value 1812 __ srl(value, 0, value); 1813 } 1814 if (t == T_BYTE || t == T_SHORT || t == T_INT) { 1815 __ sllx(value, 32, O3); 1816 __ or3(value, O3, value); 1817 } 1818 1819 Label L_check_fill_8_bytes; 1820 // Fill 32-byte chunks 1821 __ subcc(count, 8 << shift, count); 1822 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes); 1823 __ delayed()->nop(); 1824 1825 Label L_fill_32_bytes_loop, L_fill_4_bytes; 1826 __ align(16); 1827 __ BIND(L_fill_32_bytes_loop); 1828 1829 __ stx(value, to, 0); 1830 __ stx(value, to, 8); 1831 __ stx(value, to, 16); 1832 __ stx(value, to, 24); 1833 1834 __ subcc(count, 8 << shift, count); 1835 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop); 1836 __ delayed()->add(to, 32, to); 1837 1838 __ BIND(L_check_fill_8_bytes); 1839 __ addcc(count, 8 << shift, count); 1840 __ brx(Assembler::zero, false, Assembler::pn, L_exit); 1841 __ delayed()->subcc(count, 1 << (shift + 1), count); 1842 __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes); 1843 __ delayed()->andcc(count, 1<<shift, G0); 1844 1845 // 1846 // length is too short, just fill 8 bytes at a time 1847 // 1848 Label L_fill_8_bytes_loop; 1849 __ BIND(L_fill_8_bytes_loop); 1850 __ stx(value, to, 0); 1851 __ subcc(count, 1 << (shift + 1), count); 1852 __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop); 1853 __ delayed()->add(to, 8, to); 1854 1855 // fill trailing 4 bytes 1856 __ andcc(count, 1<<shift, G0); // in delay slot of branches 1857 if (t == T_INT) { 1858 __ BIND(L_fill_elements); 1859 } 1860 __ BIND(L_fill_4_bytes); 1861 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes); 1862 if (t == T_BYTE || t == T_SHORT) { 1863 __ delayed()->andcc(count, 1<<(shift-1), G0); 1864 } else { 1865 __ delayed()->nop(); 1866 } 1867 __ stw(value, to, 0); 1868 if (t == T_BYTE || t == T_SHORT) { 1869 __ inc(to, 4); 1870 // fill trailing 2 bytes 1871 __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches 1872 __ BIND(L_fill_2_bytes); 1873 __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte); 1874 __ delayed()->andcc(count, 1, count); 1875 __ sth(value, to, 0); 1876 if (t == T_BYTE) { 1877 __ inc(to, 2); 1878 // fill trailing byte 1879 __ andcc(count, 1, count); // in delay slot of branches 1880 __ BIND(L_fill_byte); 1881 __ brx(Assembler::zero, false, Assembler::pt, L_exit); 1882 __ delayed()->nop(); 1883 __ stb(value, to, 0); 1884 } else { 1885 __ BIND(L_fill_byte); 1886 } 1887 } else { 1888 __ BIND(L_fill_2_bytes); 1889 } 1890 __ BIND(L_exit); 1891 __ retl(); 1892 __ delayed()->nop(); 1893 1894 // Handle copies less than 8 bytes. Int is handled elsewhere. 
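    // Rough shape of the element-wise fill paths below (an illustrative
    // sketch only, not the emitted code):
    //   if (count & 1) store 1 element;
    //   if (count & 2) store 2 elements;
    //   if (count & 4) store 4 elements;   // byte case only
    // The andcc() whose result is tested first was issued in a branch delay
    // slot on the way here (see the 'in delay slot' comments below).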
1895 if (t == T_BYTE) { 1896 __ BIND(L_fill_elements); 1897 Label L_fill_2, L_fill_4; 1898 // in delay slot __ andcc(count, 1, G0); 1899 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2); 1900 __ delayed()->andcc(count, 2, G0); 1901 __ stb(value, to, 0); 1902 __ inc(to, 1); 1903 __ BIND(L_fill_2); 1904 __ brx(Assembler::zero, false, Assembler::pt, L_fill_4); 1905 __ delayed()->andcc(count, 4, G0); 1906 __ stb(value, to, 0); 1907 __ stb(value, to, 1); 1908 __ inc(to, 2); 1909 __ BIND(L_fill_4); 1910 __ brx(Assembler::zero, false, Assembler::pt, L_exit); 1911 __ delayed()->nop(); 1912 __ stb(value, to, 0); 1913 __ stb(value, to, 1); 1914 __ stb(value, to, 2); 1915 __ retl(); 1916 __ delayed()->stb(value, to, 3); 1917 } 1918 1919 if (t == T_SHORT) { 1920 Label L_fill_2; 1921 __ BIND(L_fill_elements); 1922 // in delay slot __ andcc(count, 1, G0); 1923 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2); 1924 __ delayed()->andcc(count, 2, G0); 1925 __ sth(value, to, 0); 1926 __ inc(to, 2); 1927 __ BIND(L_fill_2); 1928 __ brx(Assembler::zero, false, Assembler::pt, L_exit); 1929 __ delayed()->nop(); 1930 __ sth(value, to, 0); 1931 __ retl(); 1932 __ delayed()->sth(value, to, 2); 1933 } 1934 return start; 1935 } 1936 1937 // 1938 // Generate stub for conjoint short copy. If "aligned" is true, the 1939 // "from" and "to" addresses are assumed to be heapword aligned. 1940 // 1941 // Arguments for generated stub: 1942 // from: O0 1943 // to: O1 1944 // count: O2 treated as signed 1945 // 1946 address generate_conjoint_short_copy(bool aligned, address nooverlap_target, 1947 address *entry, const char *name) { 1948 // Do reverse copy. 1949 1950 __ align(CodeEntryAlignment); 1951 StubCodeMark mark(this, "StubRoutines", name); 1952 address start = __ pc(); 1953 1954 Label L_skip_alignment, L_skip_alignment2, L_aligned_copy; 1955 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit; 1956 1957 const Register from = O0; // source array address 1958 const Register to = O1; // destination array address 1959 const Register count = O2; // elements count 1960 const Register end_from = from; // source array end address 1961 const Register end_to = to; // destination array end address 1962 1963 const Register byte_count = O3; // bytes count to copy 1964 1965 assert_clean_int(count, O3); // Make sure 'count' is clean int. 1966 1967 if (entry != NULL) { 1968 *entry = __ pc(); 1969 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1970 BLOCK_COMMENT("Entry:"); 1971 } 1972 1973 array_overlap_test(nooverlap_target, 1); 1974 1975 __ sllx(count, LogBytesPerShort, byte_count); 1976 __ add(to, byte_count, end_to); // offset after last copied element 1977 1978 // for short arrays, just do single element copy 1979 __ cmp(count, 11); // 8 + 3 (22 bytes) 1980 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); 1981 __ delayed()->add(from, byte_count, end_from); 1982 1983 { 1984 // Align end of arrays since they could be not aligned even 1985 // when arrays itself are aligned. 
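      // For example (illustration only): with 'to' 8-byte aligned and an odd
      // element count, end_to = to + 2*count is not even 4-byte aligned, so a
      // few trailing elements must be peeled off before the 8-byte-aligned
      // backward loop can run.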
1986 1987 // copy 1 element if necessary to align 'end_to' on an 4 bytes 1988 __ andcc(end_to, 3, G0); 1989 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1990 __ delayed()->lduh(end_from, -2, O3); 1991 __ dec(end_from, 2); 1992 __ dec(end_to, 2); 1993 __ dec(count); 1994 __ sth(O3, end_to, 0); 1995 __ BIND(L_skip_alignment); 1996 1997 // copy 2 elements to align 'end_to' on an 8 byte boundary 1998 __ andcc(end_to, 7, G0); 1999 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); 2000 __ delayed()->lduh(end_from, -2, O3); 2001 __ dec(count, 2); 2002 __ lduh(end_from, -4, O4); 2003 __ dec(end_from, 4); 2004 __ dec(end_to, 4); 2005 __ sth(O3, end_to, 2); 2006 __ sth(O4, end_to, 0); 2007 __ BIND(L_skip_alignment2); 2008 } 2009 #ifdef _LP64 2010 if (aligned) { 2011 // Both arrays are aligned to 8-bytes in 64-bits VM. 2012 // The 'count' is decremented in copy_16_bytes_backward_with_shift() 2013 // in unaligned case. 2014 __ dec(count, 8); 2015 } else 2016 #endif 2017 { 2018 // Copy with shift 16 bytes per iteration if arrays do not have 2019 // the same alignment mod 8, otherwise jump to the next 2020 // code for aligned copy (and substracting 8 from 'count' before jump). 2021 // The compare above (count >= 11) guarantes 'count' >= 16 bytes. 2022 // Also jump over aligned copy after the copy with shift completed. 2023 2024 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, 2025 L_aligned_copy, L_copy_2_bytes); 2026 } 2027 // copy 4 elements (16 bytes) at a time 2028 __ align(OptoLoopAlignment); 2029 __ BIND(L_aligned_copy); 2030 __ dec(end_from, 16); 2031 __ ldx(end_from, 8, O3); 2032 __ ldx(end_from, 0, O4); 2033 __ dec(end_to, 16); 2034 __ deccc(count, 8); 2035 __ stx(O3, end_to, 8); 2036 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); 2037 __ delayed()->stx(O4, end_to, 0); 2038 __ inc(count, 8); 2039 2040 // copy 1 element (2 bytes) at a time 2041 __ BIND(L_copy_2_bytes); 2042 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); 2043 __ BIND(L_copy_2_bytes_loop); 2044 __ dec(end_from, 2); 2045 __ dec(end_to, 2); 2046 __ lduh(end_from, 0, O4); 2047 __ deccc(count); 2048 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop); 2049 __ delayed()->sth(O4, end_to, 0); 2050 2051 __ BIND(L_exit); 2052 // O3, O4 are used as temp registers 2053 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); 2054 __ retl(); 2055 __ delayed()->mov(G0, O0); // return 0 2056 return start; 2057 } 2058 2059 // 2060 // Helper methods for generate_disjoint_int_copy_core() 2061 // 2062 void copy_16_bytes_loop(Register from, Register to, Register count, int count_dec, 2063 Label& L_loop, bool use_prefetch, bool use_bis) { 2064 2065 __ align(OptoLoopAlignment); 2066 __ BIND(L_loop); 2067 if (use_prefetch) { 2068 if (ArraycopySrcPrefetchDistance > 0) { 2069 __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads); 2070 } 2071 if (ArraycopyDstPrefetchDistance > 0) { 2072 __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads); 2073 } 2074 } 2075 __ ldx(from, 4, O4); 2076 __ ldx(from, 12, G4); 2077 __ inc(to, 16); 2078 __ inc(from, 16); 2079 __ deccc(count, 4); // Can we do next iteration after this one? 
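      // The shift/or sequence below merges the two misaligned loads into two
      // aligned doublewords for the stores at to-16 and to-8. Roughly, using
      // the registers above (a sketch, not extra emitted code):
      //   O3 |= O4 >> 32;  O4 = (O4 << 32) | (G4 >> 32);
      // and the branch delay slot seeds the next iteration with O3 = G4 << 32.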
2080 2081 __ srlx(O4, 32, G3); 2082 __ bset(G3, O3); 2083 __ sllx(O4, 32, O4); 2084 __ srlx(G4, 32, G3); 2085 __ bset(G3, O4); 2086 if (use_bis) { 2087 __ stxa(O3, to, -16); 2088 __ stxa(O4, to, -8); 2089 } else { 2090 __ stx(O3, to, -16); 2091 __ stx(O4, to, -8); 2092 } 2093 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); 2094 __ delayed()->sllx(G4, 32, O3); 2095 2096 } 2097 2098 // 2099 // Generate core code for disjoint int copy (and oop copy on 32-bit). 2100 // If "aligned" is true, the "from" and "to" addresses are assumed 2101 // to be heapword aligned. 2102 // 2103 // Arguments: 2104 // from: O0 2105 // to: O1 2106 // count: O2 treated as signed 2107 // 2108 void generate_disjoint_int_copy_core(bool aligned) { 2109 2110 Label L_skip_alignment, L_aligned_copy; 2111 Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; 2112 2113 const Register from = O0; // source array address 2114 const Register to = O1; // destination array address 2115 const Register count = O2; // elements count 2116 const Register offset = O5; // offset from start of arrays 2117 // O3, O4, G3, G4 are used as temp registers 2118 2119 // 'aligned' == true when it is known statically during compilation 2120 // of this arraycopy call site that both 'from' and 'to' addresses 2121 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). 2122 // 2123 // Aligned arrays have 4 bytes alignment in 32-bits VM 2124 // and 8 bytes - in 64-bits VM. 2125 // 2126 #ifdef _LP64 2127 if (!aligned) 2128 #endif 2129 { 2130 // The next check could be put under 'ifndef' since the code in 2131 // generate_disjoint_long_copy_core() has own checks and set 'offset'. 2132 2133 // for short arrays, just do single element copy 2134 __ cmp(count, 5); // 4 + 1 (20 bytes) 2135 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes); 2136 __ delayed()->mov(G0, offset); 2137 2138 // copy 1 element to align 'to' on an 8 byte boundary 2139 __ andcc(to, 7, G0); 2140 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 2141 __ delayed()->ld(from, 0, O3); 2142 __ inc(from, 4); 2143 __ inc(to, 4); 2144 __ dec(count); 2145 __ st(O3, to, -4); 2146 __ BIND(L_skip_alignment); 2147 2148 // if arrays have same alignment mod 8, do 4 elements copy 2149 __ andcc(from, 7, G0); 2150 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); 2151 __ delayed()->ld(from, 0, O3); 2152 2153 // 2154 // Load 2 aligned 8-bytes chunks and use one from previous iteration 2155 // to form 2 aligned 8-bytes chunks to store. 2156 // 2157 // copy_16_bytes_forward_with_shift() is not used here since this 2158 // code is more optimal. 
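      // In outline (an illustrative sketch): the ld(from, 0, O3) issued in the
      // delay slot above fetched the first 4-byte word; shifting it left by 32
      // below seeds O3 so that copy_16_bytes_loop() can OR each newly loaded
      // doubleword's high half into it, keeping every store 8-byte aligned.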
2159 2160 // copy with shift 4 elements (16 bytes) at a time 2161 __ dec(count, 4); // The cmp at the beginning guaranty count >= 4 2162 __ sllx(O3, 32, O3); 2163 2164 disjoint_copy_core(from, to, count, 2, 16, copy_16_bytes_loop); 2165 2166 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); 2167 __ delayed()->inc(count, 4); // restore 'count' 2168 2169 __ BIND(L_aligned_copy); 2170 } // !aligned 2171 2172 // copy 4 elements (16 bytes) at a time 2173 __ and3(count, 1, G4); // Save 2174 __ srl(count, 1, count); 2175 generate_disjoint_long_copy_core(aligned); 2176 __ mov(G4, count); // Restore 2177 2178 // copy 1 element at a time 2179 __ BIND(L_copy_4_bytes); 2180 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); 2181 __ BIND(L_copy_4_bytes_loop); 2182 __ ld(from, offset, O3); 2183 __ deccc(count); 2184 __ st(O3, to, offset); 2185 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop); 2186 __ delayed()->inc(offset, 4); 2187 __ BIND(L_exit); 2188 } 2189 2190 // 2191 // Generate stub for disjoint int copy. If "aligned" is true, the 2192 // "from" and "to" addresses are assumed to be heapword aligned. 2193 // 2194 // Arguments for generated stub: 2195 // from: O0 2196 // to: O1 2197 // count: O2 treated as signed 2198 // 2199 address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) { 2200 __ align(CodeEntryAlignment); 2201 StubCodeMark mark(this, "StubRoutines", name); 2202 address start = __ pc(); 2203 2204 const Register count = O2; 2205 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2206 2207 if (entry != NULL) { 2208 *entry = __ pc(); 2209 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 2210 BLOCK_COMMENT("Entry:"); 2211 } 2212 2213 generate_disjoint_int_copy_core(aligned); 2214 2215 // O3, O4 are used as temp registers 2216 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); 2217 __ retl(); 2218 __ delayed()->mov(G0, O0); // return 0 2219 return start; 2220 } 2221 2222 // 2223 // Generate core code for conjoint int copy (and oop copy on 32-bit). 2224 // If "aligned" is true, the "from" and "to" addresses are assumed 2225 // to be heapword aligned. 2226 // 2227 // Arguments: 2228 // from: O0 2229 // to: O1 2230 // count: O2 treated as signed 2231 // 2232 void generate_conjoint_int_copy_core(bool aligned) { 2233 // Do reverse copy. 
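    // Overall shape of the reverse copy (an illustrative sketch, not the
    // emitted code):
    //   end_from = from + count*4;  end_to = to + count*4;
    //   if (count > 5) {
    //     copy one element if needed so end_to becomes 8-byte aligned;
    //     if (end_from is now 8-byte aligned too) copy 16 bytes per iteration;
    //     else copy 16 bytes per iteration, merging misaligned words with shifts;
    //   }
    //   copy the remaining elements backwards, 4 bytes at a time.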
2234 2235 Label L_skip_alignment, L_aligned_copy; 2236 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; 2237 2238 const Register from = O0; // source array address 2239 const Register to = O1; // destination array address 2240 const Register count = O2; // elements count 2241 const Register end_from = from; // source array end address 2242 const Register end_to = to; // destination array end address 2243 // O3, O4, O5, G3 are used as temp registers 2244 2245 const Register byte_count = O3; // bytes count to copy 2246 2247 __ sllx(count, LogBytesPerInt, byte_count); 2248 __ add(to, byte_count, end_to); // offset after last copied element 2249 2250 __ cmp(count, 5); // for short arrays, just do single element copy 2251 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes); 2252 __ delayed()->add(from, byte_count, end_from); 2253 2254 // copy 1 element to align 'to' on an 8 byte boundary 2255 __ andcc(end_to, 7, G0); 2256 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 2257 __ delayed()->nop(); 2258 __ dec(count); 2259 __ dec(end_from, 4); 2260 __ dec(end_to, 4); 2261 __ ld(end_from, 0, O4); 2262 __ st(O4, end_to, 0); 2263 __ BIND(L_skip_alignment); 2264 2265 // Check if 'end_from' and 'end_to' has the same alignment. 2266 __ andcc(end_from, 7, G0); 2267 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); 2268 __ delayed()->dec(count, 4); // The cmp at the start guaranty cnt >= 4 2269 2270 // copy with shift 4 elements (16 bytes) at a time 2271 // 2272 // Load 2 aligned 8-bytes chunks and use one from previous iteration 2273 // to form 2 aligned 8-bytes chunks to store. 2274 // 2275 __ ldx(end_from, -4, O3); 2276 __ align(OptoLoopAlignment); 2277 __ BIND(L_copy_16_bytes); 2278 __ ldx(end_from, -12, O4); 2279 __ deccc(count, 4); 2280 __ ldx(end_from, -20, O5); 2281 __ dec(end_to, 16); 2282 __ dec(end_from, 16); 2283 __ srlx(O3, 32, O3); 2284 __ sllx(O4, 32, G3); 2285 __ bset(G3, O3); 2286 __ stx(O3, end_to, 8); 2287 __ srlx(O4, 32, O4); 2288 __ sllx(O5, 32, G3); 2289 __ bset(O4, G3); 2290 __ stx(G3, end_to, 0); 2291 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); 2292 __ delayed()->mov(O5, O3); 2293 2294 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); 2295 __ delayed()->inc(count, 4); 2296 2297 // copy 4 elements (16 bytes) at a time 2298 __ align(OptoLoopAlignment); 2299 __ BIND(L_aligned_copy); 2300 __ dec(end_from, 16); 2301 __ ldx(end_from, 8, O3); 2302 __ ldx(end_from, 0, O4); 2303 __ dec(end_to, 16); 2304 __ deccc(count, 4); 2305 __ stx(O3, end_to, 8); 2306 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); 2307 __ delayed()->stx(O4, end_to, 0); 2308 __ inc(count, 4); 2309 2310 // copy 1 element (4 bytes) at a time 2311 __ BIND(L_copy_4_bytes); 2312 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); 2313 __ BIND(L_copy_4_bytes_loop); 2314 __ dec(end_from, 4); 2315 __ dec(end_to, 4); 2316 __ ld(end_from, 0, O4); 2317 __ deccc(count); 2318 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop); 2319 __ delayed()->st(O4, end_to, 0); 2320 __ BIND(L_exit); 2321 } 2322 2323 // 2324 // Generate stub for conjoint int copy. If "aligned" is true, the 2325 // "from" and "to" addresses are assumed to be heapword aligned. 
2326 // 2327 // Arguments for generated stub: 2328 // from: O0 2329 // to: O1 2330 // count: O2 treated as signed 2331 // 2332 address generate_conjoint_int_copy(bool aligned, address nooverlap_target, 2333 address *entry, const char *name) { 2334 __ align(CodeEntryAlignment); 2335 StubCodeMark mark(this, "StubRoutines", name); 2336 address start = __ pc(); 2337 2338 assert_clean_int(O2, O3); // Make sure 'count' is clean int. 2339 2340 if (entry != NULL) { 2341 *entry = __ pc(); 2342 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 2343 BLOCK_COMMENT("Entry:"); 2344 } 2345 2346 array_overlap_test(nooverlap_target, 2); 2347 2348 generate_conjoint_int_copy_core(aligned); 2349 2350 // O3, O4 are used as temp registers 2351 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); 2352 __ retl(); 2353 __ delayed()->mov(G0, O0); // return 0 2354 return start; 2355 } 2356 2357 // 2358 // Helper methods for generate_disjoint_long_copy_core() 2359 // 2360 void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec, 2361 Label& L_loop, bool use_prefetch, bool use_bis) { 2362 __ align(OptoLoopAlignment); 2363 __ BIND(L_loop); 2364 for (int off = 0; off < 64; off += 16) { 2365 if (use_prefetch && (off & 31) == 0) { 2366 if (ArraycopySrcPrefetchDistance > 0) { 2367 __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads); 2368 } 2369 if (ArraycopyDstPrefetchDistance > 0) { 2370 __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads); 2371 } 2372 } 2373 __ ldx(from, off+0, O4); 2374 __ ldx(from, off+8, O5); 2375 if (use_bis) { 2376 __ stxa(O4, to, off+0); 2377 __ stxa(O5, to, off+8); 2378 } else { 2379 __ stx(O4, to, off+0); 2380 __ stx(O5, to, off+8); 2381 } 2382 } 2383 __ deccc(count, 8); 2384 __ inc(from, 64); 2385 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); 2386 __ delayed()->inc(to, 64); 2387 } 2388 2389 // 2390 // Generate core code for disjoint long copy (and oop copy on 64-bit). 2391 // "aligned" is ignored, because we must make the stronger 2392 // assumption that both addresses are always 64-bit aligned. 
2393 // 2394 // Arguments: 2395 // from: O0 2396 // to: O1 2397 // count: O2 treated as signed 2398 // 2399 // count -= 2; 2400 // if ( count >= 0 ) { // >= 2 elements 2401 // if ( count > 6) { // >= 8 elements 2402 // count -= 6; // original count - 8 2403 // do { 2404 // copy_8_elements; 2405 // count -= 8; 2406 // } while ( count >= 0 ); 2407 // count += 6; 2408 // } 2409 // if ( count >= 0 ) { // >= 2 elements 2410 // do { 2411 // copy_2_elements; 2412 // } while ( (count=count-2) >= 0 ); 2413 // } 2414 // } 2415 // count += 2; 2416 // if ( count != 0 ) { // 1 element left 2417 // copy_1_element; 2418 // } 2419 // 2420 void generate_disjoint_long_copy_core(bool aligned) { 2421 Label L_copy_8_bytes, L_copy_16_bytes, L_exit; 2422 const Register from = O0; // source array address 2423 const Register to = O1; // destination array address 2424 const Register count = O2; // elements count 2425 const Register offset0 = O4; // element offset 2426 const Register offset8 = O5; // next element offset 2427 2428 __ deccc(count, 2); 2429 __ mov(G0, offset0); // offset from start of arrays (0) 2430 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); 2431 __ delayed()->add(offset0, 8, offset8); 2432 2433 // Copy by 64 bytes chunks 2434 2435 const Register from64 = O3; // source address 2436 const Register to64 = G3; // destination address 2437 __ subcc(count, 6, O3); 2438 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes ); 2439 __ delayed()->mov(to, to64); 2440 // Now we can use O4(offset0), O5(offset8) as temps 2441 __ mov(O3, count); 2442 // count >= 0 (original count - 8) 2443 __ mov(from, from64); 2444 2445 disjoint_copy_core(from64, to64, count, 3, 64, copy_64_bytes_loop); 2446 2447 // Restore O4(offset0), O5(offset8) 2448 __ sub(from64, from, offset0); 2449 __ inccc(count, 6); // restore count 2450 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); 2451 __ delayed()->add(offset0, 8, offset8); 2452 2453 // Copy by 16 bytes chunks 2454 __ align(OptoLoopAlignment); 2455 __ BIND(L_copy_16_bytes); 2456 __ ldx(from, offset0, O3); 2457 __ ldx(from, offset8, G3); 2458 __ deccc(count, 2); 2459 __ stx(O3, to, offset0); 2460 __ inc(offset0, 16); 2461 __ stx(G3, to, offset8); 2462 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); 2463 __ delayed()->inc(offset8, 16); 2464 2465 // Copy last 8 bytes 2466 __ BIND(L_copy_8_bytes); 2467 __ inccc(count, 2); 2468 __ brx(Assembler::zero, true, Assembler::pn, L_exit ); 2469 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs 2470 __ ldx(from, offset0, O3); 2471 __ stx(O3, to, offset0); 2472 __ BIND(L_exit); 2473 } 2474 2475 // 2476 // Generate stub for disjoint long copy. 2477 // "aligned" is ignored, because we must make the stronger 2478 // assumption that both addresses are always 64-bit aligned. 2479 // 2480 // Arguments for generated stub: 2481 // from: O0 2482 // to: O1 2483 // count: O2 treated as signed 2484 // 2485 address generate_disjoint_long_copy(bool aligned, address *entry, const char *name) { 2486 __ align(CodeEntryAlignment); 2487 StubCodeMark mark(this, "StubRoutines", name); 2488 address start = __ pc(); 2489 2490 assert_clean_int(O2, O3); // Make sure 'count' is clean int. 
2491 2492 if (entry != NULL) { 2493 *entry = __ pc(); 2494 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 2495 BLOCK_COMMENT("Entry:"); 2496 } 2497 2498 generate_disjoint_long_copy_core(aligned); 2499 2500 // O3, O4 are used as temp registers 2501 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4); 2502 __ retl(); 2503 __ delayed()->mov(G0, O0); // return 0 2504 return start; 2505 } 2506 2507 // 2508 // Generate core code for conjoint long copy (and oop copy on 64-bit). 2509 // "aligned" is ignored, because we must make the stronger 2510 // assumption that both addresses are always 64-bit aligned. 2511 // 2512 // Arguments: 2513 // from: O0 2514 // to: O1 2515 // count: O2 treated as signed 2516 // 2517 void generate_conjoint_long_copy_core(bool aligned) { 2518 // Do reverse copy. 2519 Label L_copy_8_bytes, L_copy_16_bytes, L_exit; 2520 const Register from = O0; // source array address 2521 const Register to = O1; // destination array address 2522 const Register count = O2; // elements count 2523 const Register offset8 = O4; // element offset 2524 const Register offset0 = O5; // previous element offset 2525 2526 __ subcc(count, 1, count); 2527 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes ); 2528 __ delayed()->sllx(count, LogBytesPerLong, offset8); 2529 __ sub(offset8, 8, offset0); 2530 __ align(OptoLoopAlignment); 2531 __ BIND(L_copy_16_bytes); 2532 __ ldx(from, offset8, O2); 2533 __ ldx(from, offset0, O3); 2534 __ stx(O2, to, offset8); 2535 __ deccc(offset8, 16); // use offset8 as counter 2536 __ stx(O3, to, offset0); 2537 __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes); 2538 __ delayed()->dec(offset0, 16); 2539 2540 __ BIND(L_copy_8_bytes); 2541 __ brx(Assembler::negative, false, Assembler::pn, L_exit ); 2542 __ delayed()->nop(); 2543 __ ldx(from, 0, O3); 2544 __ stx(O3, to, 0); 2545 __ BIND(L_exit); 2546 } 2547 2548 // Generate stub for conjoint long copy. 2549 // "aligned" is ignored, because we must make the stronger 2550 // assumption that both addresses are always 64-bit aligned. 2551 // 2552 // Arguments for generated stub: 2553 // from: O0 2554 // to: O1 2555 // count: O2 treated as signed 2556 // 2557 address generate_conjoint_long_copy(bool aligned, address nooverlap_target, 2558 address *entry, const char *name) { 2559 __ align(CodeEntryAlignment); 2560 StubCodeMark mark(this, "StubRoutines", name); 2561 address start = __ pc(); 2562 2563 assert(aligned, "Should always be aligned"); 2564 2565 assert_clean_int(O2, O3); // Make sure 'count' is clean int. 2566 2567 if (entry != NULL) { 2568 *entry = __ pc(); 2569 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 2570 BLOCK_COMMENT("Entry:"); 2571 } 2572 2573 array_overlap_test(nooverlap_target, 3); 2574 2575 generate_conjoint_long_copy_core(aligned); 2576 2577 // O3, O4 are used as temp registers 2578 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4); 2579 __ retl(); 2580 __ delayed()->mov(G0, O0); // return 0 2581 return start; 2582 } 2583 2584 // Generate stub for disjoint oop copy. If "aligned" is true, the 2585 // "from" and "to" addresses are assumed to be heapword aligned. 
2586 // 2587 // Arguments for generated stub: 2588 // from: O0 2589 // to: O1 2590 // count: O2 treated as signed 2591 // 2592 address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name, 2593 bool dest_uninitialized = false) { 2594 2595 const Register from = O0; // source array address 2596 const Register to = O1; // destination array address 2597 const Register count = O2; // elements count 2598 2599 __ align(CodeEntryAlignment); 2600 StubCodeMark mark(this, "StubRoutines", name); 2601 address start = __ pc(); 2602 2603 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2604 2605 if (entry != NULL) { 2606 *entry = __ pc(); 2607 // caller can pass a 64-bit byte count here 2608 BLOCK_COMMENT("Entry:"); 2609 } 2610 2611 // save arguments for barrier generation 2612 __ mov(to, G1); 2613 __ mov(count, G5); 2614 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized); 2615 #ifdef _LP64 2616 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2617 if (UseCompressedOops) { 2618 generate_disjoint_int_copy_core(aligned); 2619 } else { 2620 generate_disjoint_long_copy_core(aligned); 2621 } 2622 #else 2623 generate_disjoint_int_copy_core(aligned); 2624 #endif 2625 // O0 is used as temp register 2626 gen_write_ref_array_post_barrier(G1, G5, O0); 2627 2628 // O3, O4 are used as temp registers 2629 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4); 2630 __ retl(); 2631 __ delayed()->mov(G0, O0); // return 0 2632 return start; 2633 } 2634 2635 // Generate stub for conjoint oop copy. If "aligned" is true, the 2636 // "from" and "to" addresses are assumed to be heapword aligned. 2637 // 2638 // Arguments for generated stub: 2639 // from: O0 2640 // to: O1 2641 // count: O2 treated as signed 2642 // 2643 address generate_conjoint_oop_copy(bool aligned, address nooverlap_target, 2644 address *entry, const char *name, 2645 bool dest_uninitialized = false) { 2646 2647 const Register from = O0; // source array address 2648 const Register to = O1; // destination array address 2649 const Register count = O2; // elements count 2650 2651 __ align(CodeEntryAlignment); 2652 StubCodeMark mark(this, "StubRoutines", name); 2653 address start = __ pc(); 2654 2655 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2656 2657 if (entry != NULL) { 2658 *entry = __ pc(); 2659 // caller can pass a 64-bit byte count here 2660 BLOCK_COMMENT("Entry:"); 2661 } 2662 2663 array_overlap_test(nooverlap_target, LogBytesPerHeapOop); 2664 2665 // save arguments for barrier generation 2666 __ mov(to, G1); 2667 __ mov(count, G5); 2668 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized); 2669 2670 #ifdef _LP64 2671 if (UseCompressedOops) { 2672 generate_conjoint_int_copy_core(aligned); 2673 } else { 2674 generate_conjoint_long_copy_core(aligned); 2675 } 2676 #else 2677 generate_conjoint_int_copy_core(aligned); 2678 #endif 2679 2680 // O0 is used as temp register 2681 gen_write_ref_array_post_barrier(G1, G5, O0); 2682 2683 // O3, O4 are used as temp registers 2684 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4); 2685 __ retl(); 2686 __ delayed()->mov(G0, O0); // return 0 2687 return start; 2688 } 2689 2690 2691 // Helper for generating a dynamic type check. 2692 // Smashes only the given temp registers. 
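  // Roughly, the emitted control flow looks like this (an illustrative sketch;
  // the details live in check_klass_subtype_fast_path/slow_path):
  //   fast path: compare sub_klass against super_klass via super_check_offset;
  //              hit -> L_success, definite miss -> L_miss
  //   slow path: save a frame and scan the secondary supers;
  //              hit -> restore and branch to L_success
  //   otherwise fall through to the caller's failure handling at L_miss.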
2693 void generate_type_check(Register sub_klass, 2694 Register super_check_offset, 2695 Register super_klass, 2696 Register temp, 2697 Label& L_success) { 2698 assert_different_registers(sub_klass, super_check_offset, super_klass, temp); 2699 2700 BLOCK_COMMENT("type_check:"); 2701 2702 Label L_miss, L_pop_to_miss; 2703 2704 assert_clean_int(super_check_offset, temp); 2705 2706 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg, 2707 &L_success, &L_miss, NULL, 2708 super_check_offset); 2709 2710 BLOCK_COMMENT("type_check_slow_path:"); 2711 __ save_frame(0); 2712 __ check_klass_subtype_slow_path(sub_klass->after_save(), 2713 super_klass->after_save(), 2714 L0, L1, L2, L4, 2715 NULL, &L_pop_to_miss); 2716 __ ba(L_success); 2717 __ delayed()->restore(); 2718 2719 __ bind(L_pop_to_miss); 2720 __ restore(); 2721 2722 // Fall through on failure! 2723 __ BIND(L_miss); 2724 } 2725 2726 2727 // Generate stub for checked oop copy. 2728 // 2729 // Arguments for generated stub: 2730 // from: O0 2731 // to: O1 2732 // count: O2 treated as signed 2733 // ckoff: O3 (super_check_offset) 2734 // ckval: O4 (super_klass) 2735 // ret: O0 zero for success; (-1^K) where K is partial transfer count 2736 // 2737 address generate_checkcast_copy(const char *name, address *entry, bool dest_uninitialized = false) { 2738 2739 const Register O0_from = O0; // source array address 2740 const Register O1_to = O1; // destination array address 2741 const Register O2_count = O2; // elements count 2742 const Register O3_ckoff = O3; // super_check_offset 2743 const Register O4_ckval = O4; // super_klass 2744 2745 const Register O5_offset = O5; // loop var, with stride wordSize 2746 const Register G1_remain = G1; // loop var, with stride -1 2747 const Register G3_oop = G3; // actual oop copied 2748 const Register G4_klass = G4; // oop._klass 2749 const Register G5_super = G5; // oop._klass._primary_supers[ckval] 2750 2751 __ align(CodeEntryAlignment); 2752 StubCodeMark mark(this, "StubRoutines", name); 2753 address start = __ pc(); 2754 2755 #ifdef ASSERT 2756 // We sometimes save a frame (see generate_type_check below). 2757 // If this will cause trouble, let's fail now instead of later. 2758 __ save_frame(0); 2759 __ restore(); 2760 #endif 2761 2762 assert_clean_int(O2_count, G1); // Make sure 'count' is clean int. 2763 2764 #ifdef ASSERT 2765 // caller guarantees that the arrays really are different 2766 // otherwise, we would have to make conjoint checks 2767 { Label L; 2768 __ mov(O3, G1); // spill: overlap test smashes O3 2769 __ mov(O4, G4); // spill: overlap test smashes O4 2770 array_overlap_test(L, LogBytesPerHeapOop); 2771 __ stop("checkcast_copy within a single array"); 2772 __ bind(L); 2773 __ mov(G1, O3); 2774 __ mov(G4, O4); 2775 } 2776 #endif //ASSERT 2777 2778 if (entry != NULL) { 2779 *entry = __ pc(); 2780 // caller can pass a 64-bit byte count here (from generic stub) 2781 BLOCK_COMMENT("Entry:"); 2782 } 2783 gen_write_ref_array_pre_barrier(O1_to, O2_count, dest_uninitialized); 2784 2785 Label load_element, store_element, do_card_marks, fail, done; 2786 __ addcc(O2_count, 0, G1_remain); // initialize loop index, and test it 2787 __ brx(Assembler::notZero, false, Assembler::pt, load_element); 2788 __ delayed()->mov(G0, O5_offset); // offset from start of arrays 2789 2790 // Empty array: Nothing to do. 
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->set(0, O0);           // return 0 on (trivial) success

    // ======== begin loop ========
    // (Loop is rotated; its entry is load_element.)
    // Loop variables:
    //   (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
    //   (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
    //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
    __ align(OptoLoopAlignment);

    __ BIND(store_element);
    __ deccc(G1_remain);                // decrement the count
    __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
    __ inc(O5_offset, heapOopSize);     // step to next offset
    __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
    __ delayed()->set(0, O0);           // return 0 on success

    // ======== loop entry is here ========
    __ BIND(load_element);
    __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
    __ br_null_short(G3_oop, Assembler::pt, store_element);

    __ load_klass(G3_oop, G4_klass); // query the object klass

    generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
                        // branch to this on success:
                        store_element);
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register G1 has number of *remaining* oops, O2 number of *total* oops.
    // Emit GC store barriers for the oops we have copied (O2 minus G1),
    // and report their number to the caller.
    __ BIND(fail);
    __ subcc(O2_count, G1_remain, O2_count);
    __ brx(Assembler::zero, false, Assembler::pt, done);
    __ delayed()->not1(O2_count, O0); // report (-1^K) to caller

    __ BIND(do_card_marks);
    gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]

    __ BIND(done);
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->nop();             // return value in O0

    return start;
  }


  //  Generate 'unsafe' array copy stub
  //  Though just as safe as the other stubs, it takes an unscaled
  //  size_t argument instead of an element count.
  //
  //  Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 byte count, treated as ssize_t, can be zero
  //
  //  Examines the alignment of the operands and dispatches
  //  to a long, int, short, or byte copy loop.
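  //  Roughly equivalent dispatch, in C-like pseudocode (illustrative only;
  //  'bits' corresponds to G1_bits below):
  //    bits = (uintptr_t)from | (uintptr_t)to | count;
  //    if      ((bits & 7) == 0) long_copy (from, to, count >> 3);
  //    else if ((bits & 3) == 0) int_copy  (from, to, count >> 2);
  //    else if ((bits & 1) == 0) short_copy(from, to, count >> 1);
  //    else                      byte_copy (from, to, count);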
2854 // 2855 address generate_unsafe_copy(const char* name, 2856 address byte_copy_entry, 2857 address short_copy_entry, 2858 address int_copy_entry, 2859 address long_copy_entry) { 2860 2861 const Register O0_from = O0; // source array address 2862 const Register O1_to = O1; // destination array address 2863 const Register O2_count = O2; // elements count 2864 2865 const Register G1_bits = G1; // test copy of low bits 2866 2867 __ align(CodeEntryAlignment); 2868 StubCodeMark mark(this, "StubRoutines", name); 2869 address start = __ pc(); 2870 2871 // bump this on entry, not on exit: 2872 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3); 2873 2874 __ or3(O0_from, O1_to, G1_bits); 2875 __ or3(O2_count, G1_bits, G1_bits); 2876 2877 __ btst(BytesPerLong-1, G1_bits); 2878 __ br(Assembler::zero, true, Assembler::pt, 2879 long_copy_entry, relocInfo::runtime_call_type); 2880 // scale the count on the way out: 2881 __ delayed()->srax(O2_count, LogBytesPerLong, O2_count); 2882 2883 __ btst(BytesPerInt-1, G1_bits); 2884 __ br(Assembler::zero, true, Assembler::pt, 2885 int_copy_entry, relocInfo::runtime_call_type); 2886 // scale the count on the way out: 2887 __ delayed()->srax(O2_count, LogBytesPerInt, O2_count); 2888 2889 __ btst(BytesPerShort-1, G1_bits); 2890 __ br(Assembler::zero, true, Assembler::pt, 2891 short_copy_entry, relocInfo::runtime_call_type); 2892 // scale the count on the way out: 2893 __ delayed()->srax(O2_count, LogBytesPerShort, O2_count); 2894 2895 __ br(Assembler::always, false, Assembler::pt, 2896 byte_copy_entry, relocInfo::runtime_call_type); 2897 __ delayed()->nop(); 2898 2899 return start; 2900 } 2901 2902 2903 // Perform range checks on the proposed arraycopy. 2904 // Kills the two temps, but nothing else. 2905 // Also, clean the sign bits of src_pos and dst_pos. 2906 void arraycopy_range_checks(Register src, // source array oop (O0) 2907 Register src_pos, // source position (O1) 2908 Register dst, // destination array oo (O2) 2909 Register dst_pos, // destination position (O3) 2910 Register length, // length of copy (O4) 2911 Register temp1, Register temp2, 2912 Label& L_failed) { 2913 BLOCK_COMMENT("arraycopy_range_checks:"); 2914 2915 // if (src_pos + length > arrayOop(src)->length() ) FAIL; 2916 2917 const Register array_length = temp1; // scratch 2918 const Register end_pos = temp2; // scratch 2919 2920 // Note: This next instruction may be in the delay slot of a branch: 2921 __ add(length, src_pos, end_pos); // src_pos + length 2922 __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length); 2923 __ cmp(end_pos, array_length); 2924 __ br(Assembler::greater, false, Assembler::pn, L_failed); 2925 2926 // if (dst_pos + length > arrayOop(dst)->length() ) FAIL; 2927 __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length 2928 __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length); 2929 __ cmp(end_pos, array_length); 2930 __ br(Assembler::greater, false, Assembler::pn, L_failed); 2931 2932 // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. 2933 // Move with sign extension can be used since they are positive. 
2934 __ delayed()->signx(src_pos, src_pos); 2935 __ signx(dst_pos, dst_pos); 2936 2937 BLOCK_COMMENT("arraycopy_range_checks done"); 2938 } 2939 2940 2941 // 2942 // Generate generic array copy stubs 2943 // 2944 // Input: 2945 // O0 - src oop 2946 // O1 - src_pos 2947 // O2 - dst oop 2948 // O3 - dst_pos 2949 // O4 - element count 2950 // 2951 // Output: 2952 // O0 == 0 - success 2953 // O0 == -1 - need to call System.arraycopy 2954 // 2955 address generate_generic_copy(const char *name, 2956 address entry_jbyte_arraycopy, 2957 address entry_jshort_arraycopy, 2958 address entry_jint_arraycopy, 2959 address entry_oop_arraycopy, 2960 address entry_jlong_arraycopy, 2961 address entry_checkcast_arraycopy) { 2962 Label L_failed, L_objArray; 2963 2964 // Input registers 2965 const Register src = O0; // source array oop 2966 const Register src_pos = O1; // source position 2967 const Register dst = O2; // destination array oop 2968 const Register dst_pos = O3; // destination position 2969 const Register length = O4; // elements count 2970 2971 // registers used as temp 2972 const Register G3_src_klass = G3; // source array klass 2973 const Register G4_dst_klass = G4; // destination array klass 2974 const Register G5_lh = G5; // layout handler 2975 const Register O5_temp = O5; 2976 2977 __ align(CodeEntryAlignment); 2978 StubCodeMark mark(this, "StubRoutines", name); 2979 address start = __ pc(); 2980 2981 // bump this on entry, not on exit: 2982 inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3); 2983 2984 // In principle, the int arguments could be dirty. 2985 //assert_clean_int(src_pos, G1); 2986 //assert_clean_int(dst_pos, G1); 2987 //assert_clean_int(length, G1); 2988 2989 //----------------------------------------------------------------------- 2990 // Assembler stubs will be used for this call to arraycopy 2991 // if the following conditions are met: 2992 // 2993 // (1) src and dst must not be null. 2994 // (2) src_pos must not be negative. 2995 // (3) dst_pos must not be negative. 2996 // (4) length must not be negative. 2997 // (5) src klass and dst klass should be the same and not NULL. 2998 // (6) src and dst should be arrays. 2999 // (7) src_pos + length must not exceed length of src. 3000 // (8) dst_pos + length must not exceed length of dst. 3001 BLOCK_COMMENT("arraycopy initial argument checks"); 3002 3003 // if (src == NULL) return -1; 3004 __ br_null(src, false, Assembler::pn, L_failed); 3005 3006 // if (src_pos < 0) return -1; 3007 __ delayed()->tst(src_pos); 3008 __ br(Assembler::negative, false, Assembler::pn, L_failed); 3009 __ delayed()->nop(); 3010 3011 // if (dst == NULL) return -1; 3012 __ br_null(dst, false, Assembler::pn, L_failed); 3013 3014 // if (dst_pos < 0) return -1; 3015 __ delayed()->tst(dst_pos); 3016 __ br(Assembler::negative, false, Assembler::pn, L_failed); 3017 3018 // if (length < 0) return -1; 3019 __ delayed()->tst(length); 3020 __ br(Assembler::negative, false, Assembler::pn, L_failed); 3021 3022 BLOCK_COMMENT("arraycopy argument klass checks"); 3023 // get src->klass() 3024 if (UseCompressedOops) { 3025 __ delayed()->nop(); // ??? 
not good 3026 __ load_klass(src, G3_src_klass); 3027 } else { 3028 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass); 3029 } 3030 3031 #ifdef ASSERT 3032 // assert(src->klass() != NULL); 3033 BLOCK_COMMENT("assert klasses not null"); 3034 { Label L_a, L_b; 3035 __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it is broken if klass is NULL 3036 __ bind(L_a); 3037 __ stop("broken null klass"); 3038 __ bind(L_b); 3039 __ load_klass(dst, G4_dst_klass); 3040 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also 3041 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp 3042 BLOCK_COMMENT("assert done"); 3043 } 3044 #endif 3045 3046 // Load layout helper 3047 // 3048 // |array_tag| | header_size | element_type | |log2_element_size| 3049 // 32 30 24 16 8 2 0 3050 // 3051 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 3052 // 3053 3054 int lh_offset = klassOopDesc::header_size() * HeapWordSize + 3055 Klass::layout_helper_offset_in_bytes(); 3056 3057 // Load 32-bits signed value. Use br() instruction with it to check icc. 3058 __ lduw(G3_src_klass, lh_offset, G5_lh); 3059 3060 if (UseCompressedOops) { 3061 __ load_klass(dst, G4_dst_klass); 3062 } 3063 // Handle objArrays completely differently... 3064 juint objArray_lh = Klass::array_layout_helper(T_OBJECT); 3065 __ set(objArray_lh, O5_temp); 3066 __ cmp(G5_lh, O5_temp); 3067 __ br(Assembler::equal, false, Assembler::pt, L_objArray); 3068 if (UseCompressedOops) { 3069 __ delayed()->nop(); 3070 } else { 3071 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass); 3072 } 3073 3074 // if (src->klass() != dst->klass()) return -1; 3075 __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed); 3076 3077 // if (!src->is_Array()) return -1; 3078 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0 3079 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed); 3080 3081 // At this point, it is known to be a typeArray (array_tag 0x3). 
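    // For reference, a sketch of the layout helper decoding performed after
    // the range checks below (illustrative only):
    //   header_size_in_bytes = (lh >> _lh_header_size_shift) & _lh_header_size_mask;
    //   log2_element_size    =  lh & _lh_log2_element_size_mask;
    // which is what the code computes into G4_offset and G3_elsize.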
3082 #ifdef ASSERT 3083 __ delayed()->nop(); 3084 { Label L; 3085 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); 3086 __ set(lh_prim_tag_in_place, O5_temp); 3087 __ cmp(G5_lh, O5_temp); 3088 __ br(Assembler::greaterEqual, false, Assembler::pt, L); 3089 __ delayed()->nop(); 3090 __ stop("must be a primitive array"); 3091 __ bind(L); 3092 } 3093 #else 3094 __ delayed(); // match next insn to prev branch 3095 #endif 3096 3097 arraycopy_range_checks(src, src_pos, dst, dst_pos, length, 3098 O5_temp, G4_dst_klass, L_failed); 3099 3100 // typeArrayKlass 3101 // 3102 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); 3103 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); 3104 // 3105 3106 const Register G4_offset = G4_dst_klass; // array offset 3107 const Register G3_elsize = G3_src_klass; // log2 element size 3108 3109 __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset); 3110 __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset); // array_offset 3111 __ add(src, G4_offset, src); // src array offset 3112 __ add(dst, G4_offset, dst); // dst array offset 3113 __ and3(G5_lh, Klass::_lh_log2_element_size_mask, G3_elsize); // log2 element size 3114 3115 // next registers should be set before the jump to corresponding stub 3116 const Register from = O0; // source array address 3117 const Register to = O1; // destination array address 3118 const Register count = O2; // elements count 3119 3120 // 'from', 'to', 'count' registers should be set in this order 3121 // since they are the same as 'src', 'src_pos', 'dst'. 3122 3123 BLOCK_COMMENT("scale indexes to element size"); 3124 __ sll_ptr(src_pos, G3_elsize, src_pos); 3125 __ sll_ptr(dst_pos, G3_elsize, dst_pos); 3126 __ add(src, src_pos, from); // src_addr 3127 __ add(dst, dst_pos, to); // dst_addr 3128 3129 BLOCK_COMMENT("choose copy loop based on element size"); 3130 __ cmp(G3_elsize, 0); 3131 __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy); 3132 __ delayed()->signx(length, count); // length 3133 3134 __ cmp(G3_elsize, LogBytesPerShort); 3135 __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy); 3136 __ delayed()->signx(length, count); // length 3137 3138 __ cmp(G3_elsize, LogBytesPerInt); 3139 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy); 3140 __ delayed()->signx(length, count); // length 3141 #ifdef ASSERT 3142 { Label L; 3143 __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L); 3144 __ stop("must be long copy, but elsize is wrong"); 3145 __ bind(L); 3146 } 3147 #endif 3148 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy); 3149 __ delayed()->signx(length, count); // length 3150 3151 // objArrayKlass 3152 __ BIND(L_objArray); 3153 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length 3154 3155 Label L_plain_copy, L_checkcast_copy; 3156 // test array classes for subtyping 3157 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality 3158 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy); 3159 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below 3160 3161 // Identically typed arrays can be copied without element-wise checks. 
3162 arraycopy_range_checks(src, src_pos, dst, dst_pos, length, 3163 O5_temp, G5_lh, L_failed); 3164 3165 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset 3166 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset 3167 __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos); 3168 __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos); 3169 __ add(src, src_pos, from); // src_addr 3170 __ add(dst, dst_pos, to); // dst_addr 3171 __ BIND(L_plain_copy); 3172 __ br(Assembler::always, false, Assembler::pt, entry_oop_arraycopy); 3173 __ delayed()->signx(length, count); // length 3174 3175 __ BIND(L_checkcast_copy); 3176 // live at this point: G3_src_klass, G4_dst_klass 3177 { 3178 // Before looking at dst.length, make sure dst is also an objArray. 3179 // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot 3180 __ cmp(G5_lh, O5_temp); 3181 __ br(Assembler::notEqual, false, Assembler::pn, L_failed); 3182 3183 // It is safe to examine both src.length and dst.length. 3184 __ delayed(); // match next insn to prev branch 3185 arraycopy_range_checks(src, src_pos, dst, dst_pos, length, 3186 O5_temp, G5_lh, L_failed); 3187 3188 // Marshal the base address arguments now, freeing registers. 3189 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset 3190 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset 3191 __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos); 3192 __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos); 3193 __ add(src, src_pos, from); // src_addr 3194 __ add(dst, dst_pos, to); // dst_addr 3195 __ signx(length, count); // length (reloaded) 3196 3197 Register sco_temp = O3; // this register is free now 3198 assert_different_registers(from, to, count, sco_temp, 3199 G4_dst_klass, G3_src_klass); 3200 3201 // Generate the type check. 3202 int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 3203 Klass::super_check_offset_offset_in_bytes()); 3204 __ lduw(G4_dst_klass, sco_offset, sco_temp); 3205 generate_type_check(G3_src_klass, sco_temp, G4_dst_klass, 3206 O5_temp, L_plain_copy); 3207 3208 // Fetch destination element klass from the objArrayKlass header. 3209 int ek_offset = (klassOopDesc::header_size() * HeapWordSize + 3210 objArrayKlass::element_klass_offset_in_bytes()); 3211 3212 // the checkcast_copy loop needs two extra arguments: 3213 __ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass 3214 // lduw(O4, sco_offset, O3); // sco of elem klass 3215 3216 __ br(Assembler::always, false, Assembler::pt, entry_checkcast_arraycopy); 3217 __ delayed()->lduw(O4, sco_offset, O3); 3218 } 3219 3220 __ BIND(L_failed); 3221 __ retl(); 3222 __ delayed()->sub(G0, 1, O0); // return -1 3223 return start; 3224 } 3225 3226 // 3227 // Generate stub for heap zeroing. 3228 // "to" address is aligned to jlong (8 bytes). 
3229 // 3230 // Arguments for generated stub: 3231 // to: O0 3232 // count: O1 treated as signed (count of HeapWord) 3233 // count could be 0 3234 // 3235 address generate_zero_aligned_words(const char* name) { 3236 __ align(CodeEntryAlignment); 3237 StubCodeMark mark(this, "StubRoutines", name); 3238 address start = __ pc(); 3239 3240 const Register to = O0; // source array address 3241 const Register count = O1; // HeapWords count 3242 const Register temp = O2; // scratch 3243 3244 Label Ldone; 3245 __ sllx(count, LogHeapWordSize, count); // to bytes count 3246 // Use BIS for zeroing 3247 __ bis_zeroing(to, count, temp, Ldone); 3248 __ bind(Ldone); 3249 __ retl(); 3250 __ delayed()->nop(); 3251 return start; 3252 } 3253 3254 void generate_arraycopy_stubs() { 3255 address entry; 3256 address entry_jbyte_arraycopy; 3257 address entry_jshort_arraycopy; 3258 address entry_jint_arraycopy; 3259 address entry_oop_arraycopy; 3260 address entry_jlong_arraycopy; 3261 address entry_checkcast_arraycopy; 3262 3263 //*** jbyte 3264 // Always need aligned and unaligned versions 3265 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, 3266 "jbyte_disjoint_arraycopy"); 3267 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, 3268 &entry_jbyte_arraycopy, 3269 "jbyte_arraycopy"); 3270 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry, 3271 "arrayof_jbyte_disjoint_arraycopy"); 3272 StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, 3273 "arrayof_jbyte_arraycopy"); 3274 3275 //*** jshort 3276 // Always need aligned and unaligned versions 3277 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, 3278 "jshort_disjoint_arraycopy"); 3279 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, 3280 &entry_jshort_arraycopy, 3281 "jshort_arraycopy"); 3282 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, 3283 "arrayof_jshort_disjoint_arraycopy"); 3284 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, 3285 "arrayof_jshort_arraycopy"); 3286 3287 //*** jint 3288 // Aligned versions 3289 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, 3290 "arrayof_jint_disjoint_arraycopy"); 3291 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, 3292 "arrayof_jint_arraycopy"); 3293 #ifdef _LP64 3294 // In 64 bit we need both aligned and unaligned versions of jint arraycopy. 3295 // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it). 3296 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, 3297 "jint_disjoint_arraycopy"); 3298 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, 3299 &entry_jint_arraycopy, 3300 "jint_arraycopy"); 3301 #else 3302 // In 32 bit jints are always HeapWordSize aligned, so always use the aligned version 3303 // (in fact in 32bit we always have a pre-loop part even in the aligned version, 3304 // because it uses 64-bit loads/stores, so the aligned flag is actually ignored). 
3305 StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy; 3306 StubRoutines::_jint_arraycopy = StubRoutines::_arrayof_jint_arraycopy; 3307 #endif 3308 3309 3310 //*** jlong 3311 // It is always aligned 3312 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, 3313 "arrayof_jlong_disjoint_arraycopy"); 3314 StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, 3315 "arrayof_jlong_arraycopy"); 3316 StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; 3317 StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; 3318 3319 3320 //*** oops 3321 // Aligned versions 3322 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry, 3323 "arrayof_oop_disjoint_arraycopy"); 3324 StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy, 3325 "arrayof_oop_arraycopy"); 3326 // Aligned versions without pre-barriers 3327 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry, 3328 "arrayof_oop_disjoint_arraycopy_uninit", 3329 /*dest_uninitialized*/true); 3330 StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, entry, NULL, 3331 "arrayof_oop_arraycopy_uninit", 3332 /*dest_uninitialized*/true); 3333 #ifdef _LP64 3334 if (UseCompressedOops) { 3335 // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy. 3336 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry, 3337 "oop_disjoint_arraycopy"); 3338 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy, 3339 "oop_arraycopy"); 3340 // Unaligned versions without pre-barriers 3341 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry, 3342 "oop_disjoint_arraycopy_uninit", 3343 /*dest_uninitialized*/true); 3344 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, entry, NULL, 3345 "oop_arraycopy_uninit", 3346 /*dest_uninitialized*/true); 3347 } else 3348 #endif 3349 { 3350 // oop arraycopy is always aligned on 32bit and 64bit without compressed oops 3351 StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; 3352 StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; 3353 StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; 3354 StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; 3355 } 3356 3357 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); 3358 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, 3359 /*dest_uninitialized*/true); 3360 3361 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", 3362 entry_jbyte_arraycopy, 3363 entry_jshort_arraycopy, 3364 entry_jint_arraycopy, 3365 entry_jlong_arraycopy); 3366 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", 3367 entry_jbyte_arraycopy, 3368 entry_jshort_arraycopy, 3369 entry_jint_arraycopy, 3370 entry_oop_arraycopy, 3371 entry_jlong_arraycopy, 3372 entry_checkcast_arraycopy); 3373 3374 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); 3375 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); 3376 

  void generate_initial() {
    // Generates the initial set of stubs and initializes their entry points

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that exist in all platforms
    // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
    //       the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
    StubRoutines::_forward_exception_entry                 = generate_forward_exception();

    StubRoutines::_call_stub_entry                         = generate_call_stub(StubRoutines::_call_stub_return_address);
    StubRoutines::_catch_exception_entry                   = generate_catch_exception();

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that are platform specific
    StubRoutines::Sparc::_test_stop_entry                  = generate_test_stop();

    StubRoutines::Sparc::_stop_subroutine_entry            = generate_stop_subroutine();
    StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();

#if !defined(COMPILER2) && !defined(_LP64)
    StubRoutines::_atomic_xchg_entry         = generate_atomic_xchg();
    StubRoutines::_atomic_cmpxchg_entry      = generate_atomic_cmpxchg();
    StubRoutines::_atomic_add_entry          = generate_atomic_add();
    StubRoutines::_atomic_xchg_ptr_entry     = StubRoutines::_atomic_xchg_entry;
    StubRoutines::_atomic_cmpxchg_ptr_entry  = StubRoutines::_atomic_cmpxchg_entry;
    StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
    StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
#endif  // COMPILER2 !=> _LP64

    // Build this early so it's available for the interpreter.  The
    // stub expects the required and actual types to already be in O1
    // and O2 respectively.
    StubRoutines::_throw_WrongMethodTypeException_entry =
      generate_throw_exception("WrongMethodTypeException throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
                               false, G5_method_type, G3_method_handle);
  }
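
  // The split between generate_initial() above and generate_all() below
  // reflects the VM's two-phase stub setup: an early batch is produced before
  // the heap and universe are initialized, and the remainder only afterwards,
  // because stubs such as partial_subtype_check depend on flags (e.g.
  // UseZeroBaseCompressedOops) that are only settled during heap setup.
  // A sketch of the driving code, as far as I recall it from the shared
  // runtime (illustrative; the actual calls live in stubRoutines.cpp/init.cpp):
  //
  //   void stubRoutines_init1() { StubRoutines::initialize1(); }  // -> StubGenerator_generate(code, false)
  //   void stubRoutines_init2() { StubRoutines::initialize2(); }  // -> StubGenerator_generate(code, true)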

  void generate_all() {
    // Generates all remaining stubs and initializes the entry points

    // Generate partial_subtype_check first here since its code depends on
    // UseZeroBaseCompressedOops which is defined after heap initialization.
    StubRoutines::Sparc::_partial_subtype_check             = generate_partial_subtype_check();
    // These entry points require SharedInfo::stack0 to be set up in non-core builds
    StubRoutines::_throw_AbstractMethodError_entry          = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
    StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
    StubRoutines::_throw_ArithmeticException_entry          = generate_throw_exception("ArithmeticException throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException),  true);
    StubRoutines::_throw_NullPointerException_entry         = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
    StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
    StubRoutines::_throw_StackOverflowError_entry           = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),   false);

    StubRoutines::_handler_for_unsafe_access_entry =
      generate_handler_for_unsafe_access();

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    // Don't initialize the platform math functions since sparc
    // doesn't have intrinsics for these operations.
  }


 public:

  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    // replace the standard masm with a special one:
    _masm = new MacroAssembler(code);

    _stub_count = !all ? 0x100 : 0x200;
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }

    // make sure this stub is available for all local calls
    if (_atomic_add_stub.is_unbound()) {
      // generate a second time, if necessary
      (void) generate_atomic_add();
    }
  }


 private:
  int _stub_count;

  void stub_prolog(StubCodeDesc* cdesc) {
#ifdef ASSERT
    // put extra information in the stub code, to make it more readable
#ifdef _LP64
    // Write the high part of the address
    // [RGV] Check if there is a dependency on the size of this prolog
    __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
#endif
    __ emit_data((intptr_t)cdesc,       relocInfo::none);
    __ emit_data(++_stub_count,         relocInfo::none);
#endif
    align(true);
  }

  void align(bool at_header = false) {
    // %%%%% move this constant somewhere else
    // UltraSPARC cache line size is 8 instructions:
    const unsigned int icache_line_size = 32;
    const unsigned int icache_half_line_size = 16;

    if (at_header) {
      while ((intptr_t)(__ pc()) % icache_line_size != 0) {
        __ emit_data(0, relocInfo::none);
      }
    } else {
      while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
        __ nop();
      }
    }
  }

}; // end class declaration

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}
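
// Debug-build note (a sketch, not generator code): in ASSERT builds
// stub_prolog() plants the StubCodeDesc* and a running stub count in front of
// each stub, so a raw code dump can be mapped back to its descriptor.  On
// _LP64 the pointer is emitted as two 32-bit data words, high half first;
// something along these lines (illustrative only) would reassemble it:
//
//   StubCodeDesc* read_prolog_desc(const uint32_t* w) {
//     intptr_t p = ((intptr_t)w[0] << 32) | (uint32_t)w[1];  // LP64 layout
//     return (StubCodeDesc*)p;
//   }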