/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/assembler.hpp" 27 #include "asm/assembler.inline.hpp" 28 #include "compiler/disassembler.hpp" 29 #include "gc_interface/collectedHeap.inline.hpp" 30 #include "interpreter/interpreter.hpp" 31 #include "memory/cardTableModRefBS.hpp" 32 #include "memory/resourceArea.hpp" 33 #include "prims/methodHandles.hpp" 34 #include "runtime/biasedLocking.hpp" 35 #include "runtime/interfaceSupport.hpp" 36 #include "runtime/objectMonitor.hpp" 37 #include "runtime/os.hpp" 38 #include "runtime/sharedRuntime.hpp" 39 #include "runtime/stubRoutines.hpp" 40 #ifndef SERIALGC 41 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 42 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 43 #include "gc_implementation/g1/heapRegion.hpp" 44 #endif 45 46 #ifdef PRODUCT 47 #define BLOCK_COMMENT(str) /* nothing */ 48 #define STOP(error) stop(error) 49 #else 50 #define BLOCK_COMMENT(str) block_comment(str) 51 #define STOP(error) block_comment(error); stop(error) 52 #endif 53 54 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 55 56 57 #ifdef ASSERT 58 bool AbstractAssembler::pd_check_instruction_mark() { return true; } 59 #endif 60 61 static Assembler::Condition reverse[] = { 62 Assembler::noOverflow /* overflow = 0x0 */ , 63 Assembler::overflow /* noOverflow = 0x1 */ , 64 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , 65 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , 66 Assembler::notZero /* zero = 0x4, equal = 0x4 */ , 67 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , 68 Assembler::above /* belowEqual = 0x6 */ , 69 Assembler::belowEqual /* above = 0x7 */ , 70 Assembler::positive /* negative = 0x8 */ , 71 Assembler::negative /* positive = 0x9 */ , 72 Assembler::noParity /* parity = 0xa */ , 73 Assembler::parity /* noParity = 0xb */ , 74 Assembler::greaterEqual /* less = 0xc */ , 75 Assembler::less /* greaterEqual = 0xd */ , 76 Assembler::greater /* lessEqual = 0xe */ , 77 
Assembler::lessEqual /* greater = 0xf, */ 78 79 }; 80 81 82 // Implementation of MacroAssembler 83 84 // First all the versions that have distinct versions depending on 32/64 bit 85 // Unless the difference is trivial (1 line or so). 86 87 #ifndef _LP64 88 89 // 32bit versions 90 91 Address MacroAssembler::as_Address(AddressLiteral adr) { 92 return Address(adr.target(), adr.rspec()); 93 } 94 95 Address MacroAssembler::as_Address(ArrayAddress adr) { 96 return Address::make_array(adr); 97 } 98 99 int MacroAssembler::biased_locking_enter(Register lock_reg, 100 Register obj_reg, 101 Register swap_reg, 102 Register tmp_reg, 103 bool swap_reg_contains_mark, 104 Label& done, 105 Label* slow_case, 106 BiasedLockingCounters* counters) { 107 assert(UseBiasedLocking, "why call this otherwise?"); 108 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); 109 assert_different_registers(lock_reg, obj_reg, swap_reg); 110 111 if (PrintBiasedLockingStatistics && counters == NULL) 112 counters = BiasedLocking::counters(); 113 114 bool need_tmp_reg = false; 115 if (tmp_reg == noreg) { 116 need_tmp_reg = true; 117 tmp_reg = lock_reg; 118 } else { 119 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 120 } 121 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 122 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 123 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); 124 Address saved_mark_addr(lock_reg, 0); 125 126 // Biased locking 127 // See whether the lock is currently biased toward our thread and 128 // whether the epoch is still valid 129 // Note that the runtime guarantees sufficient alignment of JavaThread 130 // pointers to allow age to be placed into low bits 131 // First check to see whether biasing is even enabled for this object 132 Label cas_label; 133 int null_check_offset = -1; 134 if (!swap_reg_contains_mark) { 135 
null_check_offset = offset(); 136 movl(swap_reg, mark_addr); 137 } 138 if (need_tmp_reg) { 139 push(tmp_reg); 140 } 141 movl(tmp_reg, swap_reg); 142 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); 143 cmpl(tmp_reg, markOopDesc::biased_lock_pattern); 144 if (need_tmp_reg) { 145 pop(tmp_reg); 146 } 147 jcc(Assembler::notEqual, cas_label); 148 // The bias pattern is present in the object's header. Need to check 149 // whether the bias owner and the epoch are both still current. 150 // Note that because there is no current thread register on x86 we 151 // need to store off the mark word we read out of the object to 152 // avoid reloading it and needing to recheck invariants below. This 153 // store is unfortunate but it makes the overall code shorter and 154 // simpler. 155 movl(saved_mark_addr, swap_reg); 156 if (need_tmp_reg) { 157 push(tmp_reg); 158 } 159 get_thread(tmp_reg); 160 xorl(swap_reg, tmp_reg); 161 if (swap_reg_contains_mark) { 162 null_check_offset = offset(); 163 } 164 movl(tmp_reg, klass_addr); 165 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); 166 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); 167 if (need_tmp_reg) { 168 pop(tmp_reg); 169 } 170 if (counters != NULL) { 171 cond_inc32(Assembler::zero, 172 ExternalAddress((address)counters->biased_lock_entry_count_addr())); 173 } 174 jcc(Assembler::equal, done); 175 176 Label try_revoke_bias; 177 Label try_rebias; 178 179 // At this point we know that the header has the bias pattern and 180 // that we are not the bias owner in the current epoch. We need to 181 // figure out more details about the state of the header in order to 182 // know what operations can be legally performed on the object's 183 // header. 184 185 // If the low three bits in the xor result aren't clear, that means 186 // the prototype header is no longer biased and we have to revoke 187 // the bias on this object. 
188 testl(swap_reg, markOopDesc::biased_lock_mask_in_place); 189 jcc(Assembler::notZero, try_revoke_bias); 190 191 // Biasing is still enabled for this data type. See whether the 192 // epoch of the current bias is still valid, meaning that the epoch 193 // bits of the mark word are equal to the epoch bits of the 194 // prototype header. (Note that the prototype header's epoch bits 195 // only change at a safepoint.) If not, attempt to rebias the object 196 // toward the current thread. Note that we must be absolutely sure 197 // that the current epoch is invalid in order to do this because 198 // otherwise the manipulations it performs on the mark word are 199 // illegal. 200 testl(swap_reg, markOopDesc::epoch_mask_in_place); 201 jcc(Assembler::notZero, try_rebias); 202 203 // The epoch of the current bias is still valid but we know nothing 204 // about the owner; it might be set or it might be clear. Try to 205 // acquire the bias of the object using an atomic operation. If this 206 // fails we will go in to the runtime to revoke the object's bias. 207 // Note that we first construct the presumed unbiased header so we 208 // don't accidentally blow away another thread's valid bias. 209 movl(swap_reg, saved_mark_addr); 210 andl(swap_reg, 211 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 212 if (need_tmp_reg) { 213 push(tmp_reg); 214 } 215 get_thread(tmp_reg); 216 orl(tmp_reg, swap_reg); 217 if (os::is_MP()) { 218 lock(); 219 } 220 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 221 if (need_tmp_reg) { 222 pop(tmp_reg); 223 } 224 // If the biasing toward our thread failed, this means that 225 // another thread succeeded in biasing it toward itself and we 226 // need to revoke that bias. The revocation will occur in the 227 // interpreter runtime in the slow case. 
228 if (counters != NULL) { 229 cond_inc32(Assembler::zero, 230 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); 231 } 232 if (slow_case != NULL) { 233 jcc(Assembler::notZero, *slow_case); 234 } 235 jmp(done); 236 237 bind(try_rebias); 238 // At this point we know the epoch has expired, meaning that the 239 // current "bias owner", if any, is actually invalid. Under these 240 // circumstances _only_, we are allowed to use the current header's 241 // value as the comparison value when doing the cas to acquire the 242 // bias in the current epoch. In other words, we allow transfer of 243 // the bias from one thread to another directly in this situation. 244 // 245 // FIXME: due to a lack of registers we currently blow away the age 246 // bits in this situation. Should attempt to preserve them. 247 if (need_tmp_reg) { 248 push(tmp_reg); 249 } 250 get_thread(tmp_reg); 251 movl(swap_reg, klass_addr); 252 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); 253 movl(swap_reg, saved_mark_addr); 254 if (os::is_MP()) { 255 lock(); 256 } 257 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 258 if (need_tmp_reg) { 259 pop(tmp_reg); 260 } 261 // If the biasing toward our thread failed, then another thread 262 // succeeded in biasing it toward itself and we need to revoke that 263 // bias. The revocation will occur in the runtime in the slow case. 264 if (counters != NULL) { 265 cond_inc32(Assembler::zero, 266 ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); 267 } 268 if (slow_case != NULL) { 269 jcc(Assembler::notZero, *slow_case); 270 } 271 jmp(done); 272 273 bind(try_revoke_bias); 274 // The prototype mark in the klass doesn't have the bias bit set any 275 // more, indicating that objects of this data type are not supposed 276 // to be biased any more. We are going to try to reset the mark of 277 // this object to the prototype value and fall through to the 278 // CAS-based locking scheme. 
Note that if our CAS fails, it means 279 // that another thread raced us for the privilege of revoking the 280 // bias of this particular object, so it's okay to continue in the 281 // normal locking code. 282 // 283 // FIXME: due to a lack of registers we currently blow away the age 284 // bits in this situation. Should attempt to preserve them. 285 movl(swap_reg, saved_mark_addr); 286 if (need_tmp_reg) { 287 push(tmp_reg); 288 } 289 movl(tmp_reg, klass_addr); 290 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); 291 if (os::is_MP()) { 292 lock(); 293 } 294 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 295 if (need_tmp_reg) { 296 pop(tmp_reg); 297 } 298 // Fall through to the normal CAS-based lock, because no matter what 299 // the result of the above CAS, some thread must have succeeded in 300 // removing the bias bit from the object's header. 301 if (counters != NULL) { 302 cond_inc32(Assembler::zero, 303 ExternalAddress((address)counters->revoked_lock_entry_count_addr())); 304 } 305 306 bind(cas_label); 307 308 return null_check_offset; 309 } 310 void MacroAssembler::call_VM_leaf_base(address entry_point, 311 int number_of_arguments) { 312 call(RuntimeAddress(entry_point)); 313 increment(rsp, number_of_arguments * wordSize); 314 } 315 316 void MacroAssembler::cmpklass(Address src1, Metadata* obj) { 317 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 318 } 319 320 void MacroAssembler::cmpklass(Register src1, Metadata* obj) { 321 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 322 } 323 324 void MacroAssembler::cmpoop(Address src1, jobject obj) { 325 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 326 } 327 328 void MacroAssembler::cmpoop(Register src1, jobject obj) { 329 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 330 } 331 332 void MacroAssembler::extend_sign(Register hi, Register lo) { 333 // According to Intel Doc. 
AP-526, "Integer Divide", p.18. 334 if (VM_Version::is_P6() && hi == rdx && lo == rax) { 335 cdql(); 336 } else { 337 movl(hi, lo); 338 sarl(hi, 31); 339 } 340 } 341 342 void MacroAssembler::jC2(Register tmp, Label& L) { 343 // set parity bit if FPU flag C2 is set (via rax) 344 save_rax(tmp); 345 fwait(); fnstsw_ax(); 346 sahf(); 347 restore_rax(tmp); 348 // branch 349 jcc(Assembler::parity, L); 350 } 351 352 void MacroAssembler::jnC2(Register tmp, Label& L) { 353 // set parity bit if FPU flag C2 is set (via rax) 354 save_rax(tmp); 355 fwait(); fnstsw_ax(); 356 sahf(); 357 restore_rax(tmp); 358 // branch 359 jcc(Assembler::noParity, L); 360 } 361 362 // 32bit can do a case table jump in one instruction but we no longer allow the base 363 // to be installed in the Address class 364 void MacroAssembler::jump(ArrayAddress entry) { 365 jmp(as_Address(entry)); 366 } 367 368 // Note: y_lo will be destroyed 369 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 370 // Long compare for Java (semantics as described in JVM spec.) 
371 Label high, low, done; 372 373 cmpl(x_hi, y_hi); 374 jcc(Assembler::less, low); 375 jcc(Assembler::greater, high); 376 // x_hi is the return register 377 xorl(x_hi, x_hi); 378 cmpl(x_lo, y_lo); 379 jcc(Assembler::below, low); 380 jcc(Assembler::equal, done); 381 382 bind(high); 383 xorl(x_hi, x_hi); 384 increment(x_hi); 385 jmp(done); 386 387 bind(low); 388 xorl(x_hi, x_hi); 389 decrementl(x_hi); 390 391 bind(done); 392 } 393 394 void MacroAssembler::lea(Register dst, AddressLiteral src) { 395 mov_literal32(dst, (int32_t)src.target(), src.rspec()); 396 } 397 398 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 399 // leal(dst, as_Address(adr)); 400 // see note in movl as to why we must use a move 401 mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); 402 } 403 404 void MacroAssembler::leave() { 405 mov(rsp, rbp); 406 pop(rbp); 407 } 408 409 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { 410 // Multiplication of two Java long values stored on the stack 411 // as illustrated below. Result is in rdx:rax. 412 // 413 // rsp ---> [ ?? ] \ \ 414 // .... | y_rsp_offset | 415 // [ y_lo ] / (in bytes) | x_rsp_offset 416 // [ y_hi ] | (in bytes) 417 // .... | 418 // [ x_lo ] / 419 // [ x_hi ] 420 // .... 
421 // 422 // Basic idea: lo(result) = lo(x_lo * y_lo) 423 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 424 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); 425 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); 426 Label quick; 427 // load x_hi, y_hi and check if quick 428 // multiplication is possible 429 movl(rbx, x_hi); 430 movl(rcx, y_hi); 431 movl(rax, rbx); 432 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 433 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply 434 // do full multiplication 435 // 1st step 436 mull(y_lo); // x_hi * y_lo 437 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, 438 // 2nd step 439 movl(rax, x_lo); 440 mull(rcx); // x_lo * y_hi 441 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, 442 // 3rd step 443 bind(quick); // note: rbx, = 0 if quick multiply! 444 movl(rax, x_lo); 445 mull(y_lo); // x_lo * y_lo 446 addl(rdx, rbx); // correct hi(x_lo * y_lo) 447 } 448 449 void MacroAssembler::lneg(Register hi, Register lo) { 450 negl(lo); 451 adcl(hi, 0); 452 negl(hi); 453 } 454 455 void MacroAssembler::lshl(Register hi, Register lo) { 456 // Java shift left long support (semantics as described in JVM spec., p.305) 457 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) 458 // shift value is in rcx ! 459 assert(hi != rcx, "must not use rcx"); 460 assert(lo != rcx, "must not use rcx"); 461 const Register s = rcx; // shift count 462 const int n = BitsPerWord; 463 Label L; 464 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 465 cmpl(s, n); // if (s < n) 466 jcc(Assembler::less, L); // else (s >= n) 467 movl(hi, lo); // x := x << n 468 xorl(lo, lo); 469 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 
470 bind(L); // s (mod n) < n 471 shldl(hi, lo); // x := x << s 472 shll(lo); 473 } 474 475 476 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { 477 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) 478 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) 479 assert(hi != rcx, "must not use rcx"); 480 assert(lo != rcx, "must not use rcx"); 481 const Register s = rcx; // shift count 482 const int n = BitsPerWord; 483 Label L; 484 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 485 cmpl(s, n); // if (s < n) 486 jcc(Assembler::less, L); // else (s >= n) 487 movl(lo, hi); // x := x >> n 488 if (sign_extension) sarl(hi, 31); 489 else xorl(hi, hi); 490 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 491 bind(L); // s (mod n) < n 492 shrdl(lo, hi); // x := x >> s 493 if (sign_extension) sarl(hi); 494 else shrl(hi); 495 } 496 497 void MacroAssembler::movoop(Register dst, jobject obj) { 498 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 499 } 500 501 void MacroAssembler::movoop(Address dst, jobject obj) { 502 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 503 } 504 505 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 506 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 507 } 508 509 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { 510 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 511 } 512 513 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 514 if (src.is_lval()) { 515 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); 516 } else { 517 movl(dst, as_Address(src)); 518 } 519 } 520 521 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 522 movl(as_Address(dst), src); 523 } 524 525 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 526 movl(dst, as_Address(src)); 527 } 528 529 // src 
should NEVER be a real pointer. Use AddressLiteral for true pointers 530 void MacroAssembler::movptr(Address dst, intptr_t src) { 531 movl(dst, src); 532 } 533 534 535 void MacroAssembler::pop_callee_saved_registers() { 536 pop(rcx); 537 pop(rdx); 538 pop(rdi); 539 pop(rsi); 540 } 541 542 void MacroAssembler::pop_fTOS() { 543 fld_d(Address(rsp, 0)); 544 addl(rsp, 2 * wordSize); 545 } 546 547 void MacroAssembler::push_callee_saved_registers() { 548 push(rsi); 549 push(rdi); 550 push(rdx); 551 push(rcx); 552 } 553 554 void MacroAssembler::push_fTOS() { 555 subl(rsp, 2 * wordSize); 556 fstp_d(Address(rsp, 0)); 557 } 558 559 560 void MacroAssembler::pushoop(jobject obj) { 561 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); 562 } 563 564 void MacroAssembler::pushklass(Metadata* obj) { 565 push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate()); 566 } 567 568 void MacroAssembler::pushptr(AddressLiteral src) { 569 if (src.is_lval()) { 570 push_literal32((int32_t)src.target(), src.rspec()); 571 } else { 572 pushl(as_Address(src)); 573 } 574 } 575 576 void MacroAssembler::set_word_if_not_zero(Register dst) { 577 xorl(dst, dst); 578 set_byte_if_not_zero(dst); 579 } 580 581 static void pass_arg0(MacroAssembler* masm, Register arg) { 582 masm->push(arg); 583 } 584 585 static void pass_arg1(MacroAssembler* masm, Register arg) { 586 masm->push(arg); 587 } 588 589 static void pass_arg2(MacroAssembler* masm, Register arg) { 590 masm->push(arg); 591 } 592 593 static void pass_arg3(MacroAssembler* masm, Register arg) { 594 masm->push(arg); 595 } 596 597 #ifndef PRODUCT 598 extern "C" void findpc(intptr_t x); 599 #endif 600 601 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { 602 // In order to get locks to work, we need to fake a in_VM state 603 JavaThread* thread = JavaThread::current(); 604 JavaThreadState saved_state = thread->thread_state(); 605 
thread->set_thread_state(_thread_in_vm); 606 if (ShowMessageBoxOnError) { 607 JavaThread* thread = JavaThread::current(); 608 JavaThreadState saved_state = thread->thread_state(); 609 thread->set_thread_state(_thread_in_vm); 610 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 611 ttyLocker ttyl; 612 BytecodeCounter::print(); 613 } 614 // To see where a verify_oop failed, get $ebx+40/X for this frame. 615 // This is the value of eip which points to where verify_oop will return. 616 if (os::message_box(msg, "Execution stopped, print registers?")) { 617 print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip); 618 BREAKPOINT; 619 } 620 } else { 621 ttyLocker ttyl; 622 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 623 } 624 // Don't assert holding the ttyLock 625 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 626 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 627 } 628 629 void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { 630 ttyLocker ttyl; 631 FlagSetting fs(Debugging, true); 632 tty->print_cr("eip = 0x%08x", eip); 633 #ifndef PRODUCT 634 if ((WizardMode || Verbose) && PrintMiscellaneous) { 635 tty->cr(); 636 findpc(eip); 637 tty->cr(); 638 } 639 #endif 640 #define PRINT_REG(rax) \ 641 { tty->print("%s = ", #rax); os::print_location(tty, rax); } 642 PRINT_REG(rax); 643 PRINT_REG(rbx); 644 PRINT_REG(rcx); 645 PRINT_REG(rdx); 646 PRINT_REG(rdi); 647 PRINT_REG(rsi); 648 PRINT_REG(rbp); 649 PRINT_REG(rsp); 650 #undef PRINT_REG 651 // Print some words near top of staack. 
652 int* dump_sp = (int*) rsp; 653 for (int col1 = 0; col1 < 8; col1++) { 654 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); 655 os::print_location(tty, *dump_sp++); 656 } 657 for (int row = 0; row < 16; row++) { 658 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); 659 for (int col = 0; col < 8; col++) { 660 tty->print(" 0x%08x", *dump_sp++); 661 } 662 tty->cr(); 663 } 664 // Print some instructions around pc: 665 Disassembler::decode((address)eip-64, (address)eip); 666 tty->print_cr("--------"); 667 Disassembler::decode((address)eip, (address)eip+32); 668 } 669 670 void MacroAssembler::stop(const char* msg) { 671 ExternalAddress message((address)msg); 672 // push address of message 673 pushptr(message.addr()); 674 { Label L; call(L, relocInfo::none); bind(L); } // push eip 675 pusha(); // push registers 676 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); 677 hlt(); 678 } 679 680 void MacroAssembler::warn(const char* msg) { 681 push_CPU_state(); 682 683 ExternalAddress message((address) msg); 684 // push address of message 685 pushptr(message.addr()); 686 687 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); 688 addl(rsp, wordSize); // discard argument 689 pop_CPU_state(); 690 } 691 692 void MacroAssembler::print_state() { 693 { Label L; call(L, relocInfo::none); bind(L); } // push eip 694 pusha(); // push registers 695 696 push_CPU_state(); 697 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32))); 698 pop_CPU_state(); 699 700 popa(); 701 addl(rsp, wordSize); 702 } 703 704 #else // _LP64 705 706 // 64 bit versions 707 708 Address MacroAssembler::as_Address(AddressLiteral adr) { 709 // amd64 always does this as a pc-rel 710 // we can be absolute or disp based on the instruction type 711 // jmp/call are displacements others are absolute 712 assert(!adr.is_lval(), "must be rval"); 713 assert(reachable(adr), 
"must be"); 714 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); 715 716 } 717 718 Address MacroAssembler::as_Address(ArrayAddress adr) { 719 AddressLiteral base = adr.base(); 720 lea(rscratch1, base); 721 Address index = adr.index(); 722 assert(index._disp == 0, "must not have disp"); // maybe it can? 723 Address array(rscratch1, index._index, index._scale, index._disp); 724 return array; 725 } 726 727 int MacroAssembler::biased_locking_enter(Register lock_reg, 728 Register obj_reg, 729 Register swap_reg, 730 Register tmp_reg, 731 bool swap_reg_contains_mark, 732 Label& done, 733 Label* slow_case, 734 BiasedLockingCounters* counters) { 735 assert(UseBiasedLocking, "why call this otherwise?"); 736 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); 737 assert(tmp_reg != noreg, "tmp_reg must be supplied"); 738 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 739 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 740 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 741 Address saved_mark_addr(lock_reg, 0); 742 743 if (PrintBiasedLockingStatistics && counters == NULL) 744 counters = BiasedLocking::counters(); 745 746 // Biased locking 747 // See whether the lock is currently biased toward our thread and 748 // whether the epoch is still valid 749 // Note that the runtime guarantees sufficient alignment of JavaThread 750 // pointers to allow age to be placed into low bits 751 // First check to see whether biasing is even enabled for this object 752 Label cas_label; 753 int null_check_offset = -1; 754 if (!swap_reg_contains_mark) { 755 null_check_offset = offset(); 756 movq(swap_reg, mark_addr); 757 } 758 movq(tmp_reg, swap_reg); 759 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 760 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); 761 jcc(Assembler::notEqual, cas_label); 762 // The bias pattern 
is present in the object's header. Need to check 763 // whether the bias owner and the epoch are both still current. 764 load_prototype_header(tmp_reg, obj_reg); 765 orq(tmp_reg, r15_thread); 766 xorq(tmp_reg, swap_reg); 767 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); 768 if (counters != NULL) { 769 cond_inc32(Assembler::zero, 770 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 771 } 772 jcc(Assembler::equal, done); 773 774 Label try_revoke_bias; 775 Label try_rebias; 776 777 // At this point we know that the header has the bias pattern and 778 // that we are not the bias owner in the current epoch. We need to 779 // figure out more details about the state of the header in order to 780 // know what operations can be legally performed on the object's 781 // header. 782 783 // If the low three bits in the xor result aren't clear, that means 784 // the prototype header is no longer biased and we have to revoke 785 // the bias on this object. 786 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 787 jcc(Assembler::notZero, try_revoke_bias); 788 789 // Biasing is still enabled for this data type. See whether the 790 // epoch of the current bias is still valid, meaning that the epoch 791 // bits of the mark word are equal to the epoch bits of the 792 // prototype header. (Note that the prototype header's epoch bits 793 // only change at a safepoint.) If not, attempt to rebias the object 794 // toward the current thread. Note that we must be absolutely sure 795 // that the current epoch is invalid in order to do this because 796 // otherwise the manipulations it performs on the mark word are 797 // illegal. 798 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 799 jcc(Assembler::notZero, try_rebias); 800 801 // The epoch of the current bias is still valid but we know nothing 802 // about the owner; it might be set or it might be clear. Try to 803 // acquire the bias of the object using an atomic operation. 
If this 804 // fails we will go in to the runtime to revoke the object's bias. 805 // Note that we first construct the presumed unbiased header so we 806 // don't accidentally blow away another thread's valid bias. 807 andq(swap_reg, 808 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 809 movq(tmp_reg, swap_reg); 810 orq(tmp_reg, r15_thread); 811 if (os::is_MP()) { 812 lock(); 813 } 814 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 815 // If the biasing toward our thread failed, this means that 816 // another thread succeeded in biasing it toward itself and we 817 // need to revoke that bias. The revocation will occur in the 818 // interpreter runtime in the slow case. 819 if (counters != NULL) { 820 cond_inc32(Assembler::zero, 821 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 822 } 823 if (slow_case != NULL) { 824 jcc(Assembler::notZero, *slow_case); 825 } 826 jmp(done); 827 828 bind(try_rebias); 829 // At this point we know the epoch has expired, meaning that the 830 // current "bias owner", if any, is actually invalid. Under these 831 // circumstances _only_, we are allowed to use the current header's 832 // value as the comparison value when doing the cas to acquire the 833 // bias in the current epoch. In other words, we allow transfer of 834 // the bias from one thread to another directly in this situation. 835 // 836 // FIXME: due to a lack of registers we currently blow away the age 837 // bits in this situation. Should attempt to preserve them. 838 load_prototype_header(tmp_reg, obj_reg); 839 orq(tmp_reg, r15_thread); 840 if (os::is_MP()) { 841 lock(); 842 } 843 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 844 // If the biasing toward our thread failed, then another thread 845 // succeeded in biasing it toward itself and we need to revoke that 846 // bias. The revocation will occur in the runtime in the slow case. 
847 if (counters != NULL) { 848 cond_inc32(Assembler::zero, 849 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 850 } 851 if (slow_case != NULL) { 852 jcc(Assembler::notZero, *slow_case); 853 } 854 jmp(done); 855 856 bind(try_revoke_bias); 857 // The prototype mark in the klass doesn't have the bias bit set any 858 // more, indicating that objects of this data type are not supposed 859 // to be biased any more. We are going to try to reset the mark of 860 // this object to the prototype value and fall through to the 861 // CAS-based locking scheme. Note that if our CAS fails, it means 862 // that another thread raced us for the privilege of revoking the 863 // bias of this particular object, so it's okay to continue in the 864 // normal locking code. 865 // 866 // FIXME: due to a lack of registers we currently blow away the age 867 // bits in this situation. Should attempt to preserve them. 868 load_prototype_header(tmp_reg, obj_reg); 869 if (os::is_MP()) { 870 lock(); 871 } 872 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 873 // Fall through to the normal CAS-based lock, because no matter what 874 // the result of the above CAS, some thread must have succeeded in 875 // removing the bias bit from the object's header. 
876 if (counters != NULL) { 877 cond_inc32(Assembler::zero, 878 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 879 } 880 881 bind(cas_label); 882 883 return null_check_offset; 884 } 885 886 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 887 Label L, E; 888 889 #ifdef _WIN64 890 // Windows always allocates space for it's register args 891 assert(num_args <= 4, "only register arguments supported"); 892 subq(rsp, frame::arg_reg_save_area_bytes); 893 #endif 894 895 // Align stack if necessary 896 testl(rsp, 15); 897 jcc(Assembler::zero, L); 898 899 subq(rsp, 8); 900 { 901 call(RuntimeAddress(entry_point)); 902 } 903 addq(rsp, 8); 904 jmp(E); 905 906 bind(L); 907 { 908 call(RuntimeAddress(entry_point)); 909 } 910 911 bind(E); 912 913 #ifdef _WIN64 914 // restore stack pointer 915 addq(rsp, frame::arg_reg_save_area_bytes); 916 #endif 917 918 } 919 920 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 921 assert(!src2.is_lval(), "should use cmpptr"); 922 923 if (reachable(src2)) { 924 cmpq(src1, as_Address(src2)); 925 } else { 926 lea(rscratch1, src2); 927 Assembler::cmpq(src1, Address(rscratch1, 0)); 928 } 929 } 930 931 int MacroAssembler::corrected_idivq(Register reg) { 932 // Full implementation of Java ldiv and lrem; checks for special 933 // case as described in JVM spec., p.243 & p.271. The function 934 // returns the (pc) offset of the idivl instruction - may be needed 935 // for implicit exceptions. 
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case: min_long / -1 would fault in idivq, so Java
  // defines the result as min_long with remainder 0
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}

// Subtract a 32-bit constant from a 64-bit register, choosing the cheapest
// encoding (decq when value == 1 and UseIncDec; min_jint handled first
// because -min_jint overflows).
void MacroAssembler::decrementq(Register reg, int value) {
  if (value == min_jint) { subq(reg, value); return; }
  if (value <  0) { incrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(reg) ; return; }
  /* else */      { subq(reg, value)       ; return; }
}

void MacroAssembler::decrementq(Address dst, int value) {
  if (value == min_jint) { subq(dst, value); return; }
  if (value <  0) { incrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(dst) ; return; }
  /* else */      { subq(dst, value)       ; return; }
}

// Add a 32-bit constant to a 64-bit register/memory operand; mirror image
// of decrementq above.
void MacroAssembler::incrementq(Register reg, int value) {
  if (value == min_jint) { addq(reg, value); return; }
  if (value <  0) { decrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(reg) ; return; }
  /* else */      { addq(reg, value)       ; return; }
}

void MacroAssembler::incrementq(Address dst, int value) {
  if (value == min_jint) { addq(dst, value); return; }
  if (value <
 0) { decrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(dst) ; return; }
  /* else */      { addq(dst, value)       ; return; }
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}

void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}

// Load the literal's address (with its relocation info) into dst.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}

void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_byte(0xC9); // LEAVE
}

void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}

// Load an oop constant; recorded as an immediate-oop relocation so the GC
// can locate and update the embedded pointer.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

// Load a Metadata* constant with metadata relocation info.
void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

// Load either the literal itself (lval) or the value it points at.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1,0));
    }
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}

// These are mostly for initializing NULL
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}

void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}

void MacroAssembler::pushklass(Metadata* obj) {
  mov_metadata(rscratch1, obj);
  push(rscratch1);
}

// Push either the literal address (lval) or the value stored there.
void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}

// Clear the last-Java-frame anchor in the current thread (r15_thread) so
// stack walkers no longer see a pending Java frame.
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}

// Record sp / optional fp / optional pc in the thread's frame anchor before
// transitioning into the VM; sp is stored last.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() +
 JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Copy arg into the first C argument register unless it is already there.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}

// Emit code that dumps all registers via debug64 and halts, reporting msg.
void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}

// Emit code that prints a warning message and then continues execution;
// all CPU state is saved and restored around the runtime call.
void MacroAssembler::warn(const char* msg) {
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16);   // align stack as required by push_CPU_state and call
  push_CPU_state(); // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
}

// Emit code that prints the full register/stack state (via print_state64)
// and then resumes execution.
void MacroAssembler::print_state() {
  address rip = pc();
  pusha();          // get regs on stack
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16);   // align stack as required by push_CPU_state and call
  push_CPU_state(); // keeps alignment at 16 bytes

  lea(c_rarg0, InternalAddress(rip));
  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);

  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
  popa();
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Runtime helper behind stop(): optionally raises a message box, then
// prints the register state saved by pusha() (regs[] layout decoded in
// print_state64 below).
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state64(pc, regs);
      BREAKPOINT;
      assert(false, "start up GDB");
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}

// Print the pusha() register array (indices map pusha order to register
// names below), some words near the top of the saved stack, and a short
// disassembly window around pc.
void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);
  tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
  tty->cr();
  findpc(pc);
  tty->cr();
#endif
#define PRINT_REG(rax, value) \
  { tty->print("%s = ", #rax); os::print_location(tty, value); }
  PRINT_REG(rax, regs[15]);
  PRINT_REG(rbx, regs[12]);
  PRINT_REG(rcx, regs[14]);
  PRINT_REG(rdx, regs[13]);
  PRINT_REG(rdi, regs[8]);
  PRINT_REG(rsi, regs[9]);
  PRINT_REG(rbp, regs[10]);
  PRINT_REG(rsp, regs[11]);
  PRINT_REG(r8 , regs[7]);
  PRINT_REG(r9 , regs[6]);
  PRINT_REG(r10, regs[5]);
  PRINT_REG(r11, regs[4]);
  PRINT_REG(r12, regs[3]);
  PRINT_REG(r13, regs[2]);
  PRINT_REG(r14, regs[1]);
  PRINT_REG(r15, regs[0]);
#undef PRINT_REG
  // Print some words near top of stack.
  int64_t* rsp = (int64_t*) regs[11];
  int64_t* dump_sp = rsp;
  for (int col1 = 0; col1 < 8; col1++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  for (int row = 0; row < 25; row++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    for (int col = 0; col < 4; col++) {
      tty->print(" 0x%016lx", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)pc-64, (address)pc);
  tty->print_cr("--------");
  Disassembler::decode((address)pc, (address)pc+32);
}

#endif // _LP64

// Now versions that are common to 32/64 bit

void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

// addsd/addss with a literal operand; rscratch1 is used when the constant
// is not RIP-reachable.
void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::addsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::addsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    addss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    addss(dst, Address(rscratch1, 0));
  }
}

// Pad the instruction stream with nops so the next offset is a multiple
// of modulus.
void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andps(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

// Atomically increment the 32-bit counter at counter_addr; the condition
// flags are preserved by the surrounding pushf/popf.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}

// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages. This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this is can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}

// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

// Call through a literal target; uses rscratch1 when the target is not
// reachable by a direct (rel32) call.
void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}

// Emit an inline-cache call: rax is loaded with the non-oop cache marker
// and the call site is tagged with a virtual_call relocation.
void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  movptr(rax, (intptr_t)Universe::non_oop_word());
  call(AddressLiteral(entry, rh));
}

// Implementation of call_VM versions
//
// Each no-last_java_sp variant makes an intermediate call to label C so a
// return address lands on the stack; call_VM_helper then derives
// last_Java_sp/pc from it.

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  // args are moved in reverse order so a later move cannot smash an
  // earlier incoming arg register
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

void
 MacroAssembler::call_VM(Register oop_result,
                        address entry_point,
                        Register arg_1,
                        Register arg_2,
                        Register arg_3,
                        bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  // args are moved in reverse order so a later move cannot smash an
  // earlier incoming arg register
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

// Variants that take an explicit last_java_sp delegate to call_VM_base.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

// super_call_VM bypasses any subclass override of call_VM_base by calling
// MacroAssembler::call_VM_base explicitly.
void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   int number_of_arguments,
                                   bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   bool check_exceptions) {
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   Register arg_3,
                                   bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

// Common bottleneck for all call_VM variants: sets the last-Java-frame
// anchor, calls the VM entry point, restores the thread register, checks
// pending exceptions and fetches the oop result.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool
 check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
#ifdef ASSERT
  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
  // r12 is the heapbase.
  LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, java_thread);
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}

// super_call_VM_leaf bypasses any subclass override by calling
// MacroAssembler::call_VM_leaf_base explicitly.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point,
 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

// Fetch the oop result left by the VM call and clear the slot in the thread.
void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
  verify_oop(oop_result, "broken oop in call_VM_base");
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD);
}

// Intentionally empty here; subclasses (e.g. the interpreter's assembler)
// provide the real early-return / popframe handling.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}

// Compare two doubles and load -1/0/1 into dst; the unordered (NaN)
// outcome is mapped according to unordered_is_less (Java dcmpl vs dcmpg).
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

// Single-precision counterpart of cmpsd2int (Java fcmpl vs fcmpg).
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
} 1882 bind(L); 1883 } 1884 1885 1886 void MacroAssembler::cmp8(AddressLiteral src1, int imm) { 1887 if (reachable(src1)) { 1888 cmpb(as_Address(src1), imm); 1889 } else { 1890 lea(rscratch1, src1); 1891 cmpb(Address(rscratch1, 0), imm); 1892 } 1893 } 1894 1895 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { 1896 #ifdef _LP64 1897 if (src2.is_lval()) { 1898 movptr(rscratch1, src2); 1899 Assembler::cmpq(src1, rscratch1); 1900 } else if (reachable(src2)) { 1901 cmpq(src1, as_Address(src2)); 1902 } else { 1903 lea(rscratch1, src2); 1904 Assembler::cmpq(src1, Address(rscratch1, 0)); 1905 } 1906 #else 1907 if (src2.is_lval()) { 1908 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 1909 } else { 1910 cmpl(src1, as_Address(src2)); 1911 } 1912 #endif // _LP64 1913 } 1914 1915 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { 1916 assert(src2.is_lval(), "not a mem-mem compare"); 1917 #ifdef _LP64 1918 // moves src2's literal address 1919 movptr(rscratch1, src2); 1920 Assembler::cmpq(src1, rscratch1); 1921 #else 1922 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 1923 #endif // _LP64 1924 } 1925 1926 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { 1927 if (reachable(adr)) { 1928 if (os::is_MP()) 1929 lock(); 1930 cmpxchgptr(reg, as_Address(adr)); 1931 } else { 1932 lea(rscratch1, adr); 1933 if (os::is_MP()) 1934 lock(); 1935 cmpxchgptr(reg, Address(rscratch1, 0)); 1936 } 1937 } 1938 1939 void MacroAssembler::cmpxchgptr(Register reg, Address adr) { 1940 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); 1941 } 1942 1943 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { 1944 if (reachable(src)) { 1945 Assembler::comisd(dst, as_Address(src)); 1946 } else { 1947 lea(rscratch1, src); 1948 Assembler::comisd(dst, Address(rscratch1, 0)); 1949 } 1950 } 1951 1952 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 1953 if (reachable(src)) { 1954 
    Assembler::comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comiss(dst, Address(rscratch1, 0));
  }
}


// Atomically increment a 32-bit counter in memory, but only when
// condition 'cond' holds (jumps around the increment otherwise).
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}

int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                          special case
  //
  // input : rax,: dividend                       min_int
  //         reg:  divisor (may not be rax,/rdx)  -1
  //
  // output: rax,: quotient (= rax, idiv reg)     min_int
  //         rdx:  remainder (= rax, irem reg)    0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case (min_int / -1 would trap in hardware)
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}



// Subtract 'value' from reg using the shortest encoding. min_jint is
// handled first because it cannot be negated without overflow.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

// Memory-operand variant of decrementl above.
void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}

// Signed division by 2^shift_value: negative dividends are biased by
// (2^shift - 1) before the arithmetic shift so rounding is toward zero.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}

void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divss(dst, Address(rscratch1, 0));
  }
}

// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Empty the x87 stack: EMMS when MMX is available, otherwise free
// all eight registers individually.
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2


// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register
end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}

// Standard frame prologue: save the caller's rbp and establish a new
// frame pointer.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}

// A 5 byte nop that is safe for patching (see patch_verified_entry)
void MacroAssembler::fat_nop() {
  if (UseAddressNop) {
    addr_nop_5();
  } else {
    // Four segment-override prefixes plus a one-byte nop: five bytes that
    // execute as a single harmless instruction.
    emit_byte(0x26); // es:
    emit_byte(0x2e); // cs:
    emit_byte(0x64); // fs:
    emit_byte(0x65); // gs:
    emit_byte(0x90);
  }
}

// Compare ST0 against ST1 and pop both operands.
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

// Compare ST0 against ST(index), leaving the result in EFLAGS.
// tmp is only required (to preserve rax) on CPUs without FCOMI.
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}

void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

// Materialize the x87 comparison result as -1/0/+1 in dst, mapping the
// unordered (NaN) outcome according to unordered_is_less.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

void MacroAssembler::pow_exp_core_encoding() {
  // kills rax, rcx, rdx
  subptr(rsp,sizeof(jdouble));
  // computes 2^X. Stack: X ...
  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
  // keep it on the thread's stack to compute 2^int(X) later
  // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1)
  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
  fld_s(0);                 // Stack: X X ...
  frndint();                // Stack: int(X) X ...
  fsuba(1);                 // Stack: int(X) X-int(X) ...
  fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
  f2xm1();                 // Stack: 2^(X-int(X))-1 ...
  fld1();                  // Stack: 1 2^(X-int(X))-1 ...
  faddp(1);                // Stack: 2^(X-int(X))
  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
  // shift int(X)+1023 to exponent position.
  // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11
  // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
  // values so detect them and set result to NaN.
  movl(rax,Address(rsp,0));
  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
  addl(rax, 1023);
  movl(rdx,rax);
  shll(rax,20);
  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
  addl(rdx,1);
  // Check that 1 < int(X)+1023+1 < 2048
  // in 3 steps:
  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
  // 2- (int(X)+1023+1)&-2048 != 0
  // 3- (int(X)+1023+1)&-2048 != 1
  // Do 2- first because addl just updated the flags.
  cmov32(Assembler::equal,rax,rcx);
  cmpl(rdx,1);
  cmov32(Assembler::equal,rax,rcx);
  testl(rdx,rcx);
  cmov32(Assembler::notEqual,rax,rcx);
  movl(Address(rsp,4),rax);  // high word: sign/exponent (or NaN pattern)
  movl(Address(rsp,0),0);    // low word: zero mantissa bits
  fmul_d(Address(rsp,0));    // Stack: 2^X ...
  addptr(rsp,sizeof(jdouble));
}

// Raise the x87 precision-control field to 64-bit significand precision
// for the intermediate pow/exp computation; pairs with restore_precision().
void MacroAssembler::increase_precision() {
  subptr(rsp, BytesPerWord);
  fnstcw(Address(rsp, 0));
  movl(rax, Address(rsp, 0));
  orl(rax, 0x300);           // PC field = 11b (extended precision)
  push(rax);
  fldcw(Address(rsp, 0));
  pop(rax);
}

// Reload the control word saved by increase_precision() and pop it.
void MacroAssembler::restore_precision() {
  fldcw(Address(rsp, 0));
  addptr(rsp, BytesPerWord);
}

void MacroAssembler::fast_pow() {
  // computes X^Y = 2^(Y * log2(X))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
  increase_precision();
  fyl2x();                 // Stack: (Y*log2(X)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
  restore_precision();
}

void MacroAssembler::fast_exp() {
  // computes exp(X) = 2^(X * log2(e))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
  increase_precision();
  fldl2e();                // Stack: log2(e) X ...
  fmulp(1);                // Stack: (X*log2(e)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
  restore_precision();
}

// Compute pow (is_exp == false, Stack: X Y) or exp (is_exp == true,
// Stack: X) on the x87 stack, falling back to the shared runtime for
// inputs the fast path cannot handle (NaN results, negative base with
// non-integer exponent).
void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
  // kills rax, rcx, rdx
  // pow and exp needs 2 extra registers on the fpu stack.
  Label slow_case, done;
  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rdx,
    tmp = rdx;
  }
  Register tmp2 = rax;
  Register tmp3 = rcx;

  if (is_exp) {
    // Stack: X
    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
    fast_exp();                 // Stack: exp(X) X
    fcmp(tmp, 0, false, false); // Stack: exp(X) X
    // exp(X) not equal to itself: exp(X) is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate argument. Stack: exp(X)
    if (num_fpu_regs_in_use > 0) {
      fxch();
      fpop();
    } else {
      ffree(1);
    }
    jmp(done);
  } else {
    // Stack: X Y
    Label x_negative, y_odd;

    fldz();                     // Stack: 0 X Y
    fcmp(tmp, 1, true, false);  // Stack: X Y
    jcc(Assembler::above, x_negative);

    // X >= 0

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fast_pow();                 // Stack: X^Y X Y
    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
    // X^Y not equal to itself: X^Y is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }
    jmp(done);

    // X <= 0
    bind(x_negative);

    fld_s(1);                   // Stack: Y X Y
    frndint();                  // Stack: int(Y) X Y
    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
    jcc(Assembler::notEqual, slow_case);

    subptr(rsp, 8);

    // For X^Y, when X < 0, Y has to be an integer and the final
    // result depends on whether it's odd or even. We just checked
    // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
    // integer to test its parity. If int(Y) is huge and doesn't fit
    // in the 64 bit integer range, the integer indefinite value will
    // end up in the gp registers. Huge numbers are all even, the
    // integer indefinite number is even so it's fine.

#ifdef ASSERT
    // Let's check we don't end up with an integer indefinite number
    // when not expected. First test for huge numbers: check whether
    // int(Y)+1 == int(Y) which is true for very large numbers and
    // those are all even. A 64 bit integer is guaranteed to not
    // overflow for numbers where y+1 != y (when precision is set to
    // double precision).
    Label y_not_huge;

    fld1();                     // Stack: 1 int(Y) X Y
    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y

#ifdef _LP64
    // trip to memory to force the precision down from double extended
    // precision
    fstp_d(Address(rsp, 0));
    fld_d(Address(rsp, 0));
#endif

    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
#endif

    // move int(Y) as 64 bit integer to thread's stack
    fistp_d(Address(rsp,0));    // Stack: X Y

#ifdef ASSERT
    jcc(Assembler::notEqual, y_not_huge);

    // Y is huge so we know it's even. It may not fit in a 64 bit
    // integer and we don't want the debug code below to see the
    // integer indefinite value so overwrite int(Y) on the thread's
    // stack with 0.
    movl(Address(rsp, 0), 0);
    movl(Address(rsp, 4), 0);

    bind(y_not_huge);
#endif

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fabs();                     // Stack: abs(X) Y X Y
    fast_pow();                 // Stack: abs(X)^Y X Y
    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
    // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.

    pop(tmp2);
    NOT_LP64(pop(tmp3));
    jcc(Assembler::parity, slow_case);

#ifdef ASSERT
    // Check that int(Y) is not integer indefinite value (int
    // overflow). Shouldn't happen because for values that would
    // overflow, 1+int(Y)==Y which was tested earlier.
#ifndef _LP64
    {
      Label integer;
      testl(tmp2, tmp2);
      jcc(Assembler::notZero, integer);
      cmpl(tmp3, 0x80000000);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#else
    {
      Label integer;
      mov(tmp3, tmp2); // preserve tmp2 for parity check below
      shlq(tmp3, 1);
      jcc(Assembler::carryClear, integer);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#endif
#endif

    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }

    testl(tmp2, 1);
    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
    // X <= 0, Y odd: X^Y = -abs(X)^Y

    fchs();                     // Stack: -abs(X)^Y Y
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  fpop();                       // pop incorrect result or int(Y)

  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
                      is_exp ? 1 : 2, num_fpu_regs_in_use);

  // Come here with result in F-TOS
  bind(done);
}

// Pop the x87 top-of-stack: free the register, then advance the stack top.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

// Partial remainder ST0 = ST0 rem ST1: iterate FPREM until the status
// word's C2 flag reports the reduction is complete.
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);         // C2 bit of the x87 status word
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

// Add 'value' to reg using the shortest encoding. min_jint is handled
// first because it cannot be negated without overflow.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

// Memory-operand variant of incrementl above.
void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}

void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

// Conditional jump to an AddressLiteral. Reachable targets get a direct
// jcc (short form when no relocation is needed and the displacement fits
// in 8 bits); otherwise the condition is reversed around a
// register-indirect jump.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)pc());
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}

// Load a sign-extended byte; returns the code offset of the load
// instruction (may be needed for implicit null checks).
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}

// Load a value of 1, 2, 4 or 8 bytes. On 32-bit platforms an 8-byte load
// needs a second destination register (dst2) for the high word.
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    movl(dst,  src);
    movl(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  default:  ShouldNotReachHere();
  }
}

// Store a value of 1, 2, 4 or 8 bytes. On 32-bit platforms an 8-byte
// store takes the high word from src2.
void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    movl(dst,                        src);
    movl(dst.plus_disp(BytesPerInt), src2);
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  movw(dst, src); break;
  case  1:  movb(dst, src); break;
  default:  ShouldNotReachHere();
  }
}

void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  if (reachable(dst)) {
    movl(as_Address(dst), src);
  } else {
    lea(rscratch1, dst);
    movl(Address(rscratch1, 0), src);
  }
}

void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  if (reachable(src)) {
    movl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movl(dst, Address(rscratch1, 0));
  }
}

// C++ bool manipulation
// (sizeof(bool) is compiler-dependent, so pick the matching move width.)

void MacroAssembler::movbool(Register dst, Address src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, bool boolconst) {
  if(sizeof(bool) == 1)
    movb(dst, (int) boolconst);
  else if(sizeof(bool) == 2)
    movw(dst, (int) boolconst);
  else if(sizeof(bool) == 4)
    movl(dst, (int) boolconst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, Register src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}

void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movdl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movdl(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movq(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movq(dst, Address(rscratch1, 0));
  }
}

// Load a double into an XMM register; movsd clears the upper half,
// movlpd preserves it (selected by UseXmmLoadAndClearUpper).
void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, as_Address(src));
    } else {
      movlpd(dst, as_Address(src));
    }
  } else {
    lea(rscratch1, src);
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, Address(rscratch1, 0));
    } else {
      movlpd(dst, Address(rscratch1, 0));
    }
  }
}

void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// src should NEVER be a real pointer.
// Use AddressLiteral for true pointers.
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movdqu(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movdqu(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}

void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}

void MacroAssembler::pop_FPU_state() {
  NOT_LP64(frstor(Address(rsp, 0));)
  LP64_ONLY(fxrstor(Address(rsp, 0));)
  addptr(rsp, FPUStateSizeInWords * wordSize);
}

void MacroAssembler::pop_IU_state() {
  popa();
  LP64_ONLY(addq(rsp, 8));  // undo the alignment padding from push_IU_state
  popf();
}

// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}

void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));
  fwait();
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}

void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}

// Clear the last-Java-frame anchor fields in the current JavaThread so
// the stack walker no longer sees a transition frame.
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}

void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) pop(rax);
  else if (tmp != rax) mov(rax, tmp);
}

// Round reg up to the next multiple of 'modulus' using the add/mask
// trick (valid when modulus is a power of two).
void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}

void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) push(rax);
  else if (tmp != rax) mov(tmp, rax);
}

// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}

// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}

void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}

// Sign-extend the low byte of reg in place; pre-P6 CPUs without a byte
// register fall back to the shift pair.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}

// Sign-extend the low 16 bits of reg in place.
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}

void MacroAssembler::testl(Register dst, AddressLiteral src) {
  assert(reachable(src), "Address should be reachable");
  testl(dst, as_Address(src));
}

void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
Assembler::sqrtsd(dst, Address(rscratch1, 0)); 3016 } 3017 } 3018 3019 void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { 3020 if (reachable(src)) { 3021 Assembler::sqrtss(dst, as_Address(src)); 3022 } else { 3023 lea(rscratch1, src); 3024 Assembler::sqrtss(dst, Address(rscratch1, 0)); 3025 } 3026 } 3027 3028 void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { 3029 if (reachable(src)) { 3030 Assembler::subsd(dst, as_Address(src)); 3031 } else { 3032 lea(rscratch1, src); 3033 Assembler::subsd(dst, Address(rscratch1, 0)); 3034 } 3035 } 3036 3037 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { 3038 if (reachable(src)) { 3039 Assembler::subss(dst, as_Address(src)); 3040 } else { 3041 lea(rscratch1, src); 3042 Assembler::subss(dst, Address(rscratch1, 0)); 3043 } 3044 } 3045 3046 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 3047 if (reachable(src)) { 3048 Assembler::ucomisd(dst, as_Address(src)); 3049 } else { 3050 lea(rscratch1, src); 3051 Assembler::ucomisd(dst, Address(rscratch1, 0)); 3052 } 3053 } 3054 3055 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 3056 if (reachable(src)) { 3057 Assembler::ucomiss(dst, as_Address(src)); 3058 } else { 3059 lea(rscratch1, src); 3060 Assembler::ucomiss(dst, Address(rscratch1, 0)); 3061 } 3062 } 3063 3064 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 3065 // Used in sign-bit flipping with aligned address. 3066 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 3067 if (reachable(src)) { 3068 Assembler::xorpd(dst, as_Address(src)); 3069 } else { 3070 lea(rscratch1, src); 3071 Assembler::xorpd(dst, Address(rscratch1, 0)); 3072 } 3073 } 3074 3075 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 3076 // Used in sign-bit flipping with aligned address. 
3077 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 3078 if (reachable(src)) { 3079 Assembler::xorps(dst, as_Address(src)); 3080 } else { 3081 lea(rscratch1, src); 3082 Assembler::xorps(dst, Address(rscratch1, 0)); 3083 } 3084 } 3085 3086 void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { 3087 // Used in sign-bit flipping with aligned address. 3088 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 3089 if (reachable(src)) { 3090 Assembler::pshufb(dst, as_Address(src)); 3091 } else { 3092 lea(rscratch1, src); 3093 Assembler::pshufb(dst, Address(rscratch1, 0)); 3094 } 3095 } 3096 3097 // AVX 3-operands instructions 3098 3099 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3100 if (reachable(src)) { 3101 vaddsd(dst, nds, as_Address(src)); 3102 } else { 3103 lea(rscratch1, src); 3104 vaddsd(dst, nds, Address(rscratch1, 0)); 3105 } 3106 } 3107 3108 void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3109 if (reachable(src)) { 3110 vaddss(dst, nds, as_Address(src)); 3111 } else { 3112 lea(rscratch1, src); 3113 vaddss(dst, nds, Address(rscratch1, 0)); 3114 } 3115 } 3116 3117 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3118 if (reachable(src)) { 3119 vandpd(dst, nds, as_Address(src), vector256); 3120 } else { 3121 lea(rscratch1, src); 3122 vandpd(dst, nds, Address(rscratch1, 0), vector256); 3123 } 3124 } 3125 3126 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3127 if (reachable(src)) { 3128 vandps(dst, nds, as_Address(src), vector256); 3129 } else { 3130 lea(rscratch1, src); 3131 vandps(dst, nds, Address(rscratch1, 0), vector256); 3132 } 3133 } 3134 3135 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3136 if (reachable(src)) 
{ 3137 vdivsd(dst, nds, as_Address(src)); 3138 } else { 3139 lea(rscratch1, src); 3140 vdivsd(dst, nds, Address(rscratch1, 0)); 3141 } 3142 } 3143 3144 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3145 if (reachable(src)) { 3146 vdivss(dst, nds, as_Address(src)); 3147 } else { 3148 lea(rscratch1, src); 3149 vdivss(dst, nds, Address(rscratch1, 0)); 3150 } 3151 } 3152 3153 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3154 if (reachable(src)) { 3155 vmulsd(dst, nds, as_Address(src)); 3156 } else { 3157 lea(rscratch1, src); 3158 vmulsd(dst, nds, Address(rscratch1, 0)); 3159 } 3160 } 3161 3162 void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3163 if (reachable(src)) { 3164 vmulss(dst, nds, as_Address(src)); 3165 } else { 3166 lea(rscratch1, src); 3167 vmulss(dst, nds, Address(rscratch1, 0)); 3168 } 3169 } 3170 3171 void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3172 if (reachable(src)) { 3173 vsubsd(dst, nds, as_Address(src)); 3174 } else { 3175 lea(rscratch1, src); 3176 vsubsd(dst, nds, Address(rscratch1, 0)); 3177 } 3178 } 3179 3180 void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3181 if (reachable(src)) { 3182 vsubss(dst, nds, as_Address(src)); 3183 } else { 3184 lea(rscratch1, src); 3185 vsubss(dst, nds, Address(rscratch1, 0)); 3186 } 3187 } 3188 3189 void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3190 if (reachable(src)) { 3191 vxorpd(dst, nds, as_Address(src), vector256); 3192 } else { 3193 lea(rscratch1, src); 3194 vxorpd(dst, nds, Address(rscratch1, 0), vector256); 3195 } 3196 } 3197 3198 void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3199 if (reachable(src)) { 3200 vxorps(dst, nds, as_Address(src), vector256); 3201 } else { 3202 lea(rscratch1, src); 3203 vxorps(dst, nds, 
Address(rscratch1, 0), vector256); 3204 } 3205 } 3206 3207 3208 ////////////////////////////////////////////////////////////////////////////////// 3209 #ifndef SERIALGC 3210 3211 void MacroAssembler::g1_write_barrier_pre(Register obj, 3212 Register pre_val, 3213 Register thread, 3214 Register tmp, 3215 bool tosca_live, 3216 bool expand_call) { 3217 3218 // If expand_call is true then we expand the call_VM_leaf macro 3219 // directly to skip generating the check by 3220 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 3221 3222 #ifdef _LP64 3223 assert(thread == r15_thread, "must be"); 3224 #endif // _LP64 3225 3226 Label done; 3227 Label runtime; 3228 3229 assert(pre_val != noreg, "check this code"); 3230 3231 if (obj != noreg) { 3232 assert_different_registers(obj, pre_val, tmp); 3233 assert(pre_val != rax, "check this code"); 3234 } 3235 3236 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 3237 PtrQueue::byte_offset_of_active())); 3238 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 3239 PtrQueue::byte_offset_of_index())); 3240 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 3241 PtrQueue::byte_offset_of_buf())); 3242 3243 3244 // Is marking active? 3245 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { 3246 cmpl(in_progress, 0); 3247 } else { 3248 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); 3249 cmpb(in_progress, 0); 3250 } 3251 jcc(Assembler::equal, done); 3252 3253 // Do we need to load the previous value? 3254 if (obj != noreg) { 3255 load_heap_oop(pre_val, Address(obj, 0)); 3256 } 3257 3258 // Is the previous value null? 3259 cmpptr(pre_val, (int32_t) NULL_WORD); 3260 jcc(Assembler::equal, done); 3261 3262 // Can we store original value in the thread's buffer? 3263 // Is index == 0? 3264 // (The index field is typed as size_t.) 3265 3266 movptr(tmp, index); // tmp := *index_adr 3267 cmpptr(tmp, 0); // tmp == 0? 
3268 jcc(Assembler::equal, runtime); // If yes, goto runtime 3269 3270 subptr(tmp, wordSize); // tmp := tmp - wordSize 3271 movptr(index, tmp); // *index_adr := tmp 3272 addptr(tmp, buffer); // tmp := tmp + *buffer_adr 3273 3274 // Record the previous value 3275 movptr(Address(tmp, 0), pre_val); 3276 jmp(done); 3277 3278 bind(runtime); 3279 // save the live input values 3280 if(tosca_live) push(rax); 3281 3282 if (obj != noreg && obj != rax) 3283 push(obj); 3284 3285 if (pre_val != rax) 3286 push(pre_val); 3287 3288 // Calling the runtime using the regular call_VM_leaf mechanism generates 3289 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 3290 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 3291 // 3292 // If we care generating the pre-barrier without a frame (e.g. in the 3293 // intrinsified Reference.get() routine) then ebp might be pointing to 3294 // the caller frame and so this check will most likely fail at runtime. 3295 // 3296 // Expanding the call directly bypasses the generation of the check. 3297 // So when we do not have have a full interpreter frame on the stack 3298 // expand_call should be passed true. 
3299 3300 NOT_LP64( push(thread); ) 3301 3302 if (expand_call) { 3303 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 3304 pass_arg1(this, thread); 3305 pass_arg0(this, pre_val); 3306 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); 3307 } else { 3308 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); 3309 } 3310 3311 NOT_LP64( pop(thread); ) 3312 3313 // save the live input values 3314 if (pre_val != rax) 3315 pop(pre_val); 3316 3317 if (obj != noreg && obj != rax) 3318 pop(obj); 3319 3320 if(tosca_live) pop(rax); 3321 3322 bind(done); 3323 } 3324 3325 void MacroAssembler::g1_write_barrier_post(Register store_addr, 3326 Register new_val, 3327 Register thread, 3328 Register tmp, 3329 Register tmp2) { 3330 #ifdef _LP64 3331 assert(thread == r15_thread, "must be"); 3332 #endif // _LP64 3333 3334 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 3335 PtrQueue::byte_offset_of_index())); 3336 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 3337 PtrQueue::byte_offset_of_buf())); 3338 3339 BarrierSet* bs = Universe::heap()->barrier_set(); 3340 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 3341 Label done; 3342 Label runtime; 3343 3344 // Does store cross heap regions? 3345 3346 movptr(tmp, store_addr); 3347 xorptr(tmp, new_val); 3348 shrptr(tmp, HeapRegion::LogOfHRGrainBytes); 3349 jcc(Assembler::equal, done); 3350 3351 // crosses regions, storing NULL? 3352 3353 cmpptr(new_val, (int32_t) NULL_WORD); 3354 jcc(Assembler::equal, done); 3355 3356 // storing region crossing non-NULL, is card already dirty? 
3357 3358 ExternalAddress cardtable((address) ct->byte_map_base); 3359 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 3360 #ifdef _LP64 3361 const Register card_addr = tmp; 3362 3363 movq(card_addr, store_addr); 3364 shrq(card_addr, CardTableModRefBS::card_shift); 3365 3366 lea(tmp2, cardtable); 3367 3368 // get the address of the card 3369 addq(card_addr, tmp2); 3370 #else 3371 const Register card_index = tmp; 3372 3373 movl(card_index, store_addr); 3374 shrl(card_index, CardTableModRefBS::card_shift); 3375 3376 Address index(noreg, card_index, Address::times_1); 3377 const Register card_addr = tmp; 3378 lea(card_addr, as_Address(ArrayAddress(cardtable, index))); 3379 #endif 3380 cmpb(Address(card_addr, 0), 0); 3381 jcc(Assembler::equal, done); 3382 3383 // storing a region crossing, non-NULL oop, card is clean. 3384 // dirty card and log. 3385 3386 movb(Address(card_addr, 0), 0); 3387 3388 cmpl(queue_index, 0); 3389 jcc(Assembler::equal, runtime); 3390 subl(queue_index, wordSize); 3391 movptr(tmp2, buffer); 3392 #ifdef _LP64 3393 movslq(rscratch1, queue_index); 3394 addq(tmp2, rscratch1); 3395 movq(Address(tmp2, 0), card_addr); 3396 #else 3397 addl(tmp2, queue_index); 3398 movl(Address(tmp2, 0), card_index); 3399 #endif 3400 jmp(done); 3401 3402 bind(runtime); 3403 // save the live input values 3404 push(store_addr); 3405 push(new_val); 3406 #ifdef _LP64 3407 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); 3408 #else 3409 push(thread); 3410 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); 3411 pop(thread); 3412 #endif 3413 pop(new_val); 3414 pop(store_addr); 3415 3416 bind(done); 3417 } 3418 3419 #endif // SERIALGC 3420 ////////////////////////////////////////////////////////////////////////////////// 3421 3422 3423 void MacroAssembler::store_check(Register obj) { 3424 // Does a store check for the oop in register obj. 
The content of 3425 // register obj is destroyed afterwards. 3426 store_check_part_1(obj); 3427 store_check_part_2(obj); 3428 } 3429 3430 void MacroAssembler::store_check(Register obj, Address dst) { 3431 store_check(obj); 3432 } 3433 3434 3435 // split the store check operation so that other instructions can be scheduled inbetween 3436 void MacroAssembler::store_check_part_1(Register obj) { 3437 BarrierSet* bs = Universe::heap()->barrier_set(); 3438 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 3439 shrptr(obj, CardTableModRefBS::card_shift); 3440 } 3441 3442 void MacroAssembler::store_check_part_2(Register obj) { 3443 BarrierSet* bs = Universe::heap()->barrier_set(); 3444 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 3445 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 3446 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 3447 3448 // The calculation for byte_map_base is as follows: 3449 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); 3450 // So this essentially converts an address to a displacement and 3451 // it will never need to be relocated. On 64bit however the value may be too 3452 // large for a 32bit displacement 3453 3454 intptr_t disp = (intptr_t) ct->byte_map_base; 3455 if (is_simm32(disp)) { 3456 Address cardtable(noreg, obj, Address::times_1, disp); 3457 movb(cardtable, 0); 3458 } else { 3459 // By doing it as an ExternalAddress disp could be converted to a rip-relative 3460 // displacement and done in a single instruction given favorable mapping and 3461 // a smarter version of as_Address. Worst case it is two instructions which 3462 // is no worse off then loading disp into a register and doing as a simple 3463 // Address() as above. 3464 // We can't do as ExternalAddress as the only style since if disp == 0 we'll 3465 // assert since NULL isn't acceptable in a reloci (see 6644928). 
In any case 3466 // in some cases we'll get a single instruction version. 3467 3468 ExternalAddress cardtable((address)disp); 3469 Address index(noreg, obj, Address::times_1); 3470 movb(as_Address(ArrayAddress(cardtable, index)), 0); 3471 } 3472 } 3473 3474 void MacroAssembler::subptr(Register dst, int32_t imm32) { 3475 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 3476 } 3477 3478 // Force generation of a 4 byte immediate value even if it fits into 8bit 3479 void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { 3480 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); 3481 } 3482 3483 void MacroAssembler::subptr(Register dst, Register src) { 3484 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 3485 } 3486 3487 // C++ bool manipulation 3488 void MacroAssembler::testbool(Register dst) { 3489 if(sizeof(bool) == 1) 3490 testb(dst, 0xff); 3491 else if(sizeof(bool) == 2) { 3492 // testw implementation needed for two byte bools 3493 ShouldNotReachHere(); 3494 } else if(sizeof(bool) == 4) 3495 testl(dst, dst); 3496 else 3497 // unsupported 3498 ShouldNotReachHere(); 3499 } 3500 3501 void MacroAssembler::testptr(Register dst, Register src) { 3502 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 3503 } 3504 3505 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
// Bump-pointer allocation from the current thread's TLAB. The size is
// either the constant con_size_in_bytes (when var_size_in_bytes == noreg)
// or the register var_size_in_bytes. On success obj holds the start of the
// new object and tlab_top is advanced; on overflow control transfers to
// slow_case with the TLAB unchanged.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary (end == var_size_in_bytes when
  // the caller passed t2 as the size register — allowed per the contract)
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}

// Refill the thread's TLAB from eden: either retain the current TLAB (if
// the free space left is too large to waste) and allocate this object in
// the shared eden, or discard it — filling the leftover space with a dummy
// int array so the heap stays parsable — and allocate a fresh TLAB.
// Preserves rbx, and rdx.
Register MacroAssembler::tlab_refill(Label& retry,
                                     Label& try_eden,
                                     Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space (in heap words)
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // account the filler's bytes as allocated by this thread
  movptr(t1, top);
  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  incr_allocated_bytes(thread_reg, t1, 0);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    STOP("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // install the new TLAB bounds, reserving alignment space at the end
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);

  return thread_reg; // for use by caller
}

// Add the allocation size (register or constant) to the thread's
// allocated_bytes counter. On 32-bit the counter is 64 bits wide, hence
// the addl/adcl pair.
void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    get_thread(thread);
#endif
  }

#ifdef _LP64
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  // propagate the carry into the high 32 bits of the 64-bit counter
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}

// Call an FP runtime routine (dsin/dcos/dtan etc.) with nb_args x87
// arguments on the FPU stack, preserving all integer registers (pusha),
// any live XMM/YMM state, and the rest of the x87 stack
// (num_fpu_regs_in_use entries). The result is left in the x87 top-of-stack.
void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
  pusha();

  // if we are coming from c1, xmm registers may be live
  int off = 0;
  if (UseSSE == 1) {
    subptr(rsp, sizeof(jdouble)*8);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
  } else if (UseSSE >= 2)  {
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      assert(UseAVX > 0, "256bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
      vextractf128h(Address(rsp,  0),xmm0);
      vextractf128h(Address(rsp, 16),xmm1);
      vextractf128h(Address(rsp, 32),xmm2);
      vextractf128h(Address(rsp, 48),xmm3);
      vextractf128h(Address(rsp, 64),xmm4);
      vextractf128h(Address(rsp, 80),xmm5);
      vextractf128h(Address(rsp, 96),xmm6);
      vextractf128h(Address(rsp,112),xmm7);
#ifdef _LP64
      vextractf128h(Address(rsp,128),xmm8);
      vextractf128h(Address(rsp,144),xmm9);
      vextractf128h(Address(rsp,160),xmm10);
      vextractf128h(Address(rsp,176),xmm11);
      vextractf128h(Address(rsp,192),xmm12);
      vextractf128h(Address(rsp,208),xmm13);
      vextractf128h(Address(rsp,224),xmm14);
      vextractf128h(Address(rsp,240),xmm15);
#endif
    }
#endif
    // Save whole 128bit (16 bytes) XMM registers
    subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    movdqu(Address(rsp,off++*16),xmm0);
    movdqu(Address(rsp,off++*16),xmm1);
    movdqu(Address(rsp,off++*16),xmm2);
    movdqu(Address(rsp,off++*16),xmm3);
    movdqu(Address(rsp,off++*16),xmm4);
    movdqu(Address(rsp,off++*16),xmm5);
    movdqu(Address(rsp,off++*16),xmm6);
    movdqu(Address(rsp,off++*16),xmm7);
#ifdef _LP64
    movdqu(Address(rsp,off++*16),xmm8);
    movdqu(Address(rsp,off++*16),xmm9);
    movdqu(Address(rsp,off++*16),xmm10);
    movdqu(Address(rsp,off++*16),xmm11);
    movdqu(Address(rsp,off++*16),xmm12);
    movdqu(Address(rsp,off++*16),xmm13);
    movdqu(Address(rsp,off++*16),xmm14);
    movdqu(Address(rsp,off++*16),xmm15);
#endif
  }

  // Preserve registers across runtime call
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument(s) to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin, dcos etc.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    // reload the argument(s) so they are back on the x87 stack for the call
    for (int i = nb_args-1; i >= 0; i--) {
      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
    }
  }

  // spill the x87 arguments into the outgoing-argument area
  subptr(rsp, nb_args*sizeof(jdouble));
  for (int i = 0; i < nb_args; i++) {
    fstp_d(Address(rsp, i*sizeof(jdouble)));
  }

#ifdef _LP64
  // 64-bit ABI passes FP args in xmm0/xmm1
  if (nb_args > 0) {
    movdbl(xmm0, Address(rsp, 0));
  }
  if (nb_args > 1) {
    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
  }
  assert(nb_args <= 2, "unsupported number of args");
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level

  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);

#ifdef _LP64
  // move the xmm0 result onto the x87 stack where callers expect it
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble) * nb_args);
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU
    // stack except incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble) * nb_args);
  }

  off = 0;
  if (UseSSE == 1)  {
    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2)  {
    // Restore whole 128bit (16 bytes) XMM registers
    movdqu(xmm0, Address(rsp,off++*16));
    movdqu(xmm1, Address(rsp,off++*16));
    movdqu(xmm2, Address(rsp,off++*16));
    movdqu(xmm3, Address(rsp,off++*16));
    movdqu(xmm4, Address(rsp,off++*16));
    movdqu(xmm5, Address(rsp,off++*16));
    movdqu(xmm6, Address(rsp,off++*16));
    movdqu(xmm7, Address(rsp,off++*16));
#ifdef _LP64
    movdqu(xmm8, Address(rsp,off++*16));
    movdqu(xmm9, Address(rsp,off++*16));
    movdqu(xmm10, Address(rsp,off++*16));
    movdqu(xmm11, Address(rsp,off++*16));
    movdqu(xmm12, Address(rsp,off++*16));
    movdqu(xmm13, Address(rsp,off++*16));
    movdqu(xmm14, Address(rsp,off++*16));
    movdqu(xmm15, Address(rsp,off++*16));
#endif
    addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      vinsertf128h(xmm0, Address(rsp,  0));
      vinsertf128h(xmm1, Address(rsp, 16));
      vinsertf128h(xmm2, Address(rsp, 32));
      vinsertf128h(xmm3, Address(rsp, 48));
      vinsertf128h(xmm4, Address(rsp, 64));
      vinsertf128h(xmm5, Address(rsp, 80));
      vinsertf128h(xmm6, Address(rsp, 96));
      vinsertf128h(xmm7, Address(rsp,112));
#ifdef _LP64
      vinsertf128h(xmm8, Address(rsp,128));
      vinsertf128h(xmm9, Address(rsp,144));
      vinsertf128h(xmm10, Address(rsp,160));
      vinsertf128h(xmm11, Address(rsp,176));
      vinsertf128h(xmm12, Address(rsp,192));
      vinsertf128h(xmm13, Address(rsp,208));
      vinsertf128h(xmm14, Address(rsp,224));
      vinsertf128h(xmm15, Address(rsp,240));
#endif
      addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    }
#endif
  }
  popa();
}

static const double pi_4 = 0.7853981633974483;

// Emit an x87 trig intrinsic ('s'=sin, 'c'=cos, 't'=tan) for the value in
// F-TOS. Fast path uses fsin/fcos/ftan directly when |x| <= pi/4; otherwise
// falls back to the SharedRuntime routine via fp_runtime_fallback.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X|  PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  switch(trig) {
  case 's':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
    }
    break;
  case 'c':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
    }
    break;
  case 't':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}


// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
// Megamorphic invokeinterface dispatch: scan the receiver's itable for
// intf_klass and load the target Method* into method_result.
// recv_klass is clobbered (re-used as the method-entry base pointer).
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  // vtable length is an int field; positive movl is fine on LP64 too.
  movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  // scan_temp = &itable[0] = recv_klass + vtable_base + length * vte_size
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for InstanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // The loop is peeled once (peel == 1 is the first, straight-line iteration)
  // so the common hit-on-first-entry case falls through with one short branch.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.  Load the itable offset for this interface, then the
  // Method* at recv_klass (already biased by itable_index) + offset.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}


// virtual method calling: load the Method* from the receiver klass's
// vtable slot at vtable_index into method_result.
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  Address vtable_entry_addr(recv_klass,
                            vtable_index, Address::times_ptr,
                            base + vtableEntry::method_offset_in_bytes());
  movptr(method_result, vtable_entry_addr);
}


// Full subtype check: fast path first, then the slow (linear-scan) path.
// Branches to L_success on success; falls through (via L_failure) otherwise.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}


// Fast-path subtype check: self-check, then the super_check_offset display
// probe.  Any of the three out-labels may be NULL, meaning "fall through"
// (at most one may be NULL at a time).
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL)
  { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    // Offset was loaded at runtime: must also test whether it was the
    // secondary_super_cache slot (then we need the slow path).
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}


// Slow-path subtype check: linear scan of sub_klass's secondary-supers
// array using repne_scan.  Either out-label may be NULL ("fall through",
// at most one at a time).  If set_cond_codes, Z/NZ are left valid on exit.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
// Caller-supplied temps need not be saved/restored around the scan.
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  // Bump the partial-subtype counter (diagnostics only, not in PRODUCT).
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, Array<Klass*>::base_offset_in_bytes());

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.

  testptr(rax,rax); // Set Z = 0
  repne_scan();

  // Unspill the temp. registers:
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}


// Conditional 32-bit move from memory; emulated with a short branch when
// the CPU has no cmov support.
void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

// Conditional 32-bit register-to-register move; emulated with a short
// branch when the CPU has no cmov support.
void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

// Emit a call to the verify_oop stub for reg, tagged with message s.
// No-op unless -XX:+VerifyOops.  The message buffer is intentionally
// leaked: its address is baked into the generated code.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  BLOCK_COMMENT("verify_oop {");
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
  BLOCK_COMMENT("} verify_oop");
}


// Materialize a value that may not be known yet at code-generation time.
// If *delayed_value_addr is already set, return it as a constant (+offset);
// otherwise emit code that loads it through tmp at execution time.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  // Trap if the delayed value is still zero when the code actually runs.
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      jcc(Assembler::notZero, L);
      char* buf = new char[40];
      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
      STOP(buf);
    } else {
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}


// Address of an interpreter expression-stack argument slot, relative to
// rsp (accounting for the return PC already on the stack).
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  Register             scale_reg    = noreg;
  Address::ScaleFactor scale_factor = Address::no_scale;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
  } else {
    scale_reg    = arg_slot.as_register();
    scale_factor = Address::times(stackElementSize);
  }
  offset += wordSize;           // return PC is on stack
  return Address(rsp, scale_reg, scale_factor, offset);
}


// Like verify_oop, but for an oop held in memory at addr.
// No-op unless -XX:+VerifyOops.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did (and on 64 bit we do two pushes)
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    lea(rax, addr);
    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}

// Debug check of the current thread's TLAB invariants
// (start <= top <= end).  Active only with +UseTLAB and +VerifyOops.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    STOP("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    STOP("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}

// Decoded view of the FPU control word (used by the debug-state
// dump below; bit positions match the accessors).
class ControlWord {
 public:
  int32_t _value;

  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  int  precision_control() const       { return  (_value >>  8) & 3      ; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return
                                                ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  // Print the control word as "<hex> masks = <flags>, <rounding>, <precision>".
  void print() const {
    // rounding control
    const char* rc;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up "; break;
      case 3: rc = "chop "; break;
    };
    // precision control
    const char* pc;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    };
    // flags: uppercase letter = mask bit set, lowercase = clear
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};

// Decoded view of the FPU status word (condition codes, top-of-stack
// index, and exception flags; bit positions match the accessors).
class StatusWord {
 public:
  int32_t _value;

  bool busy() const                    { return ((_value >> 15) & 1) != 0; }
  bool C3() const                      { return ((_value >> 14) & 1) != 0; }
  bool C2() const                      { return ((_value >> 10) & 1) != 0; }
  bool C1() const                      { return ((_value >>  9) & 1) != 0; }
  bool C0() const                      { return ((_value >>  8) & 1) != 0; }
  int  top() const                     { return  (_value >> 11) & 7      ; }
  bool error_status() const            { return ((_value >>  7) & 1) != 0; }
  bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  // Print the status word as "<hex> flags = ..., cc = ..., top = N".
  void print() const {
    // condition codes
    char c[5];
    c[0] = (C3()) ? '3' : '-';
    c[1] = (C2()) ? '2' : '-';
    c[2] = (C1()) ? '1' : '-';
    c[3] = (C0()) ? '0' : '-';
    c[4] = '\x0';
    // flags: letter = flag set, '-' = clear
    char f[9];
    f[0] = (error_status()) ? 'E' : '-';
    f[1] = (stack_fault ()) ? 'S' : '-';
    f[2] = (precision   ()) ? 'P' : '-';
    f[3] = (underflow   ()) ? 'U' : '-';
    f[4] = (overflow    ()) ? 'O' : '-';
    f[5] = (zero_divide ()) ? 'Z' : '-';
    f[6] = (denormalized()) ? 'D' : '-';
    f[7] = (invalid     ()) ?
                             'I' : '-';
    f[8] = '\x0';
    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};

// Decoded view of the FPU tag word: two tag bits per register.
class TagWord {
 public:
  int32_t _value;

  int tag_at(int i) const             { return (_value >> (i*2)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};

// One 80-bit FPU data register: 64-bit mantissa (_m1:_m0) plus
// 16-bit sign/exponent (_ex).
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  bool is_indefinite() const           {
    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  void print() const {
    char  sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  };

};

// Snapshot of the full FPU state: control/status/tag words, error and
// data pointers, and the eight 10-byte data registers.
class FPU_State {
 public:
  enum {
    register_size       = 10,
    number_of_registers =  8,
    register_mask       =  7
  };

  ControlWord  _control_word;
  StatusWord   _status_word;
  TagWord      _tag_word;
  int32_t      _error_offset;
  int32_t      _error_selector;
  int32_t      _data_offset;
  int32_t      _data_selector;
  int8_t       _register[register_size * number_of_registers];

  // Tag for stack-relative register ST(i), adjusted by the current top.
  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }

  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();
    return NULL;
  }

  void print() const {
    // print computation registers; '*' marks the current top of stack
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ?
                                            '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word    .print(); printf("\n");
  }

};

// Decoded view of the EFLAGS register (bit positions match accessors).
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const                { return ((_value >> 11) & 1) != 0; }
  bool direction() const               { return ((_value >> 10) & 1) != 0; }
  bool sign() const                    { return ((_value >>  7) & 1) != 0; }
  bool zero() const                    { return ((_value >>  6) & 1) != 0; }
  bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
  bool parity() const                  { return ((_value >>  2) & 1) != 0; }
  bool carry() const                   { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // flags: letter = flag set, '-' = clear
    char f[8];
    f[0] = (overflow       ()) ? 'O' : '-';
    f[1] = (direction      ()) ? 'D' : '-';
    f[2] = (sign           ()) ? 'S' : '-';
    f[3] = (zero           ()) ? 'Z' : '-';
    f[4] = (auxiliary_carry()) ? 'A' : '-';
    f[5] = (parity         ()) ? 'P' : '-';
    f[6] = (carry          ()) ? 'C' : '-';
    f[7] = '\x0';
    // output
    printf("%08x flags = %s", _value, f);
  }

};

// One general-purpose integer register, printed in hex and decimal.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    printf("%08x  %11d", _value, _value);
  }

};

// Integer-unit state: EFLAGS plus the eight GP registers, laid out in
// the order push_CPU_state leaves them on the stack.
class IU_State {
 public:
  Flag_Register _eflags;
  IU_Register   _rdi;
  IU_Register   _rsi;
  IU_Register   _rbp;
  IU_Register   _rsp;
  IU_Register   _rbx;
  IU_Register   _rdx;
  IU_Register   _rcx;
  IU_Register   _rax;

  void print() const {
    // computation registers
    printf("rax,  = "); _rax.print(); printf("\n");
    printf("rbx,  = "); _rbx.print(); printf("\n");
    printf("rcx  = "); _rcx.print(); printf("\n");
    printf("rdx  = "); _rdx.print(); printf("\n");
    printf("rdi  = "); _rdi.print(); printf("\n");
    printf("rsi  = "); _rsi.print(); printf("\n");
    printf("rbp,  = "); _rbp.print(); printf("\n");
    printf("rsp  = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};

// Combined FPU + integer-unit snapshot, as pushed by push_CPU_state.
class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State  _iu_state;

  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};

// C entry point called from generated code by print_CPU_state().
static void _print_CPU_state(CPU_State* state) {
  state->print();
};

// Emit code that dumps the current CPU state to stdout (debug aid).
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);       // discard argument
  pop_CPU_state();
}

// C entry point called from generated code by verify_FPU(): checks that
// the FPU register stack is contiguous and matches the expected depth.
// Returns true when the state is consistent; asserts/reports otherwise.
static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  static int counter = 0;
  FPU_State* fs = &state->_fpu_state;
  counter++;
  // For leaf calls, only verify that the top few elements remain empty.
  // We only need 1 empty at the top for C2 code.
  if( stack_depth < 0 ) {
    if( fs->tag_for_st(7) != 3 ) {
      printf("FPR7 not empty\n");
      state->print();
      assert(false, "error");
      return false;
    }
    return true;                // All other stack states do not matter
  }

  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
         "bad FPU control word");

  // compute stack depth
  int i = 0;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
  int d = i;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  // verify findings
  if (i != FPU_State::number_of_registers) {
    // stack not contiguous
    printf("%s: stack not contiguous at ST%d\n", s, i);
    state->print();
    assert(false, "error");
    return false;
  }
  // check if computed stack depth corresponds to expected stack depth
  if (stack_depth < 0) {
    // expected stack depth is -stack_depth or less
    if (d > -stack_depth) {
      // too many elements on the stack
      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  } else {
    // expected stack depth is stack_depth
    if (d != stack_depth) {
      // wrong stack depth
      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  }
  // everything is cool
  return true;
}

// Emit a runtime check of the FPU stack (see _verify_FPU above).
// No-op unless -XX:+VerifyFPU.  Breaks (int3) on inconsistency.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}

// Load the klass pointer of the object in src into dst,
// decompressing it when compressed klass pointers are in use (LP64).
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedKlassPointers) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}

// Load the prototype mark-word header from the klass of the object in src.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedKlassPointers) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
      // Decode and dereference in one addressing-mode step.
      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
    } else {
      movq(dst, Address(dst, Klass::prototype_header_offset()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset()));
  }
}

// Store the klass pointer src into the object in dst,
// compressing it when compressed klass pointers are in use (LP64).
// Note: clobbers src on the compressed path (encode_klass_not_null).
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedKlassPointers) {
    encode_klass_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}

// Load an oop from src, decompressing if compressed oops are in use.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  // FIXME: Must change all places where we try to load the klass.
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}

// Doesn't do verfication, generates fixed size code
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, src);
}

// Store the oop in src to dst, compressing if compressed oops are in use.
// Note: clobbers src on the compressed path (encode_heap_oop).
void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}

// Compare the oop in src1 with the (possibly compressed) oop at src2.
// tmp may be noreg, in which case rax is spilled and used.
void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
  assert_different_registers(src1, tmp);
#ifdef _LP64
  if (UseCompressedOops) {
    bool did_push = false;
    if (tmp == noreg) {
      tmp = rax;
      push(tmp);
      did_push = true;
      assert(!src2.uses(rsp), "can't push");
    }
    load_heap_oop(tmp, src2);
    cmpptr(src1, tmp);
    if (did_push)  pop(tmp);
  } else
#endif
    cmpptr(src1, src2);
}

// Used for storing NULLs.
4845 void MacroAssembler::store_heap_oop_null(Address dst) { 4846 #ifdef _LP64 4847 if (UseCompressedOops) { 4848 movl(dst, (int32_t)NULL_WORD); 4849 } else { 4850 movslq(dst, (int32_t)NULL_WORD); 4851 } 4852 #else 4853 movl(dst, (int32_t)NULL_WORD); 4854 #endif 4855 } 4856 4857 #ifdef _LP64 4858 void MacroAssembler::store_klass_gap(Register dst, Register src) { 4859 if (UseCompressedKlassPointers) { 4860 // Store to klass gap in destination 4861 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); 4862 } 4863 } 4864 4865 #ifdef ASSERT 4866 void MacroAssembler::verify_heapbase(const char* msg) { 4867 assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed"); 4868 assert (Universe::heap() != NULL, "java heap should be initialized"); 4869 if (CheckCompressedOops) { 4870 Label ok; 4871 push(rscratch1); // cmpptr trashes rscratch1 4872 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); 4873 jcc(Assembler::equal, ok); 4874 STOP(msg); 4875 bind(ok); 4876 pop(rscratch1); 4877 } 4878 } 4879 #endif 4880 4881 // Algorithm must match oop.inline.hpp encode_heap_oop. 
// Compress the oop in r in place: ((r == NULL ? base : r) - base) >> shift.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: just shift (or nothing at all).
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
  // Map NULL to the heap base first so NULL encodes to 0 after subtraction.
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}

// Compress the known-non-NULL oop in r in place (no NULL check emitted,
// only an assert-mode trap).
void MacroAssembler::encode_heap_oop_not_null(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    STOP("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}

// Two-register form: compress the known-non-NULL oop in src into dst.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    STOP("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}

// Decompress the narrow oop in r in place; a zero (NULL) narrow oop
// stays NULL (the base is only added for non-zero values).
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    Label done;
    // shlq sets ZF; skip adding the base when the narrow oop was 0 (NULL).
    shlq(r, LogMinObjAlignmentInBytes);
    jccb(Assembler::equal, done);
    addq(r, r12_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}

// Decompress the known-non-NULL narrow oop in r in place.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shlq(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}

// Two-register form: decompress the known-non-NULL narrow oop in src
// into dst.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
4988 // Also do not verify_oop as this is called by verify_oop. 4989 if (Universe::narrow_oop_shift() != 0) { 4990 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 4991 if (LogMinObjAlignmentInBytes == Address::times_8) { 4992 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 4993 } else { 4994 if (dst != src) { 4995 movq(dst, src); 4996 } 4997 shlq(dst, LogMinObjAlignmentInBytes); 4998 if (Universe::narrow_oop_base() != NULL) { 4999 addq(dst, r12_heapbase); 5000 } 5001 } 5002 } else { 5003 assert (Universe::narrow_oop_base() == NULL, "sanity"); 5004 if (dst != src) { 5005 movq(dst, src); 5006 } 5007 } 5008 } 5009 5010 void MacroAssembler::encode_klass_not_null(Register r) { 5011 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 5012 #ifdef ASSERT 5013 verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?"); 5014 #endif 5015 if (Universe::narrow_klass_base() != NULL) { 5016 subq(r, r12_heapbase); 5017 } 5018 if (Universe::narrow_klass_shift() != 0) { 5019 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5020 shrq(r, LogKlassAlignmentInBytes); 5021 } 5022 } 5023 5024 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 5025 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 5026 #ifdef ASSERT 5027 verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?"); 5028 #endif 5029 if (dst != src) { 5030 movq(dst, src); 5031 } 5032 if (Universe::narrow_klass_base() != NULL) { 5033 subq(dst, r12_heapbase); 5034 } 5035 if (Universe::narrow_klass_shift() != 0) { 5036 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5037 shrq(dst, LogKlassAlignmentInBytes); 5038 } 5039 } 5040 5041 void MacroAssembler::decode_klass_not_null(Register r) { 5042 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 5043 // Note: it will change flags 
5044 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5045 // Cannot assert, unverified entry point counts instructions (see .ad file) 5046 // vtableStubs also counts instructions in pd_code_size_limit. 5047 // Also do not verify_oop as this is called by verify_oop. 5048 if (Universe::narrow_klass_shift() != 0) { 5049 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5050 shlq(r, LogKlassAlignmentInBytes); 5051 if (Universe::narrow_klass_base() != NULL) { 5052 addq(r, r12_heapbase); 5053 } 5054 } else { 5055 assert (Universe::narrow_klass_base() == NULL, "sanity"); 5056 } 5057 } 5058 5059 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 5060 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 5061 // Note: it will change flags 5062 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5063 // Cannot assert, unverified entry point counts instructions (see .ad file) 5064 // vtableStubs also counts instructions in pd_code_size_limit. 5065 // Also do not verify_oop as this is called by verify_oop. 
5066 if (Universe::narrow_klass_shift() != 0) { 5067 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5068 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); 5069 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 5070 } else { 5071 assert (Universe::narrow_klass_base() == NULL, "sanity"); 5072 if (dst != src) { 5073 movq(dst, src); 5074 } 5075 } 5076 } 5077 5078 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 5079 assert (UseCompressedOops, "should only be used for compressed headers"); 5080 assert (Universe::heap() != NULL, "java heap should be initialized"); 5081 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5082 int oop_index = oop_recorder()->find_index(obj); 5083 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5084 mov_narrow_oop(dst, oop_index, rspec); 5085 } 5086 5087 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 5088 assert (UseCompressedOops, "should only be used for compressed headers"); 5089 assert (Universe::heap() != NULL, "java heap should be initialized"); 5090 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5091 int oop_index = oop_recorder()->find_index(obj); 5092 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5093 mov_narrow_oop(dst, oop_index, rspec); 5094 } 5095 5096 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 5097 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5098 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5099 int klass_index = oop_recorder()->find_index(k); 5100 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5101 mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 5102 } 5103 5104 void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { 5105 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5106 assert 
(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5107 int klass_index = oop_recorder()->find_index(k); 5108 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5109 mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 5110 } 5111 5112 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 5113 assert (UseCompressedOops, "should only be used for compressed headers"); 5114 assert (Universe::heap() != NULL, "java heap should be initialized"); 5115 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5116 int oop_index = oop_recorder()->find_index(obj); 5117 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5118 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 5119 } 5120 5121 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 5122 assert (UseCompressedOops, "should only be used for compressed headers"); 5123 assert (Universe::heap() != NULL, "java heap should be initialized"); 5124 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5125 int oop_index = oop_recorder()->find_index(obj); 5126 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5127 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 5128 } 5129 5130 void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { 5131 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5132 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5133 int klass_index = oop_recorder()->find_index(k); 5134 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5135 Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 5136 } 5137 5138 void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { 5139 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5140 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5141 int klass_index = oop_recorder()->find_index(k); 5142 RelocationHolder rspec = 
metadata_Relocation::spec(klass_index); 5143 Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 5144 } 5145 5146 void MacroAssembler::reinit_heapbase() { 5147 if (UseCompressedOops || UseCompressedKlassPointers) { 5148 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); 5149 } 5150 } 5151 #endif // _LP64 5152 5153 5154 // C2 compiled method's prolog code. 5155 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { 5156 5157 // WARNING: Initial instruction MUST be 5 bytes or longer so that 5158 // NativeJump::patch_verified_entry will be able to patch out the entry 5159 // code safely. The push to verify stack depth is ok at 5 bytes, 5160 // the frame allocation can be either 3 or 6 bytes. So if we don't do 5161 // stack bang then we must use the 6 byte frame allocation even if 5162 // we have no frame. :-( 5163 5164 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 5165 // Remove word for return addr 5166 framesize -= wordSize; 5167 5168 // Calls to C2R adapters often do not accept exceptional returns. 5169 // We require that their callers must bang for them. But be careful, because 5170 // some VM calls (such as call site linkage) can use several kilobytes of 5171 // stack. But the stack safety zone should account for that. 5172 // See bugs 4446381, 4468289, 4497237. 5173 if (stack_bang) { 5174 generate_stack_overflow_check(framesize); 5175 5176 // We always push rbp, so that on return to interpreter rbp, will be 5177 // restored correctly and we can correct the stack. 5178 push(rbp); 5179 // Remove word for ebp 5180 framesize -= wordSize; 5181 5182 // Create frame 5183 if (framesize) { 5184 subptr(rsp, framesize); 5185 } 5186 } else { 5187 // Create frame (force generation of a 4 byte immediate value) 5188 subptr_imm32(rsp, framesize); 5189 5190 // Save RBP register now. 
    framesize -= wordSize;
    movptr(Address(rsp, framesize), rbp);
  }

  if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
    framesize -= wordSize;
    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
  }

#ifndef _LP64
  // If method sets FPU control word do it now
  if (fp_mode_24b) {
    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }
  if (UseSSE >= 2 && VerifyFPU) {
    verify_FPU(0, "FPU stack must be clean on entry");
  }
#endif

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    // Debug-only check that rsp obeys the platform stack alignment
    // contract at method entry.
    Label L;
    push(rax);
    mov(rax, rsp);
    andptr(rax, StackAlignmentInBytes-1);
    cmpptr(rax, StackAlignmentInBytes-wordSize);
    pop(rax);
    jcc(Assembler::equal, L);
    STOP("Stack is not properly aligned!");
    bind(L);
  }
#endif

}


// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
// On return 'result' holds the index of the first occurrence of the
// substring, or -1 if not found.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2, Register result,
                                      XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");

  // Load substring.
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is number of substring remaining elements and
    // cnt1 is number of string remaining elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);

  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR);
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only string if does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8);  negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jcc(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index
  bind(EXIT);

} // string_indexofC8

// Small strings are loaded through stack if they cross page boundary.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2, Register result,
                                    XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is length of small (< 8 chars) constant substring
  // or (-1) for non constant substring in which case its length
  // is in cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0));  // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow load 16 bytes into vec.
      subptr(rsp, 16);
      int stk_offset = wordSize-2;
      push(cnt2);

      // Copy the substring to the stack one char at a time.
      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp);  // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    // Copy the string to the stack one char at a time.
    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp);  // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2);       // substr count
      push(str2);       // substr addr
      push(str1);       // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp);  // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2  < 0) {  // Only for non constant substring
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);   // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof

// Compare strings.
// Lexicographically compare two char strings; on return 'result' holds a
// negative/zero/positive value like compareTo().
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  ShortBranchVerifier sbv(this);
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);
  cmov32(Assembler::lessEqual, cnt2, result);

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Load first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));

  // Compare first characters
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  decrementl(cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  {
    // Check after comparing first character to see if strings are equivalent
    Label LSkip2;
    // Check if the strings start at same location
    cmpptr(str1, str2);
    jccb(Assembler::notEqual, LSkip2);

    // Check if the length difference is zero (from stack)
    cmpl(Address(rsp, 0), 0x0);
    jcc(Assembler::equal, LENGTH_DIFF_LABEL);

    // Strings might not be equivalent
    bind(LSkip2);
  }

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;

  // Advance to next element
  addptr(str1, 16/stride);
  addptr(str2, 16/stride);

  if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 16-byte vectors
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    //   inputs:
    //     vec1- substring
    //     rax - negative string length (elements count)
    //     mem - scanned string
    //     rdx - string length (elements count)
    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
    //               + 00 (unsigned bytes) or + 01 (unsigned shorts)
    //   outputs:
    //     rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail
    testl(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(result, cnt1);
    movptr(cnt2, result);
    load_unsigned_short(result, Address(str1, cnt2, scale));
    load_unsigned_short(cnt1, Address(str2, cnt2, scale));
    subl(result, cnt1);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }

  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale, 0));
  lea(str2, Address(str2, cnt2, scale, 0));
  negptr(cnt2);

  // Compare the rest of the elements
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length. Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}

// Compare char[] arrays aligned to 4 bytes or substrings.
// On return 'result' is 1 if equal, 0 otherwise.  When is_array_equ is true
// the inputs are array oops (NULL/length checks emitted); otherwise they are
// raw element addresses with the element count already in 'limit'.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  ShortBranchVerifier sbv(this);
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e);  //   tail count (in bytes)
    andl(limit, 0xfffffff0);   // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2);   // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}

// Fill an array of T_BYTE/T_SHORT/T_INT elements with 'value'.
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  ShortBranchVerifier sbv(this);
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  // shift converts an int-slot count to an element count for this type.
  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  // Replicate the low byte/short of 'value' across a full 32-bit word.
  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned
  && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1)); // 2 bytes == 1<<(shift-1) elements
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    // No XMM registers available: fill with 32-bit integer stores.
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks (8 << shift is the element count of a 32-byte chunk)
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);   // align the loop head for the instruction fetcher

    BIND(L_fill_32_bytes_loop);

    // Eight unrolled 4-byte stores cover one 32-byte chunk.
    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);   // undo the last biasing subtraction
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1)); // 8 bytes == 1<<(shift+1) elements
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    // SSE2+ path: fill with XMM stores.
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);   // one 4-byte store == 1<<shift elements
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      movdl(xtmp, value);      // move the replicated 32-bit pattern into XMM
      pshufd(xtmp, xtmp, 0);   // broadcast it across all four dwords

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        // Two 16-byte unaligned stores per chunk.
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        // Four 8-byte stores; destination is 8-byte aligned here.
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift); // undo the last biasing subtraction
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);     // is a 4-byte remainder present?
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);     // shorts cannot leave a single trailing byte
    }
  } else {
    BIND(L_fill_2_bytes);    // ints cannot leave a 2-byte remainder
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT


// Return the logical negation of 'cond' (e.g. zero <-> notZero), suitable
// for inverting a conditional branch.
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero: return
Assembler::notZero; 6071 case Assembler::notZero: return Assembler::zero; 6072 case Assembler::less: return Assembler::greaterEqual; 6073 case Assembler::lessEqual: return Assembler::greater; 6074 case Assembler::greater: return Assembler::lessEqual; 6075 case Assembler::greaterEqual: return Assembler::less; 6076 case Assembler::below: return Assembler::aboveEqual; 6077 case Assembler::belowEqual: return Assembler::above; 6078 case Assembler::above: return Assembler::belowEqual; 6079 case Assembler::aboveEqual: return Assembler::below; 6080 case Assembler::overflow: return Assembler::noOverflow; 6081 case Assembler::noOverflow: return Assembler::overflow; 6082 case Assembler::negative: return Assembler::positive; 6083 case Assembler::positive: return Assembler::negative; 6084 case Assembler::parity: return Assembler::noParity; 6085 case Assembler::noParity: return Assembler::parity; 6086 } 6087 ShouldNotReachHere(); return Assembler::overflow; 6088 } 6089 6090 SkipIfEqual::SkipIfEqual( 6091 MacroAssembler* masm, const bool* flag_addr, bool value) { 6092 _masm = masm; 6093 _masm->cmp8(ExternalAddress((address)flag_addr), value); 6094 _masm->jcc(Assembler::equal, _label); 6095 } 6096 6097 SkipIfEqual::~SkipIfEqual() { 6098 _masm->bind(_label); 6099 }