/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")


#ifdef ASSERT
bool AbstractAssembler::pd_check_instruction_mark() { return true; }
#endif

static Assembler::Condition reverse[] = {
    Assembler::noOverflow   /* overflow      = 0x0 */ ,
    Assembler::overflow     /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual   /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below        /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero      /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero         /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above        /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual   /* above         = 0x7 */ ,
    Assembler::positive     /* negative      = 0x8 */ ,
    Assembler::negative     /* positive      = 0x9 */ ,
    Assembler::noParity     /* parity        = 0xa */ ,
    Assembler::parity       /* noParity      = 0xb */ ,
    Assembler::greaterEqual /* less          = 0xc */ ,
    Assembler::less         /* greaterEqual  = 0xd */ ,
    Assembler::greater      /* lessEqual     = 0xe */ ,
    Assembler::lessEqual    /* greater       = 0xf */
};


// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit
// Unless the difference is trivial (1 line or so).
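// Illustrative use of the table above (a sketch, not part of this file's
// code): the x86 condition-code encodings pair up so that flipping the low
// bit negates a condition, and reverse[cc] spells that pairing out, e.g.
//
//   Assembler::Condition negated = reverse[Assembler::zero];
//   // negated == Assembler::notZero: jcc(negated, L) branches exactly
//   // when jcc(Assembler::zero, L) would fall through.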

#ifndef _LP64

// 32bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.
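  //
  // Illustrative sketch (assuming the usual biased-locking mark word
  // layout [ thread | epoch | age | biased_lock | lock ]): swap_reg now
  // holds mark ^ (thread | prototype_header) with the age bits masked
  // off, so each test below isolates the field that differed: the lock
  // bits (bias revoked), the epoch bits (bias expired), or the thread
  // bits (biased toward some other thread).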

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
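  // cdq sign-extends eax into edx in a single instruction; the generic
  // path below computes hi = lo >> 31 (arithmetic shift), replicating
  // lo's sign bit so that hi becomes 0 or -1.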
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx
  // 3rd step
  bind(quick);                                   // note: rbx = 0 if quick multiply!
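  // Worked example (illustrative, not from the original code): for
  // x = 0x0000000200000003 and y = 5 the full path leaves
  // rbx = lo(x_hi * y_lo) + lo(x_lo * y_hi) = 10 + 0 = 10, and the tail
  // below computes rdx:rax = hi:lo(x_lo * y_lo) = 0:15, then adds rbx
  // into rdx, giving 0x0000000A0000000F = x * y.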
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}

void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}

void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers.
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}


void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}

void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}


void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::pushklass(Metadata* obj) {
  push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}

void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}

static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  // Don't assert holding the ttyLock
  assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);
  tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
  if ((WizardMode || Verbose) && PrintMiscellaneous) {
    tty->cr();
    findpc(eip);
    tty->cr();
  }
#endif
#define PRINT_REG(rax) \
  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
  PRINT_REG(rax);
  PRINT_REG(rbx);
  PRINT_REG(rcx);
  PRINT_REG(rdx);
  PRINT_REG(rdi);
  PRINT_REG(rsi);
  PRINT_REG(rbp);
  PRINT_REG(rsp);
#undef PRINT_REG
  // Print some words near top of stack.
  int* dump_sp = (int*) rsp;
  for (int col1 = 0; col1 < 8; col1++) {
    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  for (int row = 0; row < 16; row++) {
    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
    for (int col = 0; col < 8; col++) {
      tty->print(" 0x%08x", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)eip-64, (address)eip);
  tty->print_cr("--------");
  Disassembler::decode((address)eip, (address)eip+32);
}

void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }  // push eip
  pusha();                                         // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}

void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);   // discard argument
  pop_CPU_state();
}

void MacroAssembler::print_state() {
  { Label L; call(L, relocInfo::none); bind(L); }  // push eip
  pusha();                                         // push registers

  push_CPU_state();
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
  pop_CPU_state();

  popa();
  addl(rsp, wordSize);
}

#else // _LP64

// 64 bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp");  // maybe it can?
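  // The base was materialized into rscratch1 above so that the Address
  // built below is reachable no matter where the array lives in the
  // 64-bit address space.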
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movq(swap_reg, mark_addr);
  }
  movq(tmp_reg, swap_reg);
  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  xorq(tmp_reg, swap_reg);
  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  andq(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  movq(tmp_reg, swap_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for its register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp, frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  subq(rsp, 8);
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif

}

void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "should use cmpptr");

  if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
}

int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271.  The function
  // returns the (pc) offset of the idivl instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}

void MacroAssembler::decrementq(Register reg, int value) {
  if (value == min_jint) { subq(reg, value); return; }
  if (value <  0) { incrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(reg) ; return; }
  /* else */      { subq(reg, value)       ; return; }
}

void MacroAssembler::decrementq(Address dst, int value) {
  if (value == min_jint) { subq(dst, value); return; }
  if (value <  0) { incrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(dst) ; return; }
  /* else */      { subq(dst, value)       ; return; }
}

void MacroAssembler::incrementq(Register reg, int value) {
  if (value == min_jint) { addq(reg, value); return; }
  if (value <  0) { decrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(reg) ; return; }
  /* else */      { addq(reg, value)       ; return; }
}

void MacroAssembler::incrementq(Address dst, int value) {
  if (value == min_jint) { addq(dst, value); return; }
  if (value <  0) { decrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(dst) ; return; }
  /* else */      { addq(dst, value)       ; return; }
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}

void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}

void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_int8((unsigned char)0xC9); // LEAVE
}

void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}

void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1, 0));
    }
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers.
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}

// These are mostly for initializing NULL
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}

void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}

void MacroAssembler::pushklass(Metadata* obj) {
  mov_metadata(rscratch1, obj);
  push(rscratch1);
}

void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg) {
    masm->mov(c_rarg3, arg);
  }
}

void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha();            // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16);     // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}

void MacroAssembler::warn(const char* msg) {
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call
  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::print_state() {
  address rip = pc();
  pusha();            // get regs on stack
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call
  push_CPU_state();   // keeps alignment at 16 bytes

  lea(c_rarg0, InternalAddress(rip));
  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);

  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
  popa();
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state64(pc, regs);
      BREAKPOINT;
      assert(false, "start up GDB");
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}

void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);
  tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
  tty->cr();
  findpc(pc);
  tty->cr();
#endif
#define PRINT_REG(rax, value) \
  { tty->print("%s = ", #rax); os::print_location(tty, value); }
  PRINT_REG(rax, regs[15]);
  PRINT_REG(rbx, regs[12]);
  PRINT_REG(rcx, regs[14]);
  PRINT_REG(rdx, regs[13]);
  PRINT_REG(rdi, regs[8]);
  PRINT_REG(rsi, regs[9]);
  PRINT_REG(rbp, regs[10]);
  PRINT_REG(rsp, regs[11]);
  PRINT_REG(r8 , regs[7]);
  PRINT_REG(r9 , regs[6]);
  PRINT_REG(r10, regs[5]);
  PRINT_REG(r11, regs[4]);
  PRINT_REG(r12, regs[3]);
  PRINT_REG(r13, regs[2]);
  PRINT_REG(r14, regs[1]);
  PRINT_REG(r15, regs[0]);
#undef PRINT_REG
  // Print some words near top of stack.
  int64_t* rsp = (int64_t*) regs[11];
  int64_t* dump_sp = rsp;
  for (int col1 = 0; col1 < 8; col1++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  for (int row = 0; row < 25; row++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    for (int col = 0; col < 4; col++) {
      tty->print(" 0x%016lx", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)pc-64, (address)pc);
  tty->print_cr("--------");
  Disassembler::decode((address)pc, (address)pc+32);
}

#endif // _LP64

// Now versions that are common to 32/64 bit

void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::addsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::addsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    addss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    addss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andps(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}

// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages.  This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
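  // Illustrative walk-through (not from the original code): with 4K pages
  // and size = 12K the loop below writes at rsp-4K, rsp-8K and rsp-12K,
  // moving tmp down a page and shrinking size by a page each iteration
  // until size reaches zero; the shadow pages are then banged below that
  // point.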
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i < StackShadowPages-1; i++) {
    // this could be any sized move but this can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}

// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  movptr(rax, (intptr_t)Universe::non_oop_word());
  call(AddressLiteral(entry, rh));
}

// Implementation of call_VM versions

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   int number_of_arguments,
                                   bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   bool check_exceptions) {
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   Register arg_3,
                                   bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
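  // Passing the highest-numbered argument first means a later pass_argN
  // cannot clobber a c_rarg register that still holds a pending value;
  // the surrounding asserts catch the collisions that remain possible.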
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
#ifdef ASSERT
  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
  // r12 is the heapbase.
  LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach.  So we must jump
So we must jump 1696 // around it so that the jump target is always reachable 1697 1698 Label ok; 1699 jcc(Assembler::equal, ok); 1700 jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 1701 bind(ok); 1702 #endif // LP64 1703 } 1704 1705 // get oop result if there is one and reset the value in the thread 1706 if (oop_result->is_valid()) { 1707 get_vm_result(oop_result, java_thread); 1708 } 1709 } 1710 1711 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 1712 1713 // Calculate the value for last_Java_sp 1714 // somewhat subtle. call_VM does an intermediate call 1715 // which places a return address on the stack just under the 1716 // stack pointer as if the user had finished with it. This allows 1717 // us to retrieve last_Java_pc from last_Java_sp[-1]. 1718 // On 32bit we then have to push additional args on the stack to accomplish 1719 // the actual requested call. On 64bit call_VM can only use register args 1720 // so the only extra space is the return address that call_VM created. 1721 // This hopefully explains the calculations here. 1722 1723 #ifdef _LP64 1724 // We've pushed one address, correct last_Java_sp 1725 lea(rax, Address(rsp, wordSize)); 1726 #else 1727 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); 1728 #endif // LP64 1729 1730 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); 1731 1732 } 1733 1734 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 1735 call_VM_leaf_base(entry_point, number_of_arguments); 1736 } 1737 1738 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 1739 pass_arg0(this, arg_0); 1740 call_VM_leaf(entry_point, 1); 1741 } 1742 1743 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 1744 1745 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 1746 pass_arg1(this, arg_1); 1747 pass_arg0(this, arg_0); 1748 call_VM_leaf(entry_point, 2); 1749 } 1750 1751 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 1752 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 1753 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 1754 pass_arg2(this, arg_2); 1755 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 1756 pass_arg1(this, arg_1); 1757 pass_arg0(this, arg_0); 1758 call_VM_leaf(entry_point, 3); 1759 } 1760 1761 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 1762 pass_arg0(this, arg_0); 1763 MacroAssembler::call_VM_leaf_base(entry_point, 1); 1764 } 1765 1766 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 1767 1768 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 1769 pass_arg1(this, arg_1); 1770 pass_arg0(this, arg_0); 1771 MacroAssembler::call_VM_leaf_base(entry_point, 2); 1772 } 1773 1774 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 1775 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 1776 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 1777 pass_arg2(this, arg_2); 1778 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 1779 pass_arg1(this, arg_1); 1780 pass_arg0(this, arg_0); 1781 MacroAssembler::call_VM_leaf_base(entry_point, 3); 1782 } 1783 1784 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 1785 LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg")); 1786
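// The pass_argN calls in these helpers are issued from the highest argument
// register down, so each move can only clobber a c_rarg that has already
// been consumed; the asserts document the remaining hazard. A minimal sketch
// of the bug they catch (hypothetical register assignments, illustration
// only):
//
//   mov c_rarg3, arg_3   // ok: c_rarg3 is consumed first
//   mov c_rarg2, arg_2   // would smash arg_0/arg_1 if either lived in c_rarg2
//   mov c_rarg1, arg_1   // would smash arg_0 if arg_0 == c_rarg1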
LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 1787 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 1788 pass_arg3(this, arg_3); 1789 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 1790 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 1791 pass_arg2(this, arg_2); 1792 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 1793 pass_arg1(this, arg_1); 1794 pass_arg0(this, arg_0); 1795 MacroAssembler::call_VM_leaf_base(entry_point, 4); 1796 } 1797 1798 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 1799 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 1800 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); 1801 verify_oop(oop_result, "broken oop in call_VM_base"); 1802 } 1803 1804 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 1805 movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 1806 movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD); 1807 } 1808 1809 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 1810 } 1811 1812 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 1813 } 1814 1815 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { 1816 if (reachable(src1)) { 1817 cmpl(as_Address(src1), imm); 1818 } else { 1819 lea(rscratch1, src1); 1820 cmpl(Address(rscratch1, 0), imm); 1821 } 1822 } 1823 1824 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { 1825 assert(!src2.is_lval(), "use cmpptr"); 1826 if (reachable(src2)) { 1827 cmpl(src1, as_Address(src2)); 1828 } else { 1829 lea(rscratch1, src2); 1830 cmpl(src1, Address(rscratch1, 0)); 1831 } 1832 } 1833 1834 void MacroAssembler::cmp32(Register src1, int32_t imm) { 1835 Assembler::cmpl(src1, imm); 1836 } 1837 1838 void MacroAssembler::cmp32(Register src1, Address src2) { 1839 Assembler::cmpl(src1, src2); 1840 } 1841 1842 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 1843 ucomisd(opr1, opr2); 1844 1845 Label L; 1846 if (unordered_is_less) { 1847 movl(dst, -1); 1848 jcc(Assembler::parity, L); 1849 jcc(Assembler::below , L); 1850 movl(dst, 0); 1851 jcc(Assembler::equal , L); 1852 increment(dst); 1853 } else { // unordered is greater 1854 movl(dst, 1); 1855 jcc(Assembler::parity, L); 1856 jcc(Assembler::above , L); 1857 movl(dst, 0); 1858 jcc(Assembler::equal , L); 1859 decrementl(dst); 1860 } 1861 bind(L); 1862 } 1863 1864 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 1865 ucomiss(opr1, opr2); 1866 1867 Label L; 1868 if (unordered_is_less) { 1869 movl(dst, -1); 1870 jcc(Assembler::parity, L); 1871 jcc(Assembler::below , L); 1872 movl(dst, 0); 1873 jcc(Assembler::equal , L); 1874 increment(dst); 1875 } else { // unordered is greater 1876 movl(dst, 1); 1877 jcc(Assembler::parity, L); 1878 jcc(Assembler::above , L); 1879 movl(dst, 0); 1880 jcc(Assembler::equal , L); 1881 decrementl(dst); 1882 } 1883 bind(L); 1884 } 1885 1886 1887 void MacroAssembler::cmp8(AddressLiteral src1, int imm) { 1888 if (reachable(src1)) { 1889 cmpb(as_Address(src1), imm); 1890 } else { 1891 lea(rscratch1, src1); 1892 cmpb(Address(rscratch1, 0), imm); 1893 } 1894 } 1895 1896 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { 1897 #ifdef _LP64 1898 if (src2.is_lval()) { 1899 movptr(rscratch1, src2); 1900 Assembler::cmpq(src1, rscratch1); 1901 } else if (reachable(src2)) { 1902 cmpq(src1, 
as_Address(src2)); 1903 } else { 1904 lea(rscratch1, src2); 1905 Assembler::cmpq(src1, Address(rscratch1, 0)); 1906 } 1907 #else 1908 if (src2.is_lval()) { 1909 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 1910 } else { 1911 cmpl(src1, as_Address(src2)); 1912 } 1913 #endif // _LP64 1914 } 1915 1916 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { 1917 assert(src2.is_lval(), "not a mem-mem compare"); 1918 #ifdef _LP64 1919 // moves src2's literal address 1920 movptr(rscratch1, src2); 1921 Assembler::cmpq(src1, rscratch1); 1922 #else 1923 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 1924 #endif // _LP64 1925 } 1926 1927 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { 1928 if (reachable(adr)) { 1929 if (os::is_MP()) 1930 lock(); 1931 cmpxchgptr(reg, as_Address(adr)); 1932 } else { 1933 lea(rscratch1, adr); 1934 if (os::is_MP()) 1935 lock(); 1936 cmpxchgptr(reg, Address(rscratch1, 0)); 1937 } 1938 } 1939 1940 void MacroAssembler::cmpxchgptr(Register reg, Address adr) { 1941 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); 1942 } 1943 1944 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { 1945 if (reachable(src)) { 1946 Assembler::comisd(dst, as_Address(src)); 1947 } else { 1948 lea(rscratch1, src); 1949 Assembler::comisd(dst, Address(rscratch1, 0)); 1950 } 1951 } 1952 1953 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 1954 if (reachable(src)) { 1955 Assembler::comiss(dst, as_Address(src)); 1956 } else { 1957 lea(rscratch1, src); 1958 Assembler::comiss(dst, Address(rscratch1, 0)); 1959 } 1960 } 1961 1962 1963 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 1964 Condition negated_cond = negate_condition(cond); 1965 Label L; 1966 jcc(negated_cond, L); 1967 atomic_incl(counter_addr); 1968 bind(L); 1969 } 1970 1971 int MacroAssembler::corrected_idivl(Register reg) { 1972 // Full implementation of Java idiv and irem; checks for 1973 // special case as described in JVM spec., p.243 & p.271. 1974 // The function returns the (pc) offset of the idivl 1975 // instruction - may be needed for implicit exceptions. 
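// A concrete instance of the special case (worked example): idivl of
// min_int by -1 raises #DE on x86 because the true quotient, +2^31 =
// 2147483648, is not representable in 32 bits, while the JVM spec requires
// min_int / -1 == min_int and min_int % -1 == 0. The early-out below
// (cmpl/jcc to special_case) produces exactly that pair without ever
// executing idivl.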
1976 // 1977 // normal case special case 1978 // 1979 // input : rax,: dividend min_int 1980 // reg: divisor (may not be rax,/rdx) -1 1981 // 1982 // output: rax,: quotient (= rax, idiv reg) min_int 1983 // rdx: remainder (= rax, irem reg) 0 1984 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); 1985 const int min_int = 0x80000000; 1986 Label normal_case, special_case; 1987 1988 // check for special case 1989 cmpl(rax, min_int); 1990 jcc(Assembler::notEqual, normal_case); 1991 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) 1992 cmpl(reg, -1); 1993 jcc(Assembler::equal, special_case); 1994 1995 // handle normal case 1996 bind(normal_case); 1997 cdql(); 1998 int idivl_offset = offset(); 1999 idivl(reg); 2000 2001 // normal and special case exit 2002 bind(special_case); 2003 2004 return idivl_offset; 2005 } 2006 2007 2008 2009 void MacroAssembler::decrementl(Register reg, int value) { 2010 if (value == min_jint) {subl(reg, value) ; return; } 2011 if (value < 0) { incrementl(reg, -value); return; } 2012 if (value == 0) { ; return; } 2013 if (value == 1 && UseIncDec) { decl(reg) ; return; } 2014 /* else */ { subl(reg, value) ; return; } 2015 } 2016 2017 void MacroAssembler::decrementl(Address dst, int value) { 2018 if (value == min_jint) {subl(dst, value) ; return; } 2019 if (value < 0) { incrementl(dst, -value); return; } 2020 if (value == 0) { ; return; } 2021 if (value == 1 && UseIncDec) { decl(dst) ; return; } 2022 /* else */ { subl(dst, value) ; return; } 2023 } 2024 2025 void MacroAssembler::division_with_shift (Register reg, int shift_value) { 2026 assert (shift_value > 0, "illegal shift value"); 2027 Label _is_positive; 2028 testl (reg, reg); 2029 jcc (Assembler::positive, _is_positive); 2030 int offset = (1 << shift_value) - 1 ; 2031 2032 if (offset == 1) { 2033 incrementl(reg); 2034 } else { 2035 addl(reg, offset); 2036 } 2037 2038 bind (_is_positive); 2039 sarl(reg, shift_value); 2040 } 2041 2042 void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { 2043 if (reachable(src)) { 2044 Assembler::divsd(dst, as_Address(src)); 2045 } else { 2046 lea(rscratch1, src); 2047 Assembler::divsd(dst, Address(rscratch1, 0)); 2048 } 2049 } 2050 2051 void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { 2052 if (reachable(src)) { 2053 Assembler::divss(dst, as_Address(src)); 2054 } else { 2055 lea(rscratch1, src); 2056 Assembler::divss(dst, Address(rscratch1, 0)); 2057 } 2058 } 2059 2060 // !defined(COMPILER2) is because of stupid core builds 2061 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) 2062 void MacroAssembler::empty_FPU_stack() { 2063 if (VM_Version::supports_mmx()) { 2064 emms(); 2065 } else { 2066 for (int i = 8; i-- > 0; ) ffree(i); 2067 } 2068 } 2069 #endif // !LP64 || C1 || !C2 2070 2071 2072 // Defines obj, preserves var_size_in_bytes 2073 void MacroAssembler::eden_allocate(Register obj, 2074 Register var_size_in_bytes, 2075 int con_size_in_bytes, 2076 Register t1, 2077 Label& slow_case) { 2078 assert(obj == rax, "obj must be in rax, for cmpxchg"); 2079 assert_different_registers(obj, var_size_in_bytes, t1); 2080 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 2081 jmp(slow_case); 2082 } else { 2083 Register end = t1; 2084 Label retry; 2085 bind(retry); 2086 ExternalAddress heap_top((address) Universe::heap()->top_addr()); 2087 movptr(obj, heap_top); 2088 if (var_size_in_bytes == noreg) { 2089 lea(end, Address(obj, con_size_in_bytes)); 2090 } else { 2091 lea(end, 
Address(obj, var_size_in_bytes, Address::times_1)); 2092 } 2093 // if end < obj then we wrapped around => object too long => slow case 2094 cmpptr(end, obj); 2095 jcc(Assembler::below, slow_case); 2096 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 2097 jcc(Assembler::above, slow_case); 2098 // Compare obj with the top addr, and if still equal, store the new top addr in 2099 // end at the address of the top addr pointer. Sets ZF if was equal, and clears 2100 // it otherwise. Use lock prefix for atomicity on MPs. 2101 locked_cmpxchgptr(end, heap_top); 2102 jcc(Assembler::notEqual, retry); 2103 } 2104 } 2105 2106 void MacroAssembler::enter() { 2107 push(rbp); 2108 mov(rbp, rsp); 2109 } 2110 2111 // A 5 byte nop that is safe for patching (see patch_verified_entry) 2112 void MacroAssembler::fat_nop() { 2113 if (UseAddressNop) { 2114 addr_nop_5(); 2115 } else { 2116 emit_int8(0x26); // es: 2117 emit_int8(0x2e); // cs: 2118 emit_int8(0x64); // fs: 2119 emit_int8(0x65); // gs: 2120 emit_int8((unsigned char)0x90); 2121 } 2122 } 2123 2124 void MacroAssembler::fcmp(Register tmp) { 2125 fcmp(tmp, 1, true, true); 2126 } 2127 2128 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 2129 assert(!pop_right || pop_left, "usage error"); 2130 if (VM_Version::supports_cmov()) { 2131 assert(tmp == noreg, "unneeded temp"); 2132 if (pop_left) { 2133 fucomip(index); 2134 } else { 2135 fucomi(index); 2136 } 2137 if (pop_right) { 2138 fpop(); 2139 } 2140 } else { 2141 assert(tmp != noreg, "need temp"); 2142 if (pop_left) { 2143 if (pop_right) { 2144 fcompp(); 2145 } else { 2146 fcomp(index); 2147 } 2148 } else { 2149 fcom(index); 2150 } 2151 // convert FPU condition into eflags condition via rax, 2152 save_rax(tmp); 2153 fwait(); fnstsw_ax(); 2154 sahf(); 2155 restore_rax(tmp); 2156 } 2157 // condition codes set as follows: 2158 // 2159 // CF (corresponds to C0) if x < y 2160 // PF (corresponds to C2) if unordered 2161 // ZF (corresponds to C3) if x = y 2162 } 2163 2164 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { 2165 fcmp2int(dst, unordered_is_less, 1, true, true); 2166 } 2167 2168 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { 2169 fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right); 2170 Label L; 2171 if (unordered_is_less) { 2172 movl(dst, -1); 2173 jcc(Assembler::parity, L); 2174 jcc(Assembler::below , L); 2175 movl(dst, 0); 2176 jcc(Assembler::equal , L); 2177 increment(dst); 2178 } else { // unordered is greater 2179 movl(dst, 1); 2180 jcc(Assembler::parity, L); 2181 jcc(Assembler::above , L); 2182 movl(dst, 0); 2183 jcc(Assembler::equal , L); 2184 decrementl(dst); 2185 } 2186 bind(L); 2187 } 2188 2189 void MacroAssembler::fld_d(AddressLiteral src) { 2190 fld_d(as_Address(src)); 2191 } 2192 2193 void MacroAssembler::fld_s(AddressLiteral src) { 2194 fld_s(as_Address(src)); 2195 } 2196 2197 void MacroAssembler::fld_x(AddressLiteral src) { 2198 Assembler::fld_x(as_Address(src)); 2199 } 2200 2201 void MacroAssembler::fldcw(AddressLiteral src) { 2202 Assembler::fldcw(as_Address(src)); 2203 } 2204 2205 void MacroAssembler::pow_exp_core_encoding() { 2206 // kills rax, rcx, rdx 2207 subptr(rsp,sizeof(jdouble)); 2208 // computes 2^X. Stack: X ... 2209 // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. 
Get int(X) and 2210 // keep it on the thread's stack to compute 2^int(X) later 2211 // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1 2212 // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X)) 2213 fld_s(0); // Stack: X X ... 2214 frndint(); // Stack: int(X) X ... 2215 fsuba(1); // Stack: int(X) X-int(X) ... 2216 fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ... 2217 f2xm1(); // Stack: 2^(X-int(X))-1 ... 2218 fld1(); // Stack: 1 2^(X-int(X))-1 ... 2219 faddp(1); // Stack: 2^(X-int(X)) 2220 // computes 2^(int(X)): add exponent bias (1023) to int(X), then 2221 // shift int(X)+1023 to exponent position. 2222 // Exponent is limited to 11 bits: if int(X)+1023 does not fit in 11 2223 // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent 2224 // values so detect them and set result to NaN. 2225 movl(rax,Address(rsp,0)); 2226 movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding 2227 addl(rax, 1023); 2228 movl(rdx,rax); 2229 shll(rax,20); 2230 // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN. 2231 addl(rdx,1); 2232 // Check that 1 < int(X)+1023+1 < 2048 2233 // in 3 steps: 2234 // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048 2235 // 2- (int(X)+1023+1)&-2048 != 0 2236 // 3- (int(X)+1023+1)&-2048 != 1 2237 // Do 2- first because addl just updated the flags. 2238 cmov32(Assembler::equal,rax,rcx); 2239 cmpl(rdx,1); 2240 cmov32(Assembler::equal,rax,rcx); 2241 testl(rdx,rcx); 2242 cmov32(Assembler::notEqual,rax,rcx); 2243 movl(Address(rsp,4),rax); 2244 movl(Address(rsp,0),0); 2245 fmul_d(Address(rsp,0)); // Stack: 2^X ... 2246 addptr(rsp,sizeof(jdouble)); 2247 } 2248 2249 void MacroAssembler::increase_precision() { 2250 subptr(rsp, BytesPerWord); 2251 fnstcw(Address(rsp, 0)); 2252 movl(rax, Address(rsp, 0)); 2253 orl(rax, 0x300); 2254 push(rax); 2255 fldcw(Address(rsp, 0)); 2256 pop(rax); 2257 } 2258 2259 void MacroAssembler::restore_precision() { 2260 fldcw(Address(rsp, 0)); 2261 addptr(rsp, BytesPerWord); 2262 } 2263 2264 void MacroAssembler::fast_pow() { 2265 // computes X^Y = 2^(Y * log2(X)) 2266 // if fast computation is not possible, result is NaN. Requires 2267 // fallback from user of this macro. 2268 // increase precision for intermediate steps of the computation 2269 increase_precision(); 2270 fyl2x(); // Stack: (Y*log2(X)) ... 2271 pow_exp_core_encoding(); // Stack: X^Y ... 2272 restore_precision(); 2273 } 2274 2275 void MacroAssembler::fast_exp() { 2276 // computes exp(X) = 2^(X * log2(e)) 2277 // if fast computation is not possible, result is NaN. Requires 2278 // fallback from user of this macro. 2279 // increase precision for intermediate steps of the computation 2280 increase_precision(); 2281 fldl2e(); // Stack: log2(e) X ... 2282 fmulp(1); // Stack: (X*log2(e)) ... 2283 pow_exp_core_encoding(); // Stack: exp(X) ... 2284 restore_precision(); 2285 } 2286 2287 void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) { 2288 // kills rax, rcx, rdx 2289 // pow and exp need 2 extra registers on the fpu stack. 2290 Label slow_case, done; 2291 Register tmp = noreg; 2292 if (!VM_Version::supports_cmov()) { 2293 // fcmp needs a temporary so preserve rdx, 2294 tmp = rdx; 2295 } 2296 Register tmp2 = rax; 2297 Register tmp3 = rcx; 2298 2299 if (is_exp) { 2300 // Stack: X 2301 fld_s(0); // duplicate argument for runtime call.
Stack: X X 2302 fast_exp(); // Stack: exp(X) X 2303 fcmp(tmp, 0, false, false); // Stack: exp(X) X 2304 // exp(X) not equal to itself: exp(X) is NaN go to slow case. 2305 jcc(Assembler::parity, slow_case); 2306 // get rid of duplicate argument. Stack: exp(X) 2307 if (num_fpu_regs_in_use > 0) { 2308 fxch(); 2309 fpop(); 2310 } else { 2311 ffree(1); 2312 } 2313 jmp(done); 2314 } else { 2315 // Stack: X Y 2316 Label x_negative, y_odd; 2317 2318 fldz(); // Stack: 0 X Y 2319 fcmp(tmp, 1, true, false); // Stack: X Y 2320 jcc(Assembler::above, x_negative); 2321 2322 // X >= 0 2323 2324 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y 2325 fld_s(1); // Stack: X Y X Y 2326 fast_pow(); // Stack: X^Y X Y 2327 fcmp(tmp, 0, false, false); // Stack: X^Y X Y 2328 // X^Y not equal to itself: X^Y is NaN go to slow case. 2329 jcc(Assembler::parity, slow_case); 2330 // get rid of duplicate arguments. Stack: X^Y 2331 if (num_fpu_regs_in_use > 0) { 2332 fxch(); fpop(); 2333 fxch(); fpop(); 2334 } else { 2335 ffree(2); 2336 ffree(1); 2337 } 2338 jmp(done); 2339 2340 // X <= 0 2341 bind(x_negative); 2342 2343 fld_s(1); // Stack: Y X Y 2344 frndint(); // Stack: int(Y) X Y 2345 fcmp(tmp, 2, false, false); // Stack: int(Y) X Y 2346 jcc(Assembler::notEqual, slow_case); 2347 2348 subptr(rsp, 8); 2349 2350 // For X^Y, when X < 0, Y has to be an integer and the final 2351 // result depends on whether it's odd or even. We just checked 2352 // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit 2353 // integer to test its parity. If int(Y) is huge and doesn't fit 2354 // in the 64 bit integer range, the integer indefinite value will 2355 // end up in the gp registers. Huge numbers are all even, the 2356 // integer indefinite number is even so it's fine. 2357 2358 #ifdef ASSERT 2359 // Let's check we don't end up with an integer indefinite number 2360 // when not expected. First test for huge numbers: check whether 2361 // int(Y)+1 == int(Y) which is true for very large numbers and 2362 // those are all even. A 64 bit integer is guaranteed to not 2363 // overflow for numbers where y+1 != y (when precision is set to 2364 // double precision). 2365 Label y_not_huge; 2366 2367 fld1(); // Stack: 1 int(Y) X Y 2368 fadd(1); // Stack: 1+int(Y) int(Y) X Y 2369 2370 #ifdef _LP64 2371 // trip to memory to force the precision down from double extended 2372 // precision 2373 fstp_d(Address(rsp, 0)); 2374 fld_d(Address(rsp, 0)); 2375 #endif 2376 2377 fcmp(tmp, 1, true, false); // Stack: int(Y) X Y 2378 #endif 2379 2380 // move int(Y) as 64 bit integer to thread's stack 2381 fistp_d(Address(rsp,0)); // Stack: X Y 2382 2383 #ifdef ASSERT 2384 jcc(Assembler::notEqual, y_not_huge); 2385 2386 // Y is huge so we know it's even. It may not fit in a 64 bit 2387 // integer and we don't want the debug code below to see the 2388 // integer indefinite value so overwrite int(Y) on the thread's 2389 // stack with 0. 2390 movl(Address(rsp, 0), 0); 2391 movl(Address(rsp, 4), 0); 2392 2393 bind(y_not_huge); 2394 #endif 2395 2396 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y 2397 fld_s(1); // Stack: X Y X Y 2398 fabs(); // Stack: abs(X) Y X Y 2399 fast_pow(); // Stack: abs(X)^Y X Y 2400 fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y 2401 // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case. 2402 2403 pop(tmp2); 2404 NOT_LP64(pop(tmp3)); 2405 jcc(Assembler::parity, slow_case); 2406 2407 #ifdef ASSERT 2408 // Check that int(Y) is not integer indefinite value (int 2409 // overflow). 
Shouldn't happen because for values that would 2410 // overflow, 1+int(Y)==Y which was tested earlier. 2411 #ifndef _LP64 2412 { 2413 Label integer; 2414 testl(tmp2, tmp2); 2415 jcc(Assembler::notZero, integer); 2416 cmpl(tmp3, 0x80000000); 2417 jcc(Assembler::notZero, integer); 2418 STOP("integer indefinite value shouldn't be seen here"); 2419 bind(integer); 2420 } 2421 #else 2422 { 2423 Label integer; 2424 mov(tmp3, tmp2); // preserve tmp2 for parity check below 2425 shlq(tmp3, 1); 2426 jcc(Assembler::carryClear, integer); 2427 jcc(Assembler::notZero, integer); 2428 STOP("integer indefinite value shouldn't be seen here"); 2429 bind(integer); 2430 } 2431 #endif 2432 #endif 2433 2434 // get rid of duplicate arguments. Stack: X^Y 2435 if (num_fpu_regs_in_use > 0) { 2436 fxch(); fpop(); 2437 fxch(); fpop(); 2438 } else { 2439 ffree(2); 2440 ffree(1); 2441 } 2442 2443 testl(tmp2, 1); 2444 jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y 2445 // X <= 0, Y odd: X^Y = -abs(X)^Y 2446 2447 fchs(); // Stack: -abs(X)^Y Y 2448 jmp(done); 2449 } 2450 2451 // slow case: runtime call 2452 bind(slow_case); 2453 2454 fpop(); // pop incorrect result or int(Y) 2455 2456 fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2457 is_exp ? 1 : 2, num_fpu_regs_in_use); 2458 2459 // Come here with result in F-TOS 2460 bind(done); 2461 } 2462 2463 void MacroAssembler::fpop() { 2464 ffree(); 2465 fincstp(); 2466 } 2467 2468 void MacroAssembler::fremr(Register tmp) { 2469 save_rax(tmp); 2470 { Label L; 2471 bind(L); 2472 fprem(); 2473 fwait(); fnstsw_ax(); 2474 #ifdef _LP64 2475 testl(rax, 0x400); 2476 jcc(Assembler::notEqual, L); 2477 #else 2478 sahf(); 2479 jcc(Assembler::parity, L); 2480 #endif // _LP64 2481 } 2482 restore_rax(tmp); 2483 // Result is in ST0.
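// (fprem reduces the exponent by at most 63 bits per round, which is why the
// loop above polls the FPU status word -- C2, mask 0x400, equivalently the
// parity flag after sahf -- and retries until the partial remainder is
// complete.)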
2484 // Note: fxch & fpop to get rid of ST1 2485 // (otherwise FPU stack could overflow eventually) 2486 fxch(1); 2487 fpop(); 2488 } 2489 2490 2491 void MacroAssembler::incrementl(AddressLiteral dst) { 2492 if (reachable(dst)) { 2493 incrementl(as_Address(dst)); 2494 } else { 2495 lea(rscratch1, dst); 2496 incrementl(Address(rscratch1, 0)); 2497 } 2498 } 2499 2500 void MacroAssembler::incrementl(ArrayAddress dst) { 2501 incrementl(as_Address(dst)); 2502 } 2503 2504 void MacroAssembler::incrementl(Register reg, int value) { 2505 if (value == min_jint) {addl(reg, value) ; return; } 2506 if (value < 0) { decrementl(reg, -value); return; } 2507 if (value == 0) { ; return; } 2508 if (value == 1 && UseIncDec) { incl(reg) ; return; } 2509 /* else */ { addl(reg, value) ; return; } 2510 } 2511 2512 void MacroAssembler::incrementl(Address dst, int value) { 2513 if (value == min_jint) {addl(dst, value) ; return; } 2514 if (value < 0) { decrementl(dst, -value); return; } 2515 if (value == 0) { ; return; } 2516 if (value == 1 && UseIncDec) { incl(dst) ; return; } 2517 /* else */ { addl(dst, value) ; return; } 2518 } 2519 2520 void MacroAssembler::jump(AddressLiteral dst) { 2521 if (reachable(dst)) { 2522 jmp_literal(dst.target(), dst.rspec()); 2523 } else { 2524 lea(rscratch1, dst); 2525 jmp(rscratch1); 2526 } 2527 } 2528 2529 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { 2530 if (reachable(dst)) { 2531 InstructionMark im(this); 2532 relocate(dst.reloc()); 2533 const int short_size = 2; 2534 const int long_size = 6; 2535 int offs = (intptr_t)dst.target() - ((intptr_t)pc()); 2536 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { 2537 // 0111 tttn #8-bit disp 2538 emit_int8(0x70 | cc); 2539 emit_int8((offs - short_size) & 0xFF); 2540 } else { 2541 // 0000 1111 1000 tttn #32-bit disp 2542 emit_int8(0x0F); 2543 emit_int8((unsigned char)(0x80 | cc)); 2544 emit_int32(offs - long_size); 2545 } 2546 } else { 2547 #ifdef ASSERT 2548 warning("reversing conditional branch"); 2549 #endif /* ASSERT */ 2550 Label skip; 2551 jccb(reverse[cc], skip); 2552 lea(rscratch1, dst); 2553 Assembler::jmp(rscratch1); 2554 bind(skip); 2555 } 2556 } 2557 2558 void MacroAssembler::ldmxcsr(AddressLiteral src) { 2559 if (reachable(src)) { 2560 Assembler::ldmxcsr(as_Address(src)); 2561 } else { 2562 lea(rscratch1, src); 2563 Assembler::ldmxcsr(Address(rscratch1, 0)); 2564 } 2565 } 2566 2567 int MacroAssembler::load_signed_byte(Register dst, Address src) { 2568 int off; 2569 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 2570 off = offset(); 2571 movsbl(dst, src); // movsxb 2572 } else { 2573 off = load_unsigned_byte(dst, src); 2574 shll(dst, 24); 2575 sarl(dst, 24); 2576 } 2577 return off; 2578 } 2579 2580 // Note: load_signed_short used to be called load_signed_word. 2581 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler 2582 // manual, which means 16 bits, that usage is found nowhere in HotSpot code. 2583 // The term "word" in HotSpot means a 32- or 64-bit machine word. 2584 int MacroAssembler::load_signed_short(Register dst, Address src) { 2585 int off; 2586 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 2587 // This is dubious to me since it seems safe to do a signed 16 => 64 bit 2588 // version but this is what 64bit has always done. This seems to imply 2589 // that users are only using 32bits worth. 
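// (For reference: movswl sign-extends the 16-bit source through bit 15 into
// the full 32-bit register, e.g. a stored 0xFFFF loads as -1; the pre-P6
// fallback below gets the same effect with the shll/sarl pair.)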
2590 off = offset(); 2591 movswl(dst, src); // movsxw 2592 } else { 2593 off = load_unsigned_short(dst, src); 2594 shll(dst, 16); 2595 sarl(dst, 16); 2596 } 2597 return off; 2598 } 2599 2600 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 2601 // According to Intel Doc. AP-526, "Zero-Extension of Short", p. 16, 2602 // and "3.9 Partial Register Penalties", p. 22. 2603 int off; 2604 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { 2605 off = offset(); 2606 movzbl(dst, src); // movzxb 2607 } else { 2608 xorl(dst, dst); 2609 off = offset(); 2610 movb(dst, src); 2611 } 2612 return off; 2613 } 2614 2615 // Note: load_unsigned_short used to be called load_unsigned_word. 2616 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 2617 // According to Intel Doc. AP-526, "Zero-Extension of Short", p. 16, 2618 // and "3.9 Partial Register Penalties", p. 22. 2619 int off; 2620 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { 2621 off = offset(); 2622 movzwl(dst, src); // movzxw 2623 } else { 2624 xorl(dst, dst); 2625 off = offset(); 2626 movw(dst, src); 2627 } 2628 return off; 2629 } 2630 2631 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { 2632 switch (size_in_bytes) { 2633 #ifndef _LP64 2634 case 8: 2635 assert(dst2 != noreg, "second dest register required"); 2636 movl(dst, src); 2637 movl(dst2, src.plus_disp(BytesPerInt)); 2638 break; 2639 #else 2640 case 8: movq(dst, src); break; 2641 #endif 2642 case 4: movl(dst, src); break; 2643 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 2644 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 2645 default: ShouldNotReachHere(); 2646 } 2647 } 2648 2649 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 2650 switch (size_in_bytes) { 2651 #ifndef _LP64 2652 case 8: 2653 assert(src2 != noreg, "second source register required"); 2654 movl(dst, src); 2655 movl(dst.plus_disp(BytesPerInt), src2); 2656 break; 2657 #else 2658 case 8: movq(dst, src); break; 2659 #endif 2660 case 4: movl(dst, src); break; 2661 case 2: movw(dst, src); break; 2662 case 1: movb(dst, src); break; 2663 default: ShouldNotReachHere(); 2664 } 2665 } 2666 2667 void MacroAssembler::mov32(AddressLiteral dst, Register src) { 2668 if (reachable(dst)) { 2669 movl(as_Address(dst), src); 2670 } else { 2671 lea(rscratch1, dst); 2672 movl(Address(rscratch1, 0), src); 2673 } 2674 } 2675 2676 void MacroAssembler::mov32(Register dst, AddressLiteral src) { 2677 if (reachable(src)) { 2678 movl(dst, as_Address(src)); 2679 } else { 2680 lea(rscratch1, src); 2681 movl(dst, Address(rscratch1, 0)); 2682 } 2683 } 2684 2685 // C++ bool manipulation 2686 2687 void MacroAssembler::movbool(Register dst, Address src) { 2688 if(sizeof(bool) == 1) 2689 movb(dst, src); 2690 else if(sizeof(bool) == 2) 2691 movw(dst, src); 2692 else if(sizeof(bool) == 4) 2693 movl(dst, src); 2694 else 2695 // unsupported 2696 ShouldNotReachHere(); 2697 } 2698 2699 void MacroAssembler::movbool(Address dst, bool boolconst) { 2700 if(sizeof(bool) == 1) 2701 movb(dst, (int) boolconst); 2702 else if(sizeof(bool) == 2) 2703 movw(dst, (int) boolconst); 2704 else if(sizeof(bool) == 4) 2705 movl(dst, (int) boolconst); 2706 else 2707 // unsupported 2708 ShouldNotReachHere(); 2709 } 2710 2711 void MacroAssembler::movbool(Address dst, Register src) { 2712 if(sizeof(bool) == 1) 2713
movb(dst, src); 2714 else if(sizeof(bool) == 2) 2715 movw(dst, src); 2716 else if(sizeof(bool) == 4) 2717 movl(dst, src); 2718 else 2719 // unsupported 2720 ShouldNotReachHere(); 2721 } 2722 2723 void MacroAssembler::movbyte(ArrayAddress dst, int src) { 2724 movb(as_Address(dst), src); 2725 } 2726 2727 void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { 2728 if (reachable(src)) { 2729 movdl(dst, as_Address(src)); 2730 } else { 2731 lea(rscratch1, src); 2732 movdl(dst, Address(rscratch1, 0)); 2733 } 2734 } 2735 2736 void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { 2737 if (reachable(src)) { 2738 movq(dst, as_Address(src)); 2739 } else { 2740 lea(rscratch1, src); 2741 movq(dst, Address(rscratch1, 0)); 2742 } 2743 } 2744 2745 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 2746 if (reachable(src)) { 2747 if (UseXmmLoadAndClearUpper) { 2748 movsd (dst, as_Address(src)); 2749 } else { 2750 movlpd(dst, as_Address(src)); 2751 } 2752 } else { 2753 lea(rscratch1, src); 2754 if (UseXmmLoadAndClearUpper) { 2755 movsd (dst, Address(rscratch1, 0)); 2756 } else { 2757 movlpd(dst, Address(rscratch1, 0)); 2758 } 2759 } 2760 } 2761 2762 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 2763 if (reachable(src)) { 2764 movss(dst, as_Address(src)); 2765 } else { 2766 lea(rscratch1, src); 2767 movss(dst, Address(rscratch1, 0)); 2768 } 2769 } 2770 2771 void MacroAssembler::movptr(Register dst, Register src) { 2772 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 2773 } 2774 2775 void MacroAssembler::movptr(Register dst, Address src) { 2776 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 2777 } 2778 2779 // src should NEVER be a real pointer. Use AddressLiteral for true pointers 2780 void MacroAssembler::movptr(Register dst, intptr_t src) { 2781 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 2782 } 2783 2784 void MacroAssembler::movptr(Address dst, Register src) { 2785 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 2786 } 2787 2788 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { 2789 if (reachable(src)) { 2790 Assembler::movdqu(dst, as_Address(src)); 2791 } else { 2792 lea(rscratch1, src); 2793 Assembler::movdqu(dst, Address(rscratch1, 0)); 2794 } 2795 } 2796 2797 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { 2798 if (reachable(src)) { 2799 Assembler::movsd(dst, as_Address(src)); 2800 } else { 2801 lea(rscratch1, src); 2802 Assembler::movsd(dst, Address(rscratch1, 0)); 2803 } 2804 } 2805 2806 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 2807 if (reachable(src)) { 2808 Assembler::movss(dst, as_Address(src)); 2809 } else { 2810 lea(rscratch1, src); 2811 Assembler::movss(dst, Address(rscratch1, 0)); 2812 } 2813 } 2814 2815 void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { 2816 if (reachable(src)) { 2817 Assembler::mulsd(dst, as_Address(src)); 2818 } else { 2819 lea(rscratch1, src); 2820 Assembler::mulsd(dst, Address(rscratch1, 0)); 2821 } 2822 } 2823 2824 void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { 2825 if (reachable(src)) { 2826 Assembler::mulss(dst, as_Address(src)); 2827 } else { 2828 lea(rscratch1, src); 2829 Assembler::mulss(dst, Address(rscratch1, 0)); 2830 } 2831 } 2832 2833 void MacroAssembler::null_check(Register reg, int offset) { 2834 if (needs_explicit_null_check(offset)) { 2835 // provoke OS NULL exception if reg = NULL by 2836 // accessing M[reg] w/o changing any (non-CC) registers 2837 // NOTE: cmpl is plenty here to 
provoke a segv 2838 cmpptr(rax, Address(reg, 0)); 2839 // Note: should probably use testl(rax, Address(reg, 0)); 2840 // may be shorter code (however, this version of 2841 // testl needs to be implemented first) 2842 } else { 2843 // nothing to do, (later) access of M[reg + offset] 2844 // will provoke OS NULL exception if reg = NULL 2845 } 2846 } 2847 2848 void MacroAssembler::os_breakpoint() { 2849 // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability 2850 // (e.g., MSVC can't call ps() otherwise) 2851 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 2852 } 2853 2854 void MacroAssembler::pop_CPU_state() { 2855 pop_FPU_state(); 2856 pop_IU_state(); 2857 } 2858 2859 void MacroAssembler::pop_FPU_state() { 2860 NOT_LP64(frstor(Address(rsp, 0));) 2861 LP64_ONLY(fxrstor(Address(rsp, 0));) 2862 addptr(rsp, FPUStateSizeInWords * wordSize); 2863 } 2864 2865 void MacroAssembler::pop_IU_state() { 2866 popa(); 2867 LP64_ONLY(addq(rsp, 8)); 2868 popf(); 2869 } 2870 2871 // Save Integer and Float state 2872 // Warning: Stack must be 16 byte aligned (64bit) 2873 void MacroAssembler::push_CPU_state() { 2874 push_IU_state(); 2875 push_FPU_state(); 2876 } 2877 2878 void MacroAssembler::push_FPU_state() { 2879 subptr(rsp, FPUStateSizeInWords * wordSize); 2880 #ifndef _LP64 2881 fnsave(Address(rsp, 0)); 2882 fwait(); 2883 #else 2884 fxsave(Address(rsp, 0)); 2885 #endif // LP64 2886 } 2887 2888 void MacroAssembler::push_IU_state() { 2889 // Push flags first because pusha kills them 2890 pushf(); 2891 // Make sure rsp stays 16-byte aligned 2892 LP64_ONLY(subq(rsp, 8)); 2893 pusha(); 2894 } 2895 2896 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 2897 // determine java_thread register 2898 if (!java_thread->is_valid()) { 2899 java_thread = rdi; 2900 get_thread(java_thread); 2901 } 2902 // we must set sp to zero to clear frame 2903 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 2904 if (clear_fp) { 2905 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 2906 } 2907 2908 if (clear_pc) 2909 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 2910 2911 } 2912 2913 void MacroAssembler::restore_rax(Register tmp) { 2914 if (tmp == noreg) pop(rax); 2915 else if (tmp != rax) mov(rax, tmp); 2916 } 2917 2918 void MacroAssembler::round_to(Register reg, int modulus) { 2919 addptr(reg, modulus - 1); 2920 andptr(reg, -modulus); 2921 } 2922 2923 void MacroAssembler::save_rax(Register tmp) { 2924 if (tmp == noreg) push(rax); 2925 else if (tmp != rax) mov(tmp, rax); 2926 } 2927 2928 // Write serialization page so VM thread can do a pseudo remote membar. 2929 // We use the current thread pointer to calculate a thread specific 2930 // offset to write to within the page. This minimizes bus traffic 2931 // due to cache line collision. 2932 void MacroAssembler::serialize_memory(Register thread, Register tmp) { 2933 movl(tmp, thread); 2934 shrl(tmp, os::get_serialize_page_shift_count()); 2935 andl(tmp, (os::vm_page_size() - sizeof(int))); 2936 2937 Address index(noreg, tmp, Address::times_1); 2938 ExternalAddress page(os::get_memory_serialize_page()); 2939 2940 // Size of store must match masking code above 2941 movl(as_Address(ArrayAddress(page, index)), tmp); 2942 } 2943 2944 // Calls to C land 2945 // 2946 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded 2947 // in the (thread-local) JavaThread object.
When leaving C land, the last Java fp 2948 // has to be reset to 0. This is required to allow proper stack traversal. 2949 void MacroAssembler::set_last_Java_frame(Register java_thread, 2950 Register last_java_sp, 2951 Register last_java_fp, 2952 address last_java_pc) { 2953 // determine java_thread register 2954 if (!java_thread->is_valid()) { 2955 java_thread = rdi; 2956 get_thread(java_thread); 2957 } 2958 // determine last_java_sp register 2959 if (!last_java_sp->is_valid()) { 2960 last_java_sp = rsp; 2961 } 2962 2963 // last_java_fp is optional 2964 2965 if (last_java_fp->is_valid()) { 2966 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); 2967 } 2968 2969 // last_java_pc is optional 2970 2971 if (last_java_pc != NULL) { 2972 lea(Address(java_thread, 2973 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), 2974 InternalAddress(last_java_pc)); 2975 2976 } 2977 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 2978 } 2979 2980 void MacroAssembler::shlptr(Register dst, int imm8) { 2981 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); 2982 } 2983 2984 void MacroAssembler::shrptr(Register dst, int imm8) { 2985 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); 2986 } 2987 2988 void MacroAssembler::sign_extend_byte(Register reg) { 2989 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { 2990 movsbl(reg, reg); // movsxb 2991 } else { 2992 shll(reg, 24); 2993 sarl(reg, 24); 2994 } 2995 } 2996 2997 void MacroAssembler::sign_extend_short(Register reg) { 2998 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 2999 movswl(reg, reg); // movsxw 3000 } else { 3001 shll(reg, 16); 3002 sarl(reg, 16); 3003 } 3004 } 3005 3006 void MacroAssembler::testl(Register dst, AddressLiteral src) { 3007 assert(reachable(src), "Address should be reachable"); 3008 testl(dst, as_Address(src)); 3009 } 3010 3011 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { 3012 if (reachable(src)) { 3013 Assembler::sqrtsd(dst, as_Address(src)); 3014 } else { 3015 lea(rscratch1, src); 3016 Assembler::sqrtsd(dst, Address(rscratch1, 0)); 3017 } 3018 } 3019 3020 void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { 3021 if (reachable(src)) { 3022 Assembler::sqrtss(dst, as_Address(src)); 3023 } else { 3024 lea(rscratch1, src); 3025 Assembler::sqrtss(dst, Address(rscratch1, 0)); 3026 } 3027 } 3028 3029 void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { 3030 if (reachable(src)) { 3031 Assembler::subsd(dst, as_Address(src)); 3032 } else { 3033 lea(rscratch1, src); 3034 Assembler::subsd(dst, Address(rscratch1, 0)); 3035 } 3036 } 3037 3038 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { 3039 if (reachable(src)) { 3040 Assembler::subss(dst, as_Address(src)); 3041 } else { 3042 lea(rscratch1, src); 3043 Assembler::subss(dst, Address(rscratch1, 0)); 3044 } 3045 } 3046 3047 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 3048 if (reachable(src)) { 3049 Assembler::ucomisd(dst, as_Address(src)); 3050 } else { 3051 lea(rscratch1, src); 3052 Assembler::ucomisd(dst, Address(rscratch1, 0)); 3053 } 3054 } 3055 3056 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 3057 if (reachable(src)) { 3058 Assembler::ucomiss(dst, as_Address(src)); 3059 } else { 3060 lea(rscratch1, src); 3061 Assembler::ucomiss(dst, Address(rscratch1, 0)); 3062 } 3063 } 3064 3065 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 3066 // Used in sign-bit 
flipping with aligned address. 3067 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 3068 if (reachable(src)) { 3069 Assembler::xorpd(dst, as_Address(src)); 3070 } else { 3071 lea(rscratch1, src); 3072 Assembler::xorpd(dst, Address(rscratch1, 0)); 3073 } 3074 } 3075 3076 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 3077 // Used in sign-bit flipping with aligned address. 3078 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 3079 if (reachable(src)) { 3080 Assembler::xorps(dst, as_Address(src)); 3081 } else { 3082 lea(rscratch1, src); 3083 Assembler::xorps(dst, Address(rscratch1, 0)); 3084 } 3085 } 3086 3087 void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { 3088 // Used in sign-bit flipping with aligned address. 3089 bool aligned_adr = (((intptr_t)src.target() & 15) == 0); 3090 assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes"); 3091 if (reachable(src)) { 3092 Assembler::pshufb(dst, as_Address(src)); 3093 } else { 3094 lea(rscratch1, src); 3095 Assembler::pshufb(dst, Address(rscratch1, 0)); 3096 } 3097 } 3098 3099 // AVX 3-operands instructions 3100 3101 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3102 if (reachable(src)) { 3103 vaddsd(dst, nds, as_Address(src)); 3104 } else { 3105 lea(rscratch1, src); 3106 vaddsd(dst, nds, Address(rscratch1, 0)); 3107 } 3108 } 3109 3110 void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3111 if (reachable(src)) { 3112 vaddss(dst, nds, as_Address(src)); 3113 } else { 3114 lea(rscratch1, src); 3115 vaddss(dst, nds, Address(rscratch1, 0)); 3116 } 3117 } 3118 3119 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3120 if (reachable(src)) { 3121 vandpd(dst, nds, as_Address(src), vector256); 3122 } else { 3123 lea(rscratch1, src); 3124 vandpd(dst, nds, Address(rscratch1, 0), vector256); 3125 } 3126 } 3127 3128 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3129 if (reachable(src)) { 3130 vandps(dst, nds, as_Address(src), vector256); 3131 } else { 3132 lea(rscratch1, src); 3133 vandps(dst, nds, Address(rscratch1, 0), vector256); 3134 } 3135 } 3136 3137 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3138 if (reachable(src)) { 3139 vdivsd(dst, nds, as_Address(src)); 3140 } else { 3141 lea(rscratch1, src); 3142 vdivsd(dst, nds, Address(rscratch1, 0)); 3143 } 3144 } 3145 3146 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3147 if (reachable(src)) { 3148 vdivss(dst, nds, as_Address(src)); 3149 } else { 3150 lea(rscratch1, src); 3151 vdivss(dst, nds, Address(rscratch1, 0)); 3152 } 3153 } 3154 3155 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3156 if (reachable(src)) { 3157 vmulsd(dst, nds, as_Address(src)); 3158 } else { 3159 lea(rscratch1, src); 3160 vmulsd(dst, nds, Address(rscratch1, 0)); 3161 } 3162 } 3163 3164 void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3165 if (reachable(src)) { 3166 vmulss(dst, nds, as_Address(src)); 3167 } else { 3168 lea(rscratch1, src); 3169 vmulss(dst, nds, Address(rscratch1, 0)); 3170 } 3171 } 3172 3173 void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3174 if (reachable(src)) { 3175 vsubsd(dst, 
nds, as_Address(src)); 3176 } else { 3177 lea(rscratch1, src); 3178 vsubsd(dst, nds, Address(rscratch1, 0)); 3179 } 3180 } 3181 3182 void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3183 if (reachable(src)) { 3184 vsubss(dst, nds, as_Address(src)); 3185 } else { 3186 lea(rscratch1, src); 3187 vsubss(dst, nds, Address(rscratch1, 0)); 3188 } 3189 } 3190 3191 void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3192 if (reachable(src)) { 3193 vxorpd(dst, nds, as_Address(src), vector256); 3194 } else { 3195 lea(rscratch1, src); 3196 vxorpd(dst, nds, Address(rscratch1, 0), vector256); 3197 } 3198 } 3199 3200 void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3201 if (reachable(src)) { 3202 vxorps(dst, nds, as_Address(src), vector256); 3203 } else { 3204 lea(rscratch1, src); 3205 vxorps(dst, nds, Address(rscratch1, 0), vector256); 3206 } 3207 } 3208 3209 3210 ////////////////////////////////////////////////////////////////////////////////// 3211 #if INCLUDE_ALL_GCS 3212 3213 void MacroAssembler::g1_write_barrier_pre(Register obj, 3214 Register pre_val, 3215 Register thread, 3216 Register tmp, 3217 bool tosca_live, 3218 bool expand_call) { 3219 3220 // If expand_call is true then we expand the call_VM_leaf macro 3221 // directly to skip generating the check by 3222 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 3223 3224 #ifdef _LP64 3225 assert(thread == r15_thread, "must be"); 3226 #endif // _LP64 3227 3228 Label done; 3229 Label runtime; 3230 3231 assert(pre_val != noreg, "check this code"); 3232 3233 if (obj != noreg) { 3234 assert_different_registers(obj, pre_val, tmp); 3235 assert(pre_val != rax, "check this code"); 3236 } 3237 3238 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 3239 PtrQueue::byte_offset_of_active())); 3240 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 3241 PtrQueue::byte_offset_of_index())); 3242 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 3243 PtrQueue::byte_offset_of_buf())); 3244 3245 3246 // Is marking active? 3247 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { 3248 cmpl(in_progress, 0); 3249 } else { 3250 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); 3251 cmpb(in_progress, 0); 3252 } 3253 jcc(Assembler::equal, done); 3254 3255 // Do we need to load the previous value? 3256 if (obj != noreg) { 3257 load_heap_oop(pre_val, Address(obj, 0)); 3258 } 3259 3260 // Is the previous value null? 3261 cmpptr(pre_val, (int32_t) NULL_WORD); 3262 jcc(Assembler::equal, done); 3263 3264 // Can we store original value in the thread's buffer? 3265 // Is index == 0? 3266 // (The index field is typed as size_t.) 3267 3268 movptr(tmp, index); // tmp := *index_adr 3269 cmpptr(tmp, 0); // tmp == 0? 
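// (The SATB buffer is filled from high addresses downward -- see the
// subptr/movptr pair below -- so index == 0 means there is no room left and
// the buffer must be flushed by the runtime.)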
3270 jcc(Assembler::equal, runtime); // If yes, goto runtime 3271 3272 subptr(tmp, wordSize); // tmp := tmp - wordSize 3273 movptr(index, tmp); // *index_adr := tmp 3274 addptr(tmp, buffer); // tmp := tmp + *buffer_adr 3275 3276 // Record the previous value 3277 movptr(Address(tmp, 0), pre_val); 3278 jmp(done); 3279 3280 bind(runtime); 3281 // save the live input values 3282 if(tosca_live) push(rax); 3283 3284 if (obj != noreg && obj != rax) 3285 push(obj); 3286 3287 if (pre_val != rax) 3288 push(pre_val); 3289 3290 // Calling the runtime using the regular call_VM_leaf mechanism generates 3291 // code (generated by InterpreterMacroAssembler::call_VM_leaf_base) 3292 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 3293 // 3294 // If we are generating the pre-barrier without a frame (e.g. in the 3295 // intrinsified Reference.get() routine) then ebp might be pointing to 3296 // the caller frame and so this check will most likely fail at runtime. 3297 // 3298 // Expanding the call directly bypasses the generation of the check. 3299 // So when we do not have a full interpreter frame on the stack 3300 // expand_call should be passed true. 3301 3302 NOT_LP64( push(thread); ) 3303 3304 if (expand_call) { 3305 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 3306 pass_arg1(this, thread); 3307 pass_arg0(this, pre_val); 3308 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); 3309 } else { 3310 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); 3311 } 3312 3313 NOT_LP64( pop(thread); ) 3314 3315 // restore the live input values 3316 if (pre_val != rax) 3317 pop(pre_val); 3318 3319 if (obj != noreg && obj != rax) 3320 pop(obj); 3321 3322 if(tosca_live) pop(rax); 3323 3324 bind(done); 3325 } 3326 3327 void MacroAssembler::g1_write_barrier_post(Register store_addr, 3328 Register new_val, 3329 Register thread, 3330 Register tmp, 3331 Register tmp2) { 3332 #ifdef _LP64 3333 assert(thread == r15_thread, "must be"); 3334 #endif // _LP64 3335 3336 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 3337 PtrQueue::byte_offset_of_index())); 3338 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 3339 PtrQueue::byte_offset_of_buf())); 3340 3341 BarrierSet* bs = Universe::heap()->barrier_set(); 3342 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 3343 Label done; 3344 Label runtime; 3345 3346 // Does store cross heap regions? 3347 3348 movptr(tmp, store_addr); 3349 xorptr(tmp, new_val); 3350 shrptr(tmp, HeapRegion::LogOfHRGrainBytes); 3351 jcc(Assembler::equal, done); 3352 3353 // crosses regions, storing NULL? 3354 3355 cmpptr(new_val, (int32_t) NULL_WORD); 3356 jcc(Assembler::equal, done); 3357 3358 // storing region crossing non-NULL, is card already dirty?
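// (Card arithmetic, for reference: the card address is
// (store_addr >> card_shift) + byte_map_base, so with the usual 2^9 = 512
// byte cards two stores 512 bytes apart dirty adjacent card bytes; "dirty"
// here is a zero byte, as the cmpb/movb below show.)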
3359 3360 ExternalAddress cardtable((address) ct->byte_map_base); 3361 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 3362 #ifdef _LP64 3363 const Register card_addr = tmp; 3364 3365 movq(card_addr, store_addr); 3366 shrq(card_addr, CardTableModRefBS::card_shift); 3367 3368 lea(tmp2, cardtable); 3369 3370 // get the address of the card 3371 addq(card_addr, tmp2); 3372 #else 3373 const Register card_index = tmp; 3374 3375 movl(card_index, store_addr); 3376 shrl(card_index, CardTableModRefBS::card_shift); 3377 3378 Address index(noreg, card_index, Address::times_1); 3379 const Register card_addr = tmp; 3380 lea(card_addr, as_Address(ArrayAddress(cardtable, index))); 3381 #endif 3382 cmpb(Address(card_addr, 0), 0); 3383 jcc(Assembler::equal, done); 3384 3385 // storing a region crossing, non-NULL oop, card is clean. 3386 // dirty card and log. 3387 3388 movb(Address(card_addr, 0), 0); 3389 3390 cmpl(queue_index, 0); 3391 jcc(Assembler::equal, runtime); 3392 subl(queue_index, wordSize); 3393 movptr(tmp2, buffer); 3394 #ifdef _LP64 3395 movslq(rscratch1, queue_index); 3396 addq(tmp2, rscratch1); 3397 movq(Address(tmp2, 0), card_addr); 3398 #else 3399 addl(tmp2, queue_index); 3400 movl(Address(tmp2, 0), card_index); 3401 #endif 3402 jmp(done); 3403 3404 bind(runtime); 3405 // save the live input values 3406 push(store_addr); 3407 push(new_val); 3408 #ifdef _LP64 3409 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); 3410 #else 3411 push(thread); 3412 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); 3413 pop(thread); 3414 #endif 3415 pop(new_val); 3416 pop(store_addr); 3417 3418 bind(done); 3419 } 3420 3421 #endif // INCLUDE_ALL_GCS 3422 ////////////////////////////////////////////////////////////////////////////////// 3423 3424 3425 void MacroAssembler::store_check(Register obj) { 3426 // Does a store check for the oop in register obj. The content of 3427 // register obj is destroyed afterwards. 3428 store_check_part_1(obj); 3429 store_check_part_2(obj); 3430 } 3431 3432 void MacroAssembler::store_check(Register obj, Address dst) { 3433 store_check(obj); 3434 } 3435 3436 3437 // split the store check operation so that other instructions can be scheduled inbetween 3438 void MacroAssembler::store_check_part_1(Register obj) { 3439 BarrierSet* bs = Universe::heap()->barrier_set(); 3440 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 3441 shrptr(obj, CardTableModRefBS::card_shift); 3442 } 3443 3444 void MacroAssembler::store_check_part_2(Register obj) { 3445 BarrierSet* bs = Universe::heap()->barrier_set(); 3446 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 3447 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 3448 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 3449 3450 // The calculation for byte_map_base is as follows: 3451 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); 3452 // So this essentially converts an address to a displacement and 3453 // it will never need to be relocated. 
On 64bit however the value may be too 3454 // large for a 32bit displacement 3455 3456 intptr_t disp = (intptr_t) ct->byte_map_base; 3457 if (is_simm32(disp)) { 3458 Address cardtable(noreg, obj, Address::times_1, disp); 3459 movb(cardtable, 0); 3460 } else { 3461 // By doing it as an ExternalAddress disp could be converted to a rip-relative 3462 // displacement and done in a single instruction given favorable mapping and 3463 // a smarter version of as_Address. Worst case it is two instructions which 3464 // is no worse than loading disp into a register and using a simple 3465 // Address() as above. 3466 // We can't use ExternalAddress as the only style since if disp == 0 we'll 3467 // assert, because NULL isn't acceptable in a relocInfo (see 6644928). In any case 3468 // in some cases we'll get a single instruction version. 3469 3470 ExternalAddress cardtable((address)disp); 3471 Address index(noreg, obj, Address::times_1); 3472 movb(as_Address(ArrayAddress(cardtable, index)), 0); 3473 } 3474 } 3475 3476 void MacroAssembler::subptr(Register dst, int32_t imm32) { 3477 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 3478 } 3479 3480 // Force generation of a 4 byte immediate value even if it fits into 8bit 3481 void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { 3482 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); 3483 } 3484 3485 void MacroAssembler::subptr(Register dst, Register src) { 3486 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 3487 } 3488 3489 // C++ bool manipulation 3490 void MacroAssembler::testbool(Register dst) { 3491 if(sizeof(bool) == 1) 3492 testb(dst, 0xff); 3493 else if(sizeof(bool) == 2) { 3494 // testw implementation needed for two byte bools 3495 ShouldNotReachHere(); 3496 } else if(sizeof(bool) == 4) 3497 testl(dst, dst); 3498 else 3499 // unsupported 3500 ShouldNotReachHere(); 3501 } 3502 3503 void MacroAssembler::testptr(Register dst, Register src) { 3504 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 3505 } 3506 3507 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 3508 void MacroAssembler::tlab_allocate(Register obj, 3509 Register var_size_in_bytes, 3510 int con_size_in_bytes, 3511 Register t1, 3512 Register t2, 3513 Label& slow_case) { 3514 assert_different_registers(obj, t1, t2); 3515 assert_different_registers(obj, var_size_in_bytes, t1); 3516 Register end = t2; 3517 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); 3518 3519 verify_tlab(); 3520 3521 NOT_LP64(get_thread(thread)); 3522 3523 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); 3524 if (var_size_in_bytes == noreg) { 3525 lea(end, Address(obj, con_size_in_bytes)); 3526 } else { 3527 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 3528 } 3529 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); 3530 jcc(Assembler::above, slow_case); 3531 3532 // update the tlab top pointer 3533 movptr(Address(thread, JavaThread::tlab_top_offset()), end); 3534 3535 // recover var_size_in_bytes if necessary 3536 if (var_size_in_bytes == end) { 3537 subptr(var_size_in_bytes, obj); 3538 } 3539 verify_tlab(); 3540 } 3541 3542 // Preserves rbx, and rdx.
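// Returns the thread register for reuse by the caller. Control leaves
// through one of the three labels: retry (TLAB refilled, retry the fast
// path), try_eden (TLAB retained, allocate directly in the shared eden) or
// slow_case.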
3543 Register MacroAssembler::tlab_refill(Label& retry, 3544 Label& try_eden, 3545 Label& slow_case) { 3546 Register top = rax; 3547 Register t1 = rcx; 3548 Register t2 = rsi; 3549 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); 3550 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 3551 Label do_refill, discard_tlab; 3552 3553 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 3554 // No allocation in the shared eden. 3555 jmp(slow_case); 3556 } 3557 3558 NOT_LP64(get_thread(thread_reg)); 3559 3560 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 3561 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 3562 3563 // calculate amount of free space 3564 subptr(t1, top); 3565 shrptr(t1, LogHeapWordSize); 3566 3567 // Retain tlab and allocate object in shared space if 3568 // the amount free in the tlab is too large to discard. 3569 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 3570 jcc(Assembler::lessEqual, discard_tlab); 3571 3572 // Retain 3573 // %%% yuck as movptr... 3574 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); 3575 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 3576 if (TLABStats) { 3577 // increment number of slow_allocations 3578 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 3579 } 3580 jmp(try_eden); 3581 3582 bind(discard_tlab); 3583 if (TLABStats) { 3584 // increment number of refills 3585 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 3586 // accumulate wastage -- t1 is amount free in tlab 3587 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 3588 } 3589 3590 // if tlab is currently allocated (top or end != null) then 3591 // fill [top, end + alignment_reserve) with array object 3592 testptr(top, top); 3593 jcc(Assembler::zero, do_refill); 3594 3595 // set up the mark word 3596 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 3597 // set the length to the remaining space 3598 subptr(t1, typeArrayOopDesc::header_size(T_INT)); 3599 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); 3600 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); 3601 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 3602 // set klass to intArrayKlass 3603 // dubious reloc why not an oop reloc? 3604 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); 3605 // store klass last. concurrent gcs assumes klass length is valid if 3606 // klass field is not null. 3607 store_klass(top, t1); 3608 3609 movptr(t1, top); 3610 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 3611 incr_allocated_bytes(thread_reg, t1, 0); 3612 3613 // refill the tlab with an eden allocation 3614 bind(do_refill); 3615 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 3616 shlptr(t1, LogHeapWordSize); 3617 // allocate new tlab, address returned in top 3618 eden_allocate(top, t1, 0, t2, slow_case); 3619 3620 // Check that t1 was preserved in eden_allocate. 
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    STOP("assert(t1 == tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);

  return thread_reg; // for use by caller
}

void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    get_thread(thread);
#endif
  }

#ifdef _LP64
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}

void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
  pusha();

  // if we are coming from c1, xmm registers may be live
  int off = 0;
  if (UseSSE == 1) {
    subptr(rsp, sizeof(jdouble)*8);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
  } else if (UseSSE >= 2) {
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      assert(UseAVX > 0, "256bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
      vextractf128h(Address(rsp,  0),xmm0);
      vextractf128h(Address(rsp, 16),xmm1);
      vextractf128h(Address(rsp, 32),xmm2);
      vextractf128h(Address(rsp, 48),xmm3);
      vextractf128h(Address(rsp, 64),xmm4);
      vextractf128h(Address(rsp, 80),xmm5);
      vextractf128h(Address(rsp, 96),xmm6);
      vextractf128h(Address(rsp,112),xmm7);
#ifdef _LP64
      vextractf128h(Address(rsp,128),xmm8);
      vextractf128h(Address(rsp,144),xmm9);
      vextractf128h(Address(rsp,160),xmm10);
      vextractf128h(Address(rsp,176),xmm11);
      vextractf128h(Address(rsp,192),xmm12);
      vextractf128h(Address(rsp,208),xmm13);
      vextractf128h(Address(rsp,224),xmm14);
      vextractf128h(Address(rsp,240),xmm15);
#endif
    }
#endif
    // Save whole 128-bit (16 bytes) XMM registers
    subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    movdqu(Address(rsp,off++*16),xmm0);
    movdqu(Address(rsp,off++*16),xmm1);
    movdqu(Address(rsp,off++*16),xmm2);
    movdqu(Address(rsp,off++*16),xmm3);
    movdqu(Address(rsp,off++*16),xmm4);
    movdqu(Address(rsp,off++*16),xmm5);
    movdqu(Address(rsp,off++*16),xmm6);
    movdqu(Address(rsp,off++*16),xmm7);
#ifdef _LP64
    movdqu(Address(rsp,off++*16),xmm8);
    movdqu(Address(rsp,off++*16),xmm9);
    movdqu(Address(rsp,off++*16),xmm10);
    movdqu(Address(rsp,off++*16),xmm11);
    movdqu(Address(rsp,off++*16),xmm12);
    movdqu(Address(rsp,off++*16),xmm13);
    movdqu(Address(rsp,off++*16),xmm14);
    movdqu(Address(rsp,off++*16),xmm15);
#endif
  }

  // Preserve registers across runtime call
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
    // FPU state, but cannot trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument(s) to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin, dcos etc.
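    // For illustration (hypothetical num_fpu_regs_in_use == 2, nb_args == 1):
    // each fstp_d below pops the FPU top of stack, so afterwards
    //   [rsp + 8] holds the incoming argument (popped first, from ST0) and
    //   [rsp + 0] holds the other live FPU register;
    // incoming_argument_and_return_value_offset == sizeof(jdouble)*(2-1) == 8,
    // so the second loop reloads the argument from [rsp + 8], and the same
    // slot is reused for the return value after the call.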
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    for (int i = nb_args-1; i >= 0; i--) {
      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
    }
  }

  subptr(rsp, nb_args*sizeof(jdouble));
  for (int i = 0; i < nb_args; i++) {
    fstp_d(Address(rsp, i*sizeof(jdouble)));
  }

#ifdef _LP64
  if (nb_args > 0) {
    movdbl(xmm0, Address(rsp, 0));
  }
  if (nb_args > 1) {
    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
  }
  assert(nb_args <= 2, "unsupported number of args");
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do the proper 64-bit ABI

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level

  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);

#ifdef _LP64
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble) * nb_args);
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore the entire FPU
    // stack except incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble) * nb_args);
  }

  off = 0;
  if (UseSSE == 1) {
    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2) {
    // Restore whole 128-bit (16 bytes) XMM registers
    movdqu(xmm0, Address(rsp,off++*16));
    movdqu(xmm1, Address(rsp,off++*16));
    movdqu(xmm2, Address(rsp,off++*16));
    movdqu(xmm3, Address(rsp,off++*16));
    movdqu(xmm4, Address(rsp,off++*16));
    movdqu(xmm5, Address(rsp,off++*16));
    movdqu(xmm6, Address(rsp,off++*16));
    movdqu(xmm7, Address(rsp,off++*16));
#ifdef _LP64
    movdqu(xmm8,  Address(rsp,off++*16));
    movdqu(xmm9,  Address(rsp,off++*16));
    movdqu(xmm10, Address(rsp,off++*16));
    movdqu(xmm11, Address(rsp,off++*16));
    movdqu(xmm12, Address(rsp,off++*16));
    movdqu(xmm13, Address(rsp,off++*16));
    movdqu(xmm14, Address(rsp,off++*16));
    movdqu(xmm15, Address(rsp,off++*16));
#endif
    addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      vinsertf128h(xmm0, Address(rsp,  0));
      vinsertf128h(xmm1, Address(rsp, 16));
      vinsertf128h(xmm2, Address(rsp, 32));
      vinsertf128h(xmm3, Address(rsp, 48));
      vinsertf128h(xmm4, Address(rsp, 64));
      vinsertf128h(xmm5, Address(rsp, 80));
      vinsertf128h(xmm6, Address(rsp, 96));
      vinsertf128h(xmm7, Address(rsp,112));
#ifdef _LP64
      vinsertf128h(xmm8,  Address(rsp,128));
      vinsertf128h(xmm9,  Address(rsp,144));
      vinsertf128h(xmm10, Address(rsp,160));
      vinsertf128h(xmm11, Address(rsp,176));
      vinsertf128h(xmm12, Address(rsp,192));
      vinsertf128h(xmm13, Address(rsp,208));
      vinsertf128h(xmm14, Address(rsp,224));
      vinsertf128h(xmm15, Address(rsp,240));
#endif
      addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    }
#endif
  }
  popa();
}

static const double pi_4 = 0.7853981633974483;

void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.
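  // In outline, the code generated below behaves as:
  //   if (|x| <= pi/4)  compute fsin/fcos/ftan directly (fast path);
  //   else              call SharedRuntime::dsin/dcos/dtan through
  //                     fp_runtime_fallback (slow path).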
3874 3875 Register tmp = noreg; 3876 if (!VM_Version::supports_cmov()) { 3877 // fcmp needs a temporary so preserve rbx, 3878 tmp = rbx; 3879 push(tmp); 3880 } 3881 3882 Label slow_case, done; 3883 3884 ExternalAddress pi4_adr = (address)&pi_4; 3885 if (reachable(pi4_adr)) { 3886 // x ?<= pi/4 3887 fld_d(pi4_adr); 3888 fld_s(1); // Stack: X PI/4 X 3889 fabs(); // Stack: |X| PI/4 X 3890 fcmp(tmp); 3891 jcc(Assembler::above, slow_case); 3892 3893 // fastest case: -pi/4 <= x <= pi/4 3894 switch(trig) { 3895 case 's': 3896 fsin(); 3897 break; 3898 case 'c': 3899 fcos(); 3900 break; 3901 case 't': 3902 ftan(); 3903 break; 3904 default: 3905 assert(false, "bad intrinsic"); 3906 break; 3907 } 3908 jmp(done); 3909 } 3910 3911 // slow case: runtime call 3912 bind(slow_case); 3913 3914 switch(trig) { 3915 case 's': 3916 { 3917 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); 3918 } 3919 break; 3920 case 'c': 3921 { 3922 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); 3923 } 3924 break; 3925 case 't': 3926 { 3927 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); 3928 } 3929 break; 3930 default: 3931 assert(false, "bad intrinsic"); 3932 break; 3933 } 3934 3935 // Come here with result in F-TOS 3936 bind(done); 3937 3938 if (tmp != noreg) { 3939 pop(tmp); 3940 } 3941 } 3942 3943 3944 // Look up the method for a megamorphic invokeinterface call. 3945 // The target method is determined by <intf_klass, itable_index>. 3946 // The receiver klass is in recv_klass. 3947 // On success, the result will be in method_result, and execution falls through. 3948 // On failure, execution transfers to the given label. 3949 void MacroAssembler::lookup_interface_method(Register recv_klass, 3950 Register intf_klass, 3951 RegisterOrConstant itable_index, 3952 Register method_result, 3953 Register scan_temp, 3954 Label& L_no_such_interface) { 3955 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); 3956 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 3957 "caller must use same register for non-constant itable index as for method"); 3958 3959 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 3960 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; 3961 int itentry_off = itableMethodEntry::method_offset_in_bytes(); 3962 int scan_step = itableOffsetEntry::size() * wordSize; 3963 int vte_size = vtableEntry::size() * wordSize; 3964 Address::ScaleFactor times_vte_scale = Address::times_ptr; 3965 assert(vte_size == wordSize, "else adjust times_vte_scale"); 3966 3967 movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); 3968 3969 // %%% Could store the aligned, prescaled offset in the klassoop. 3970 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); 3971 if (HeapWordsPerLong > 1) { 3972 // Round up to align_object_offset boundary 3973 // see code for InstanceKlass::start_of_itable! 3974 round_to(scan_temp, BytesPerLong); 3975 } 3976 3977 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
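  // The lea below leaves recv_klass == recv_klass + itable_index * wordSize
  // + itentry_off, so once the matching itableOffsetEntry is found, the
  // method can be loaded from recv_klass + scan->offset() (the movptr at
  // the end of this function).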
3978 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 3979 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); 3980 3981 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { 3982 // if (scan->interface() == intf) { 3983 // result = (klass + scan->offset() + itable_index); 3984 // } 3985 // } 3986 Label search, found_method; 3987 3988 for (int peel = 1; peel >= 0; peel--) { 3989 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); 3990 cmpptr(intf_klass, method_result); 3991 3992 if (peel) { 3993 jccb(Assembler::equal, found_method); 3994 } else { 3995 jccb(Assembler::notEqual, search); 3996 // (invert the test to fall through to found_method...) 3997 } 3998 3999 if (!peel) break; 4000 4001 bind(search); 4002 4003 // Check that the previous entry is non-null. A null entry means that 4004 // the receiver class doesn't implement the interface, and wasn't the 4005 // same as when the caller was compiled. 4006 testptr(method_result, method_result); 4007 jcc(Assembler::zero, L_no_such_interface); 4008 addptr(scan_temp, scan_step); 4009 } 4010 4011 bind(found_method); 4012 4013 // Got a hit. 4014 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); 4015 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); 4016 } 4017 4018 4019 // virtual method calling 4020 void MacroAssembler::lookup_virtual_method(Register recv_klass, 4021 RegisterOrConstant vtable_index, 4022 Register method_result) { 4023 const int base = InstanceKlass::vtable_start_offset() * wordSize; 4024 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); 4025 Address vtable_entry_addr(recv_klass, 4026 vtable_index, Address::times_ptr, 4027 base + vtableEntry::method_offset_in_bytes()); 4028 movptr(method_result, vtable_entry_addr); 4029 } 4030 4031 4032 void MacroAssembler::check_klass_subtype(Register sub_klass, 4033 Register super_klass, 4034 Register temp_reg, 4035 Label& L_success) { 4036 Label L_failure; 4037 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); 4038 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); 4039 bind(L_failure); 4040 } 4041 4042 4043 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 4044 Register super_klass, 4045 Register temp_reg, 4046 Label* L_success, 4047 Label* L_failure, 4048 Label* L_slow_path, 4049 RegisterOrConstant super_check_offset) { 4050 assert_different_registers(sub_klass, super_klass, temp_reg); 4051 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 4052 if (super_check_offset.is_register()) { 4053 assert_different_registers(sub_klass, super_klass, 4054 super_check_offset.as_register()); 4055 } else if (must_load_sco) { 4056 assert(temp_reg != noreg, "supply either a temp or a register offset"); 4057 } 4058 4059 Label L_fallthrough; 4060 int label_nulls = 0; 4061 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 4062 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 4063 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 4064 assert(label_nulls <= 1, "at most one NULL in the batch"); 4065 4066 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 4067 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 4068 Address super_check_offset_addr(super_klass, 
sco_offset); 4069 4070 // Hacked jcc, which "knows" that L_fallthrough, at least, is in 4071 // range of a jccb. If this routine grows larger, reconsider at 4072 // least some of these. 4073 #define local_jcc(assembler_cond, label) \ 4074 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ 4075 else jcc( assembler_cond, label) /*omit semi*/ 4076 4077 // Hacked jmp, which may only be used just before L_fallthrough. 4078 #define final_jmp(label) \ 4079 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 4080 else jmp(label) /*omit semi*/ 4081 4082 // If the pointers are equal, we are done (e.g., String[] elements). 4083 // This self-check enables sharing of secondary supertype arrays among 4084 // non-primary types such as array-of-interface. Otherwise, each such 4085 // type would need its own customized SSA. 4086 // We move this check to the front of the fast path because many 4087 // type checks are in fact trivially successful in this manner, 4088 // so we get a nicely predicted branch right at the start of the check. 4089 cmpptr(sub_klass, super_klass); 4090 local_jcc(Assembler::equal, *L_success); 4091 4092 // Check the supertype display: 4093 if (must_load_sco) { 4094 // Positive movl does right thing on LP64. 4095 movl(temp_reg, super_check_offset_addr); 4096 super_check_offset = RegisterOrConstant(temp_reg); 4097 } 4098 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 4099 cmpptr(super_klass, super_check_addr); // load displayed supertype 4100 4101 // This check has worked decisively for primary supers. 4102 // Secondary supers are sought in the super_cache ('super_cache_addr'). 4103 // (Secondary supers are interfaces and very deeply nested subtypes.) 4104 // This works in the same check above because of a tricky aliasing 4105 // between the super_cache and the primary super display elements. 4106 // (The 'super_check_addr' can address either, as the case requires.) 4107 // Note that the cache is updated below if it does not help us find 4108 // what we need immediately. 4109 // So if it was a primary super, we can just fail immediately. 4110 // Otherwise, it's the slow path for us (no success at this point). 4111 4112 if (super_check_offset.is_register()) { 4113 local_jcc(Assembler::equal, *L_success); 4114 cmpl(super_check_offset.as_register(), sc_offset); 4115 if (L_failure == &L_fallthrough) { 4116 local_jcc(Assembler::equal, *L_slow_path); 4117 } else { 4118 local_jcc(Assembler::notEqual, *L_failure); 4119 final_jmp(*L_slow_path); 4120 } 4121 } else if (super_check_offset.as_constant() == sc_offset) { 4122 // Need a slow path; fast failure is impossible. 4123 if (L_slow_path == &L_fallthrough) { 4124 local_jcc(Assembler::equal, *L_success); 4125 } else { 4126 local_jcc(Assembler::notEqual, *L_slow_path); 4127 final_jmp(*L_success); 4128 } 4129 } else { 4130 // No slow path; it's a fast decision. 
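    // For illustration: when super_check_offset is a constant other than
    // sc_offset, the display compare above alone decides the check --
    // equal goes to *L_success, not-equal to *L_failure -- which is exactly
    // what the two branches below encode, modulo which label falls through.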
4131 if (L_failure == &L_fallthrough) { 4132 local_jcc(Assembler::equal, *L_success); 4133 } else { 4134 local_jcc(Assembler::notEqual, *L_failure); 4135 final_jmp(*L_success); 4136 } 4137 } 4138 4139 bind(L_fallthrough); 4140 4141 #undef local_jcc 4142 #undef final_jmp 4143 } 4144 4145 4146 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 4147 Register super_klass, 4148 Register temp_reg, 4149 Register temp2_reg, 4150 Label* L_success, 4151 Label* L_failure, 4152 bool set_cond_codes) { 4153 assert_different_registers(sub_klass, super_klass, temp_reg); 4154 if (temp2_reg != noreg) 4155 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 4156 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 4157 4158 Label L_fallthrough; 4159 int label_nulls = 0; 4160 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 4161 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 4162 assert(label_nulls <= 1, "at most one NULL in the batch"); 4163 4164 // a couple of useful fields in sub_klass: 4165 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 4166 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 4167 Address secondary_supers_addr(sub_klass, ss_offset); 4168 Address super_cache_addr( sub_klass, sc_offset); 4169 4170 // Do a linear scan of the secondary super-klass chain. 4171 // This code is rarely used, so simplicity is a virtue here. 4172 // The repne_scan instruction uses fixed registers, which we must spill. 4173 // Don't worry too much about pre-existing connections with the input regs. 4174 4175 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 4176 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 4177 4178 // Get super_klass value into rax (even if it was in rdi or rcx). 4179 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 4180 if (super_klass != rax || UseCompressedOops) { 4181 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 4182 mov(rax, super_klass); 4183 } 4184 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 4185 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 4186 4187 #ifndef PRODUCT 4188 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 4189 ExternalAddress pst_counter_addr((address) pst_counter); 4190 NOT_LP64( incrementl(pst_counter_addr) ); 4191 LP64_ONLY( lea(rcx, pst_counter_addr) ); 4192 LP64_ONLY( incrementl(Address(rcx, 0)) ); 4193 #endif //PRODUCT 4194 4195 // We will consult the secondary-super array. 4196 movptr(rdi, secondary_supers_addr); 4197 // Load the array length. (Positive movl does right thing on LP64.) 4198 movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes())); 4199 // Skip to start of data. 4200 addptr(rdi, Array<Klass*>::base_offset_in_bytes()); 4201 4202 // Scan RCX words at [RDI] for an occurrence of RAX. 4203 // Set NZ/Z based on last compare. 4204 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 4205 // not change flags (only scas instruction which is repeated sets flags). 4206 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 4207 4208 testptr(rax,rax); // Set Z = 0 4209 repne_scan(); 4210 4211 // Unspill the temp. registers: 4212 if (pushed_rdi) pop(rdi); 4213 if (pushed_rcx) pop(rcx); 4214 if (pushed_rax) pop(rax); 4215 4216 if (set_cond_codes) { 4217 // Special hack for the AD files: rdi is guaranteed non-zero. 
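    // (After repne scas, rdi points just past the last element examined --
    // an address inside the secondary-supers array -- so it cannot be zero.)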
4218 assert(!pushed_rdi, "rdi must be left non-NULL"); 4219 // Also, the condition codes are properly set Z/NZ on succeed/failure. 4220 } 4221 4222 if (L_failure == &L_fallthrough) 4223 jccb(Assembler::notEqual, *L_failure); 4224 else jcc(Assembler::notEqual, *L_failure); 4225 4226 // Success. Cache the super we found and proceed in triumph. 4227 movptr(super_cache_addr, super_klass); 4228 4229 if (L_success != &L_fallthrough) { 4230 jmp(*L_success); 4231 } 4232 4233 #undef IS_A_TEMP 4234 4235 bind(L_fallthrough); 4236 } 4237 4238 4239 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { 4240 if (VM_Version::supports_cmov()) { 4241 cmovl(cc, dst, src); 4242 } else { 4243 Label L; 4244 jccb(negate_condition(cc), L); 4245 movl(dst, src); 4246 bind(L); 4247 } 4248 } 4249 4250 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { 4251 if (VM_Version::supports_cmov()) { 4252 cmovl(cc, dst, src); 4253 } else { 4254 Label L; 4255 jccb(negate_condition(cc), L); 4256 movl(dst, src); 4257 bind(L); 4258 } 4259 } 4260 4261 void MacroAssembler::verify_oop(Register reg, const char* s) { 4262 if (!VerifyOops) return; 4263 4264 // Pass register number to verify_oop_subroutine 4265 const char* b = NULL; 4266 { 4267 ResourceMark rm; 4268 stringStream ss; 4269 ss.print("verify_oop: %s: %s", reg->name(), s); 4270 b = code_string(ss.as_string()); 4271 } 4272 BLOCK_COMMENT("verify_oop {"); 4273 #ifdef _LP64 4274 push(rscratch1); // save r10, trashed by movptr() 4275 #endif 4276 push(rax); // save rax, 4277 push(reg); // pass register argument 4278 ExternalAddress buffer((address) b); 4279 // avoid using pushptr, as it modifies scratch registers 4280 // and our contract is not to modify anything 4281 movptr(rax, buffer.addr()); 4282 push(rax); 4283 // call indirectly to solve generation ordering problem 4284 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 4285 call(rax); 4286 // Caller pops the arguments (oop, message) and restores rax, r10 4287 BLOCK_COMMENT("} verify_oop"); 4288 } 4289 4290 4291 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 4292 Register tmp, 4293 int offset) { 4294 intptr_t value = *delayed_value_addr; 4295 if (value != 0) 4296 return RegisterOrConstant(value + offset); 4297 4298 // load indirectly to solve generation ordering problem 4299 movptr(tmp, ExternalAddress((address) delayed_value_addr)); 4300 4301 #ifdef ASSERT 4302 { Label L; 4303 testptr(tmp, tmp); 4304 if (WizardMode) { 4305 const char* buf = NULL; 4306 { 4307 ResourceMark rm; 4308 stringStream ss; 4309 ss.print("DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); 4310 buf = code_string(ss.as_string()); 4311 } 4312 jcc(Assembler::notZero, L); 4313 STOP(buf); 4314 } else { 4315 jccb(Assembler::notZero, L); 4316 hlt(); 4317 } 4318 bind(L); 4319 } 4320 #endif 4321 4322 if (offset != 0) 4323 addptr(tmp, offset); 4324 4325 return RegisterOrConstant(tmp); 4326 } 4327 4328 4329 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 4330 int extra_slot_offset) { 4331 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
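  // For illustration (hypothetical values): with stackElementSize == 8, a
  // constant arg_slot of 2 and extra_slot_offset == 0, the result below is
  // Address(rsp, expr_offset_in_bytes(0) + 2*8 + wordSize), where the final
  // wordSize skips the return PC sitting on the stack.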
4332 int stackElementSize = Interpreter::stackElementSize; 4333 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 4334 #ifdef ASSERT 4335 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 4336 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 4337 #endif 4338 Register scale_reg = noreg; 4339 Address::ScaleFactor scale_factor = Address::no_scale; 4340 if (arg_slot.is_constant()) { 4341 offset += arg_slot.as_constant() * stackElementSize; 4342 } else { 4343 scale_reg = arg_slot.as_register(); 4344 scale_factor = Address::times(stackElementSize); 4345 } 4346 offset += wordSize; // return PC is on stack 4347 return Address(rsp, scale_reg, scale_factor, offset); 4348 } 4349 4350 4351 void MacroAssembler::verify_oop_addr(Address addr, const char* s) { 4352 if (!VerifyOops) return; 4353 4354 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); 4355 // Pass register number to verify_oop_subroutine 4356 const char* b = NULL; 4357 { 4358 ResourceMark rm; 4359 stringStream ss; 4360 ss.print("verify_oop_addr: %s", s); 4361 b = code_string(ss.as_string()); 4362 } 4363 #ifdef _LP64 4364 push(rscratch1); // save r10, trashed by movptr() 4365 #endif 4366 push(rax); // save rax, 4367 // addr may contain rsp so we will have to adjust it based on the push 4368 // we just did (and on 64 bit we do two pushes) 4369 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which 4370 // stores rax into addr which is backwards of what was intended. 4371 if (addr.uses(rsp)) { 4372 lea(rax, addr); 4373 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); 4374 } else { 4375 pushptr(addr); 4376 } 4377 4378 ExternalAddress buffer((address) b); 4379 // pass msg argument 4380 // avoid using pushptr, as it modifies scratch registers 4381 // and our contract is not to modify anything 4382 movptr(rax, buffer.addr()); 4383 push(rax); 4384 4385 // call indirectly to solve generation ordering problem 4386 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 4387 call(rax); 4388 // Caller pops the arguments (addr, message) and restores rax, r10. 
4389 } 4390 4391 void MacroAssembler::verify_tlab() { 4392 #ifdef ASSERT 4393 if (UseTLAB && VerifyOops) { 4394 Label next, ok; 4395 Register t1 = rsi; 4396 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); 4397 4398 push(t1); 4399 NOT_LP64(push(thread_reg)); 4400 NOT_LP64(get_thread(thread_reg)); 4401 4402 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 4403 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 4404 jcc(Assembler::aboveEqual, next); 4405 STOP("assert(top >= start)"); 4406 should_not_reach_here(); 4407 4408 bind(next); 4409 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 4410 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 4411 jcc(Assembler::aboveEqual, ok); 4412 STOP("assert(top <= end)"); 4413 should_not_reach_here(); 4414 4415 bind(ok); 4416 NOT_LP64(pop(thread_reg)); 4417 pop(t1); 4418 } 4419 #endif 4420 } 4421 4422 class ControlWord { 4423 public: 4424 int32_t _value; 4425 4426 int rounding_control() const { return (_value >> 10) & 3 ; } 4427 int precision_control() const { return (_value >> 8) & 3 ; } 4428 bool precision() const { return ((_value >> 5) & 1) != 0; } 4429 bool underflow() const { return ((_value >> 4) & 1) != 0; } 4430 bool overflow() const { return ((_value >> 3) & 1) != 0; } 4431 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 4432 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 4433 bool invalid() const { return ((_value >> 0) & 1) != 0; } 4434 4435 void print() const { 4436 // rounding control 4437 const char* rc; 4438 switch (rounding_control()) { 4439 case 0: rc = "round near"; break; 4440 case 1: rc = "round down"; break; 4441 case 2: rc = "round up "; break; 4442 case 3: rc = "chop "; break; 4443 }; 4444 // precision control 4445 const char* pc; 4446 switch (precision_control()) { 4447 case 0: pc = "24 bits "; break; 4448 case 1: pc = "reserved"; break; 4449 case 2: pc = "53 bits "; break; 4450 case 3: pc = "64 bits "; break; 4451 }; 4452 // flags 4453 char f[9]; 4454 f[0] = ' '; 4455 f[1] = ' '; 4456 f[2] = (precision ()) ? 'P' : 'p'; 4457 f[3] = (underflow ()) ? 'U' : 'u'; 4458 f[4] = (overflow ()) ? 'O' : 'o'; 4459 f[5] = (zero_divide ()) ? 'Z' : 'z'; 4460 f[6] = (denormalized()) ? 'D' : 'd'; 4461 f[7] = (invalid ()) ? 'I' : 'i'; 4462 f[8] = '\x0'; 4463 // output 4464 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 4465 } 4466 4467 }; 4468 4469 class StatusWord { 4470 public: 4471 int32_t _value; 4472 4473 bool busy() const { return ((_value >> 15) & 1) != 0; } 4474 bool C3() const { return ((_value >> 14) & 1) != 0; } 4475 bool C2() const { return ((_value >> 10) & 1) != 0; } 4476 bool C1() const { return ((_value >> 9) & 1) != 0; } 4477 bool C0() const { return ((_value >> 8) & 1) != 0; } 4478 int top() const { return (_value >> 11) & 7 ; } 4479 bool error_status() const { return ((_value >> 7) & 1) != 0; } 4480 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 4481 bool precision() const { return ((_value >> 5) & 1) != 0; } 4482 bool underflow() const { return ((_value >> 4) & 1) != 0; } 4483 bool overflow() const { return ((_value >> 3) & 1) != 0; } 4484 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 4485 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 4486 bool invalid() const { return ((_value >> 0) & 1) != 0; } 4487 4488 void print() const { 4489 // condition codes 4490 char c[5]; 4491 c[0] = (C3()) ? '3' : '-'; 4492 c[1] = (C2()) ? 
'2' : '-'; 4493 c[2] = (C1()) ? '1' : '-'; 4494 c[3] = (C0()) ? '0' : '-'; 4495 c[4] = '\x0'; 4496 // flags 4497 char f[9]; 4498 f[0] = (error_status()) ? 'E' : '-'; 4499 f[1] = (stack_fault ()) ? 'S' : '-'; 4500 f[2] = (precision ()) ? 'P' : '-'; 4501 f[3] = (underflow ()) ? 'U' : '-'; 4502 f[4] = (overflow ()) ? 'O' : '-'; 4503 f[5] = (zero_divide ()) ? 'Z' : '-'; 4504 f[6] = (denormalized()) ? 'D' : '-'; 4505 f[7] = (invalid ()) ? 'I' : '-'; 4506 f[8] = '\x0'; 4507 // output 4508 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 4509 } 4510 4511 }; 4512 4513 class TagWord { 4514 public: 4515 int32_t _value; 4516 4517 int tag_at(int i) const { return (_value >> (i*2)) & 3; } 4518 4519 void print() const { 4520 printf("%04x", _value & 0xFFFF); 4521 } 4522 4523 }; 4524 4525 class FPU_Register { 4526 public: 4527 int32_t _m0; 4528 int32_t _m1; 4529 int16_t _ex; 4530 4531 bool is_indefinite() const { 4532 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 4533 } 4534 4535 void print() const { 4536 char sign = (_ex < 0) ? '-' : '+'; 4537 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; 4538 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 4539 }; 4540 4541 }; 4542 4543 class FPU_State { 4544 public: 4545 enum { 4546 register_size = 10, 4547 number_of_registers = 8, 4548 register_mask = 7 4549 }; 4550 4551 ControlWord _control_word; 4552 StatusWord _status_word; 4553 TagWord _tag_word; 4554 int32_t _error_offset; 4555 int32_t _error_selector; 4556 int32_t _data_offset; 4557 int32_t _data_selector; 4558 int8_t _register[register_size * number_of_registers]; 4559 4560 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 4561 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 4562 4563 const char* tag_as_string(int tag) const { 4564 switch (tag) { 4565 case 0: return "valid"; 4566 case 1: return "zero"; 4567 case 2: return "special"; 4568 case 3: return "empty"; 4569 } 4570 ShouldNotReachHere(); 4571 return NULL; 4572 } 4573 4574 void print() const { 4575 // print computation registers 4576 { int t = _status_word.top(); 4577 for (int i = 0; i < number_of_registers; i++) { 4578 int j = (i - t) & register_mask; 4579 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); 4580 st(j)->print(); 4581 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 4582 } 4583 } 4584 printf("\n"); 4585 // print control registers 4586 printf("ctrl = "); _control_word.print(); printf("\n"); 4587 printf("stat = "); _status_word .print(); printf("\n"); 4588 printf("tags = "); _tag_word .print(); printf("\n"); 4589 } 4590 4591 }; 4592 4593 class Flag_Register { 4594 public: 4595 int32_t _value; 4596 4597 bool overflow() const { return ((_value >> 11) & 1) != 0; } 4598 bool direction() const { return ((_value >> 10) & 1) != 0; } 4599 bool sign() const { return ((_value >> 7) & 1) != 0; } 4600 bool zero() const { return ((_value >> 6) & 1) != 0; } 4601 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 4602 bool parity() const { return ((_value >> 2) & 1) != 0; } 4603 bool carry() const { return ((_value >> 0) & 1) != 0; } 4604 4605 void print() const { 4606 // flags 4607 char f[8]; 4608 f[0] = (overflow ()) ? 'O' : '-'; 4609 f[1] = (direction ()) ? 'D' : '-'; 4610 f[2] = (sign ()) ? 'S' : '-'; 4611 f[3] = (zero ()) ? 'Z' : '-'; 4612 f[4] = (auxiliary_carry()) ? 'A' : '-'; 4613 f[5] = (parity ()) ? 'P' : '-'; 4614 f[6] = (carry ()) ? 
'C' : '-'; 4615 f[7] = '\x0'; 4616 // output 4617 printf("%08x flags = %s", _value, f); 4618 } 4619 4620 }; 4621 4622 class IU_Register { 4623 public: 4624 int32_t _value; 4625 4626 void print() const { 4627 printf("%08x %11d", _value, _value); 4628 } 4629 4630 }; 4631 4632 class IU_State { 4633 public: 4634 Flag_Register _eflags; 4635 IU_Register _rdi; 4636 IU_Register _rsi; 4637 IU_Register _rbp; 4638 IU_Register _rsp; 4639 IU_Register _rbx; 4640 IU_Register _rdx; 4641 IU_Register _rcx; 4642 IU_Register _rax; 4643 4644 void print() const { 4645 // computation registers 4646 printf("rax, = "); _rax.print(); printf("\n"); 4647 printf("rbx, = "); _rbx.print(); printf("\n"); 4648 printf("rcx = "); _rcx.print(); printf("\n"); 4649 printf("rdx = "); _rdx.print(); printf("\n"); 4650 printf("rdi = "); _rdi.print(); printf("\n"); 4651 printf("rsi = "); _rsi.print(); printf("\n"); 4652 printf("rbp, = "); _rbp.print(); printf("\n"); 4653 printf("rsp = "); _rsp.print(); printf("\n"); 4654 printf("\n"); 4655 // control registers 4656 printf("flgs = "); _eflags.print(); printf("\n"); 4657 } 4658 }; 4659 4660 4661 class CPU_State { 4662 public: 4663 FPU_State _fpu_state; 4664 IU_State _iu_state; 4665 4666 void print() const { 4667 printf("--------------------------------------------------\n"); 4668 _iu_state .print(); 4669 printf("\n"); 4670 _fpu_state.print(); 4671 printf("--------------------------------------------------\n"); 4672 } 4673 4674 }; 4675 4676 4677 static void _print_CPU_state(CPU_State* state) { 4678 state->print(); 4679 }; 4680 4681 4682 void MacroAssembler::print_CPU_state() { 4683 push_CPU_state(); 4684 push(rsp); // pass CPU state 4685 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); 4686 addptr(rsp, wordSize); // discard argument 4687 pop_CPU_state(); 4688 } 4689 4690 4691 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { 4692 static int counter = 0; 4693 FPU_State* fs = &state->_fpu_state; 4694 counter++; 4695 // For leaf calls, only verify that the top few elements remain empty. 4696 // We only need 1 empty at the top for C2 code. 
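  // (Tag values as decoded by tag_as_string above: 0 = valid, 1 = zero,
  // 2 = special, 3 = empty; hence the tests against 3 below.)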
4697 if( stack_depth < 0 ) { 4698 if( fs->tag_for_st(7) != 3 ) { 4699 printf("FPR7 not empty\n"); 4700 state->print(); 4701 assert(false, "error"); 4702 return false; 4703 } 4704 return true; // All other stack states do not matter 4705 } 4706 4707 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 4708 "bad FPU control word"); 4709 4710 // compute stack depth 4711 int i = 0; 4712 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 4713 int d = i; 4714 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 4715 // verify findings 4716 if (i != FPU_State::number_of_registers) { 4717 // stack not contiguous 4718 printf("%s: stack not contiguous at ST%d\n", s, i); 4719 state->print(); 4720 assert(false, "error"); 4721 return false; 4722 } 4723 // check if computed stack depth corresponds to expected stack depth 4724 if (stack_depth < 0) { 4725 // expected stack depth is -stack_depth or less 4726 if (d > -stack_depth) { 4727 // too many elements on the stack 4728 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 4729 state->print(); 4730 assert(false, "error"); 4731 return false; 4732 } 4733 } else { 4734 // expected stack depth is stack_depth 4735 if (d != stack_depth) { 4736 // wrong stack depth 4737 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); 4738 state->print(); 4739 assert(false, "error"); 4740 return false; 4741 } 4742 } 4743 // everything is cool 4744 return true; 4745 } 4746 4747 4748 void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 4749 if (!VerifyFPU) return; 4750 push_CPU_state(); 4751 push(rsp); // pass CPU state 4752 ExternalAddress msg((address) s); 4753 // pass message string s 4754 pushptr(msg.addr()); 4755 push(stack_depth); // pass stack depth 4756 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 4757 addptr(rsp, 3 * wordSize); // discard arguments 4758 // check for error 4759 { Label L; 4760 testl(rax, rax); 4761 jcc(Assembler::notZero, L); 4762 int3(); // break if error condition 4763 bind(L); 4764 } 4765 pop_CPU_state(); 4766 } 4767 4768 void MacroAssembler::load_klass(Register dst, Register src) { 4769 #ifdef _LP64 4770 if (UseCompressedKlassPointers) { 4771 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 4772 decode_klass_not_null(dst); 4773 } else 4774 #endif 4775 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 4776 } 4777 4778 void MacroAssembler::load_prototype_header(Register dst, Register src) { 4779 #ifdef _LP64 4780 if (UseCompressedKlassPointers) { 4781 assert (Universe::heap() != NULL, "java heap should be initialized"); 4782 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 4783 if (Universe::narrow_klass_shift() != 0) { 4784 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 4785 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); 4786 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); 4787 } else { 4788 movq(dst, Address(dst, Klass::prototype_header_offset())); 4789 } 4790 } else 4791 #endif 4792 { 4793 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 4794 movptr(dst, Address(dst, Klass::prototype_header_offset())); 4795 } 4796 } 4797 4798 void MacroAssembler::store_klass(Register dst, Register src) { 4799 #ifdef _LP64 4800 if (UseCompressedKlassPointers) { 4801 encode_klass_not_null(src); 4802 movl(Address(dst, oopDesc::klass_offset_in_bytes()), 
src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}

void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  // FIXME: Must change all places where we try to load the klass.
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}

// Doesn't do verification, generates fixed-size code
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, src);
}

void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}

void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
  assert_different_registers(src1, tmp);
#ifdef _LP64
  if (UseCompressedOops) {
    bool did_push = false;
    if (tmp == noreg) {
      tmp = rax;
      push(tmp);
      did_push = true;
      assert(!src2.uses(rsp), "can't push");
    }
    load_heap_oop(tmp, src2);
    cmpptr(src1, tmp);
    if (did_push) pop(tmp);
  } else
#endif
    cmpptr(src1, src2);
}

// Used for storing NULLs.
void MacroAssembler::store_heap_oop_null(Address dst) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, (int32_t)NULL_WORD);
  } else {
    movslq(dst, (int32_t)NULL_WORD);
  }
#else
  movl(dst, (int32_t)NULL_WORD);
#endif
}

#ifdef _LP64
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedKlassPointers) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}

#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
    jcc(Assembler::equal, ok);
    STOP(msg);
    bind(ok);
    pop(rscratch1);
  }
}
#endif

// Algorithm must match oop.inline.hpp encode_heap_oop.
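// In outline, with base B == Universe::narrow_oop_base() and shift
// s == Universe::narrow_oop_shift(), the encoding computes
//   encoded = (oop == NULL) ? 0 : (oop - B) >> s
// The cmovq below maps NULL to B first, so the subtraction yields 0
// without needing a branch.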
4898 void MacroAssembler::encode_heap_oop(Register r) { 4899 #ifdef ASSERT 4900 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 4901 #endif 4902 verify_oop(r, "broken oop in encode_heap_oop"); 4903 if (Universe::narrow_oop_base() == NULL) { 4904 if (Universe::narrow_oop_shift() != 0) { 4905 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 4906 shrq(r, LogMinObjAlignmentInBytes); 4907 } 4908 return; 4909 } 4910 testq(r, r); 4911 cmovq(Assembler::equal, r, r12_heapbase); 4912 subq(r, r12_heapbase); 4913 shrq(r, LogMinObjAlignmentInBytes); 4914 } 4915 4916 void MacroAssembler::encode_heap_oop_not_null(Register r) { 4917 #ifdef ASSERT 4918 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 4919 if (CheckCompressedOops) { 4920 Label ok; 4921 testq(r, r); 4922 jcc(Assembler::notEqual, ok); 4923 STOP("null oop passed to encode_heap_oop_not_null"); 4924 bind(ok); 4925 } 4926 #endif 4927 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 4928 if (Universe::narrow_oop_base() != NULL) { 4929 subq(r, r12_heapbase); 4930 } 4931 if (Universe::narrow_oop_shift() != 0) { 4932 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 4933 shrq(r, LogMinObjAlignmentInBytes); 4934 } 4935 } 4936 4937 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 4938 #ifdef ASSERT 4939 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 4940 if (CheckCompressedOops) { 4941 Label ok; 4942 testq(src, src); 4943 jcc(Assembler::notEqual, ok); 4944 STOP("null oop passed to encode_heap_oop_not_null2"); 4945 bind(ok); 4946 } 4947 #endif 4948 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 4949 if (dst != src) { 4950 movq(dst, src); 4951 } 4952 if (Universe::narrow_oop_base() != NULL) { 4953 subq(dst, r12_heapbase); 4954 } 4955 if (Universe::narrow_oop_shift() != 0) { 4956 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 4957 shrq(dst, LogMinObjAlignmentInBytes); 4958 } 4959 } 4960 4961 void MacroAssembler::decode_heap_oop(Register r) { 4962 #ifdef ASSERT 4963 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 4964 #endif 4965 if (Universe::narrow_oop_base() == NULL) { 4966 if (Universe::narrow_oop_shift() != 0) { 4967 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 4968 shlq(r, LogMinObjAlignmentInBytes); 4969 } 4970 } else { 4971 Label done; 4972 shlq(r, LogMinObjAlignmentInBytes); 4973 jccb(Assembler::equal, done); 4974 addq(r, r12_heapbase); 4975 bind(done); 4976 } 4977 verify_oop(r, "broken oop in decode_heap_oop"); 4978 } 4979 4980 void MacroAssembler::decode_heap_oop_not_null(Register r) { 4981 // Note: it will change flags 4982 assert (UseCompressedOops, "should only be used for compressed headers"); 4983 assert (Universe::heap() != NULL, "java heap should be initialized"); 4984 // Cannot assert, unverified entry point counts instructions (see .ad file) 4985 // vtableStubs also counts instructions in pd_code_size_limit. 4986 // Also do not verify_oop as this is called by verify_oop. 
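  // Decoding is the inverse of the encoding above:
  //   oop = (encoded << shift) + base
  // This is a _not_null variant, so no NULL check (branch or cmov) is needed.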
4987 if (Universe::narrow_oop_shift() != 0) { 4988 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 4989 shlq(r, LogMinObjAlignmentInBytes); 4990 if (Universe::narrow_oop_base() != NULL) { 4991 addq(r, r12_heapbase); 4992 } 4993 } else { 4994 assert (Universe::narrow_oop_base() == NULL, "sanity"); 4995 } 4996 } 4997 4998 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 4999 // Note: it will change flags 5000 assert (UseCompressedOops, "should only be used for compressed headers"); 5001 assert (Universe::heap() != NULL, "java heap should be initialized"); 5002 // Cannot assert, unverified entry point counts instructions (see .ad file) 5003 // vtableStubs also counts instructions in pd_code_size_limit. 5004 // Also do not verify_oop as this is called by verify_oop. 5005 if (Universe::narrow_oop_shift() != 0) { 5006 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 5007 if (LogMinObjAlignmentInBytes == Address::times_8) { 5008 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 5009 } else { 5010 if (dst != src) { 5011 movq(dst, src); 5012 } 5013 shlq(dst, LogMinObjAlignmentInBytes); 5014 if (Universe::narrow_oop_base() != NULL) { 5015 addq(dst, r12_heapbase); 5016 } 5017 } 5018 } else { 5019 assert (Universe::narrow_oop_base() == NULL, "sanity"); 5020 if (dst != src) { 5021 movq(dst, src); 5022 } 5023 } 5024 } 5025 5026 void MacroAssembler::encode_klass_not_null(Register r) { 5027 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 5028 #ifdef ASSERT 5029 verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?"); 5030 #endif 5031 if (Universe::narrow_klass_base() != NULL) { 5032 subq(r, r12_heapbase); 5033 } 5034 if (Universe::narrow_klass_shift() != 0) { 5035 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5036 shrq(r, LogKlassAlignmentInBytes); 5037 } 5038 } 5039 5040 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 5041 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 5042 #ifdef ASSERT 5043 verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?"); 5044 #endif 5045 if (dst != src) { 5046 movq(dst, src); 5047 } 5048 if (Universe::narrow_klass_base() != NULL) { 5049 subq(dst, r12_heapbase); 5050 } 5051 if (Universe::narrow_klass_shift() != 0) { 5052 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5053 shrq(dst, LogKlassAlignmentInBytes); 5054 } 5055 } 5056 5057 void MacroAssembler::decode_klass_not_null(Register r) { 5058 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 5059 // Note: it will change flags 5060 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5061 // Cannot assert, unverified entry point counts instructions (see .ad file) 5062 // vtableStubs also counts instructions in pd_code_size_limit. 5063 // Also do not verify_oop as this is called by verify_oop. 
5064 if (Universe::narrow_klass_shift() != 0) { 5065 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5066 shlq(r, LogKlassAlignmentInBytes); 5067 if (Universe::narrow_klass_base() != NULL) { 5068 addq(r, r12_heapbase); 5069 } 5070 } else { 5071 assert (Universe::narrow_klass_base() == NULL, "sanity"); 5072 } 5073 } 5074 5075 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 5076 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 5077 // Note: it will change flags 5078 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5079 // Cannot assert, unverified entry point counts instructions (see .ad file) 5080 // vtableStubs also counts instructions in pd_code_size_limit. 5081 // Also do not verify_oop as this is called by verify_oop. 5082 if (Universe::narrow_klass_shift() != 0) { 5083 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5084 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); 5085 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 5086 } else { 5087 assert (Universe::narrow_klass_base() == NULL, "sanity"); 5088 if (dst != src) { 5089 movq(dst, src); 5090 } 5091 } 5092 } 5093 5094 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 5095 assert (UseCompressedOops, "should only be used for compressed headers"); 5096 assert (Universe::heap() != NULL, "java heap should be initialized"); 5097 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5098 int oop_index = oop_recorder()->find_index(obj); 5099 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5100 mov_narrow_oop(dst, oop_index, rspec); 5101 } 5102 5103 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 5104 assert (UseCompressedOops, "should only be used for compressed headers"); 5105 assert (Universe::heap() != NULL, "java heap should be initialized"); 5106 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5107 int oop_index = oop_recorder()->find_index(obj); 5108 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5109 mov_narrow_oop(dst, oop_index, rspec); 5110 } 5111 5112 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 5113 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5114 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5115 int klass_index = oop_recorder()->find_index(k); 5116 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5117 mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 5118 } 5119 5120 void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { 5121 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5122 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5123 int klass_index = oop_recorder()->find_index(k); 5124 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5125 mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 5126 } 5127 5128 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 5129 assert (UseCompressedOops, "should only be used for compressed headers"); 5130 assert (Universe::heap() != NULL, "java heap should be initialized"); 5131 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5132 int oop_index = oop_recorder()->find_index(obj); 5133 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5134 Assembler::cmp_narrow_oop(dst, 
oop_index, rspec); 5135 } 5136 5137 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 5138 assert (UseCompressedOops, "should only be used for compressed headers"); 5139 assert (Universe::heap() != NULL, "java heap should be initialized"); 5140 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5141 int oop_index = oop_recorder()->find_index(obj); 5142 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5143 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 5144 } 5145 5146 void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { 5147 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5148 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5149 int klass_index = oop_recorder()->find_index(k); 5150 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5151 Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 5152 } 5153 5154 void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { 5155 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5156 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5157 int klass_index = oop_recorder()->find_index(k); 5158 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5159 Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 5160 } 5161 5162 void MacroAssembler::reinit_heapbase() { 5163 if (UseCompressedOops || UseCompressedKlassPointers) { 5164 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); 5165 } 5166 } 5167 #endif // _LP64 5168 5169 5170 // C2 compiled method's prolog code. 5171 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { 5172 5173 // WARNING: Initial instruction MUST be 5 bytes or longer so that 5174 // NativeJump::patch_verified_entry will be able to patch out the entry 5175 // code safely. The push to verify stack depth is ok at 5 bytes, 5176 // the frame allocation can be either 3 or 6 bytes. So if we don't do 5177 // stack bang then we must use the 6 byte frame allocation even if 5178 // we have no frame. :-( 5179 5180 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 5181 // Remove word for return addr 5182 framesize -= wordSize; 5183 5184 // Calls to C2R adapters often do not accept exceptional returns. 5185 // We require that their callers must bang for them. But be careful, because 5186 // some VM calls (such as call site linkage) can use several kilobytes of 5187 // stack. But the stack safety zone should account for that. 5188 // See bugs 4446381, 4468289, 4497237. 5189 if (stack_bang) { 5190 generate_stack_overflow_check(framesize); 5191 5192 // We always push rbp, so that on return to interpreter rbp, will be 5193 // restored correctly and we can correct the stack. 5194 push(rbp); 5195 // Remove word for ebp 5196 framesize -= wordSize; 5197 5198 // Create frame 5199 if (framesize) { 5200 subptr(rsp, framesize); 5201 } 5202 } else { 5203 // Create frame (force generation of a 4 byte immediate value) 5204 subptr_imm32(rsp, framesize); 5205 5206 // Save RBP register now. 
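    // For illustration (hypothetical framesize == 32 at entry, wordSize == 8):
    // after the return-address word was removed above, framesize is 24, and
    // subptr_imm32 reserved those 24 bytes; the store below then places rbp
    // at [rsp + 16], the same slot the push(rbp) of the stack-bang path uses.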
// C2 compiled method's prolog code.
void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {

  // WARNING: Initial instruction MUST be 5 bytes or longer so that
  // NativeJump::patch_verified_entry will be able to patch out the entry
  // code safely. The push to verify stack depth is ok at 5 bytes,
  // the frame allocation can be either 3 or 6 bytes. So if we don't do
  // stack bang then we must use the 6 byte frame allocation even if
  // we have no frame. :-(
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return addr
  framesize -= wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them. But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack. But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (stack_bang) {
    generate_stack_overflow_check(framesize);

    // We always push rbp so that on return to the interpreter rbp will be
    // restored correctly and we can correct the stack.
    push(rbp);
    // Remove word for rbp
    framesize -= wordSize;

    // Create frame
    if (framesize) {
      subptr(rsp, framesize);
    }
  } else {
    // Create frame (force generation of a 4 byte immediate value)
    subptr_imm32(rsp, framesize);

    // Save RBP register now.
    framesize -= wordSize;
    movptr(Address(rsp, framesize), rbp);
  }

  if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
    framesize -= wordSize;
    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
  }

#ifndef _LP64
  // If method sets FPU control word do it now
  if (fp_mode_24b) {
    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }
  if (UseSSE >= 2 && VerifyFPU) {
    verify_FPU(0, "FPU stack must be clean on entry");
  }
#endif

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    Label L;
    push(rax);
    mov(rax, rsp);
    andptr(rax, StackAlignmentInBytes-1);
    cmpptr(rax, StackAlignmentInBytes-wordSize);
    pop(rax);
    jcc(Assembler::equal, L);
    STOP("Stack is not properly aligned!");
    bind(L);
  }
#endif
}

void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
  // cnt - number of qwords (8-byte words).
  // base - start address, qword aligned.
  assert(base==rdi, "base register must be edi for rep stos");
  assert(tmp==rax,  "tmp register must be eax for rep stos");
  assert(cnt==rcx,  "cnt register must be ecx for rep stos");

  xorptr(tmp, tmp);
  if (UseFastStosb) {
    shlptr(cnt, 3); // convert to number of bytes
    rep_stosb();
  } else {
    NOT_LP64(shlptr(cnt, 1);) // convert to number of dwords for 32-bit VM
    rep_stos();
  }
}
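// Illustrative note (not VM code): rep stosb / rep stos is the x86 "memset"
// idiom -- with the direction flag clear it stores al/eax/rax to [rdi]
// rcx times, advancing rdi as it goes. The code above is therefore roughly
// equivalent to this C sketch (qword-aligned base, count in 8-byte words):
//
//   void clear_mem_sketch(uint64_t* base, size_t cnt_qwords) {
//     for (size_t i = 0; i < cnt_qwords; i++) base[i] = 0;
//   }
//
// UseFastStosb selects the byte-granular form, which is faster on CPUs
// that implement enhanced rep movsb/stosb.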
// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2,  Register result,
                                      XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses the pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code is used only for cnt2 >= 8 chars");

  // Load substring.
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is number of substring remaining elements and
    // cnt1 is number of string remaining elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);
  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR);
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only string if does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8);  negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jcc(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring
  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index
  bind(EXIT);
} // string_indexofC8
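// Illustrative note (not VM code): the pcmpestri immediate 0x0d used above
// decomposes as 0b0001101:
//
//   bits [1:0] = 01  -> source data is unsigned 16-bit words (Java chars)
//   bits [3:2] = 11  -> "equal ordered" aggregation, i.e. substring search
//   bits [5:4] = 00  -> no negation of the result mask
//   bit  [6]   = 0   -> index of the least significant match goes to rcx
//
// After the instruction, CF is set when any match position was found and
// OF reflects a match starting at element 0, which is why the code
// branches on below (CF == 1) and overflow (OF == 1) respectively.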
// Small strings are loaded through stack if they cross page boundary.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2,  Register result,
                                    XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is length of small (< 8 chars) constant substring
  // or (-1) for non constant substring in which case its length
  // is in cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses the pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0));  // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*TypeArrayKlass::header_size() >= 12, "sanity");
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow load 16 bytes into vec.
      subptr(rsp, 16);
      int stk_offset = wordSize-2;
      push(cnt2);

      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp);  // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);
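    // Illustrative note (not VM code): the page-boundary test used here and
    // above exploits the fact that the heap is mapped in whole pages, so a
    // 16-byte load starting at addr can only fault if it crosses into the
    // next page. In C terms the check is roughly:
    //
    //   bool safe_to_read_16(uintptr_t addr, int page_size) {
    //     return (addr & (page_size - 1)) <= (uintptr_t)(page_size - 16);
    //   }
    //
    // e.g. with 4096-byte pages, addr % 4096 == 4088 fails the check
    // (4088 > 4080) because bytes 4096..4103 would touch the next page.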
    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp);  // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2);       // substr count
      push(str2);       // substr addr
      push(str1);       // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp);  // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2 < 0) {  // Only for non constant substring
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Left less than substring.
  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof

// Compare strings.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  ShortBranchVerifier sbv(this);
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);
  cmov32(Assembler::lessEqual, cnt2, result);

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Compare first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  cmpl(cnt2, 1);
  jcc(Assembler::equal, LENGTH_DIFF_LABEL);
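  // Illustrative note (not VM code): everything below implements the usual
  // String.compareTo contract in vectorized form. A scalar reference
  // version, assuming two char arrays and their lengths:
  //
  //   int string_compare_sketch(const jchar* s1, int len1,
  //                             const jchar* s2, int len2) {
  //     int min = len1 < len2 ? len1 : len2;
  //     for (int i = 0; i < min; i++) {
  //       if (s1[i] != s2[i]) return s1[i] - s2[i];
  //     }
  //     return len1 - len2;
  //   }
  //
  // The length difference pushed on the stack above is the "return
  // len1 - len2" fallback; POP_LABEL discards it when a character
  // difference decides the result first.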
  // Check if the strings start at the same location.
  cmpptr(str1, str2);
  jcc(Assembler::equal, LENGTH_DIFF_LABEL);

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;

  if (UseAVX >= 2 && UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
    Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
    Label COMPARE_TAIL_LONG;
    int pcmpmask = 0x19;

    // Setup to compare 16-chars (32-bytes) vectors,
    // start from first character again because it has aligned address.
    int stride2 = 16;
    int adr_stride  = stride  << scale;
    int adr_stride2 = stride2 << scale;

    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
    // rax and rdx are used by pcmpestri as elements counters
    movl(result, cnt2);
    andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count
    jcc(Assembler::zero, COMPARE_TAIL_LONG);

    // fast path : compare first 2 8-char vectors.
    bind(COMPARE_16_CHARS);
    movdqu(vec1, Address(str1, 0));
    pcmpestri(vec1, Address(str2, 0), pcmpmask);
    jccb(Assembler::below, COMPARE_INDEX_CHAR);

    movdqu(vec1, Address(str1, adr_stride));
    pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
    jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
    addl(cnt1, stride);

    // Compare the characters at index in cnt1
    bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
    load_unsigned_short(result, Address(str1, cnt1, scale));
    load_unsigned_short(cnt2, Address(str2, cnt1, scale));
    subl(result, cnt2);
    jmp(POP_LABEL);

    // Setup the registers to start vector comparison loop
    bind(COMPARE_WIDE_VECTORS);
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    subl(result, stride2);
    subl(cnt2, stride2);
    jccb(Assembler::zero, COMPARE_WIDE_TAIL);
    negptr(result);

    // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
    bind(COMPARE_WIDE_VECTORS_LOOP);
    vmovdqu(vec1, Address(str1, result, scale));
    vpxor(vec1, Address(str2, result, scale));
    vptest(vec1, vec1);
    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
    addptr(result, stride2);
    subl(cnt2, stride2);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);

    // compare wide vectors tail
    bind(COMPARE_WIDE_TAIL);
    testptr(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    movl(result, stride2);
    movl(cnt2, result);
    negptr(result);
    jmpb(COMPARE_WIDE_VECTORS_LOOP);
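    // Illustrative note (not VM code): the vpxor+vptest pair above is the
    // standard vector equality idiom -- xor-ing two equal 32-byte blocks
    // yields all zeroes, and vptest(v, v) sets ZF exactly when v is zero.
    // A scalar analogue:
    //
    //   bool blocks_equal_32(const uint64_t* a, const uint64_t* b) {
    //     uint64_t diff = 0;
    //     for (int i = 0; i < 4; i++) diff |= a[i] ^ b[i];
    //     return diff == 0;  // "ZF set" <=> no differing bits anywhere
    //   }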
    // Identifies the mismatching (higher or lower) 16-bytes in the 32-byte vectors.
    bind(VECTOR_NOT_EQUAL);
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    jmp(COMPARE_16_CHARS);

    // Compare tail chars, length between 1 and 15 chars
    bind(COMPARE_TAIL_LONG);
    movl(cnt2, result);
    cmpl(cnt2, stride);
    jccb(Assembler::less, COMPARE_SMALL_STR);

    movdqu(vec1, Address(str1, 0));
    pcmpestri(vec1, Address(str2, 0), pcmpmask);
    jcc(Assembler::below, COMPARE_INDEX_CHAR);
    subptr(cnt2, stride);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(cnt2);
    jmpb(WHILE_HEAD_LABEL);

    bind(COMPARE_SMALL_STR);
  } else if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 8-char (16-byte) vectors,
    // start from first character again because it has aligned address.
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    //   inputs:
    //     vec1 - substring
    //     rax  - negative string length (elements count)
    //     mem  - scanned string
    //     rdx  - string length (elements count)
    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
    //                + 00 (unsigned bytes) or + 01 (unsigned shorts)
    //   outputs:
    //     rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail
    testptr(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(cnt1, result);
    load_unsigned_short(result, Address(str1, cnt1, scale));
    load_unsigned_short(cnt2, Address(str2, cnt1, scale));
    subl(result, cnt2);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }
  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale));
  lea(str2, Address(str2, cnt2, scale));
  decrementl(cnt2);  // first character was compared already
  negptr(cnt2);

  // Compare the rest of the elements
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);
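  // Illustrative note (not VM code): the pcmpestri immediate 0x19 used in
  // both vector paths above decomposes as 0b011001:
  //
  //   bits [1:0] = 01  -> unsigned 16-bit words (Java chars)
  //   bits [3:2] = 10  -> "equal each", i.e. element-wise compare
  //   bits [5:4] = 01  -> negate the result mask
  //
  // With negation, rcx receives the index of the first *mismatching*
  // element, CF is set when any mismatch exists, and CF==0 (aboveEqual)
  // means the two 16-byte blocks compared equal.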
  // Strings are equal up to min length. Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}

// Compare char[] arrays aligned to 4 bytes or substrings.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  ShortBranchVerifier sbv(this);
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0
  movl(result, limit); // copy

  if (UseAVX >= 2) {
    // With AVX2, use 32-byte vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 32-byte vectors
    andl(result, 0x0000001e); //   tail count (in bytes)
    andl(limit, 0xffffffe0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    vmovdqu(vec1, Address(ary1, limit, Address::times_1));
    vmovdqu(vec2, Address(ary2, limit, Address::times_1));
    vpxor(vec1, vec2);

    vptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 32);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
    vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
    vpxor(vec1, vec2);

    vptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  } else if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e); //   tail count (in bytes)
    andl(limit, 0xfffffff0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);      // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}

void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  ShortBranchVerifier sbv(this);
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }
    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert(UseSSE >= 2, "supported cpu only");
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      movdl(xtmp, value);
      if (UseAVX >= 2 && UseUnalignedLoadStores) {
        // Fill 64-byte chunks
        Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
        vpbroadcastd(xtmp, xtmp);

        subl(count, 16 << shift);
        jcc(Assembler::less, L_check_fill_32_bytes);
        align(16);

        BIND(L_fill_64_bytes_loop);
        vmovdqu(Address(to, 0), xtmp);
        vmovdqu(Address(to, 32), xtmp);
        addptr(to, 64);
        subl(count, 16 << shift);
        jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);

        BIND(L_check_fill_32_bytes);
        addl(count, 8 << shift);
        jccb(Assembler::less, L_check_fill_8_bytes);
        vmovdqu(Address(to, 0), xtmp);
        addptr(to, 32);
        subl(count, 8 << shift);
      } else {
        // Fill 32-byte chunks
        pshufd(xtmp, xtmp, 0);

        subl(count, 8 << shift);
        jcc(Assembler::less, L_check_fill_8_bytes);
        align(16);

        BIND(L_fill_32_bytes_loop);

        if (UseUnalignedLoadStores) {
          movdqu(Address(to, 0), xtmp);
          movdqu(Address(to, 16), xtmp);
        } else {
          movq(Address(to, 0), xtmp);
          movq(Address(to, 8), xtmp);
          movq(Address(to, 16), xtmp);
          movq(Address(to, 24), xtmp);
        }

        addptr(to, 32);
        subl(count, 8 << shift);
        jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      }
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
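// Illustrative note (not VM code): generate_fill first widens the fill
// value so that one 32-bit store (or one broadcast lane) writes several
// elements at once. For a byte value b the replication above computes:
//
//   uint32_t widen_byte(uint8_t b) {
//     uint32_t v = b;
//     v |= v << 8;   // b        -> bb   (16 bits)
//     v |= v << 16;  // 0000bbbb -> bbbb (32 bits)
//     return v;      // e.g. 0x5A -> 0x5A5A5A5A
//   }
//
// shift is chosen so that "8 << shift" elements is always 32 bytes:
// 32 bytes for T_BYTE (shift 2), 16 shorts for T_SHORT (shift 1), and
// 8 ints for T_INT (shift 0), so the chunked loops above can reason in
// bytes regardless of element type.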
// encode char[] to byte[] in ISO_8859_1
void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
                                      XMMRegister tmp1Reg, XMMRegister tmp2Reg,
                                      XMMRegister tmp3Reg, XMMRegister tmp4Reg,
                                      Register tmp5, Register result) {
  // rsi: src
  // rdi: dst
  // rdx: len
  // rcx: tmp5
  // rax: result
  ShortBranchVerifier sbv(this);
  assert_different_registers(src, dst, len, tmp5, result);
  Label L_done, L_copy_1_char, L_copy_1_char_exit;

  // set result
  xorl(result, result);
  // check for zero length
  testl(len, len);
  jcc(Assembler::zero, L_done);
  movl(result, len);

  // Setup pointers
  lea(src, Address(src, len, Address::times_2)); // char[]
  lea(dst, Address(dst, len, Address::times_1)); // byte[]
  negptr(len);

  if (UseSSE42Intrinsics || UseAVX >= 2) {
    Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
    Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;

    if (UseAVX >= 2) {
      Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
      movdl(tmp1Reg, tmp5);
      vpbroadcastd(tmp1Reg, tmp1Reg);
      jmpb(L_chars_32_check);

      bind(L_copy_32_chars);
      vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
      vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
      vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
      vptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
      jccb(Assembler::notZero, L_copy_32_chars_exit);
      vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
      vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true);
      vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);

      bind(L_chars_32_check);
      addptr(len, 32);
      jccb(Assembler::lessEqual, L_copy_32_chars);

      bind(L_copy_32_chars_exit);
      subptr(len, 16);
      jccb(Assembler::greater, L_copy_16_chars_exit);

    } else if (UseSSE42Intrinsics) {
      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
      movdl(tmp1Reg, tmp5);
      pshufd(tmp1Reg, tmp1Reg, 0);
      jmpb(L_chars_16_check);
    }

    bind(L_copy_16_chars);
    if (UseAVX >= 2) {
      vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
      vptest(tmp2Reg, tmp1Reg);
      jccb(Assembler::notZero, L_copy_16_chars_exit);
      vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
      vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true);
    } else {
      if (UseAVX > 0) {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
      } else {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        por(tmp2Reg, tmp3Reg);
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        por(tmp2Reg, tmp4Reg);
      }
      ptest(tmp2Reg, tmp1Reg);        // check for Unicode chars in vector
      jccb(Assembler::notZero, L_copy_16_chars_exit);
      packuswb(tmp3Reg, tmp4Reg);
    }
    movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg);

    bind(L_chars_16_check);
    addptr(len, 16);
    jccb(Assembler::lessEqual, L_copy_16_chars);

    bind(L_copy_16_chars_exit);
    subptr(len, 8);
    jccb(Assembler::greater, L_copy_8_chars_exit);

    bind(L_copy_8_chars);
    movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
    ptest(tmp3Reg, tmp1Reg);
    jccb(Assembler::notZero, L_copy_8_chars_exit);
    packuswb(tmp3Reg, tmp1Reg);
    movq(Address(dst, len, Address::times_1, -8), tmp3Reg);
    addptr(len, 8);
    jccb(Assembler::lessEqual, L_copy_8_chars);

    bind(L_copy_8_chars_exit);
    subptr(len, 8);
    jccb(Assembler::zero, L_done);
  }

  bind(L_copy_1_char);
  load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
  testl(tmp5, 0xff00);   // check if Unicode char
  jccb(Assembler::notZero, L_copy_1_char_exit);
  movb(Address(dst, len, Address::times_1, 0), tmp5);
  addptr(len, 1);
  jccb(Assembler::less, L_copy_1_char);

  bind(L_copy_1_char_exit);
  addptr(result, len); // len is negative count of not processed elements
  bind(L_done);
}

#undef BIND
#undef BLOCK_COMMENT


Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}

SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
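// Illustrative note (not VM code): SkipIfEqual is an RAII guard -- its
// constructor emits a compare-and-branch over whatever code is generated
// while the guard is live, and its destructor binds the branch target.
// A rough usage sketch, with DTraceMethodProbes standing in for any bool
// flag whose address is known at code-generation time:
//
//   {
//     SkipIfEqual skip(masm, &DTraceMethodProbes, false);
//     // ...code emitted here is executed only when the flag is true
//     // at runtime...
//   } // destructor binds the label; execution resumes here either way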