/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")


#ifdef ASSERT
bool AbstractAssembler::pd_check_instruction_mark() { return true; }
#endif

static Assembler::Condition reverse[] = {
    Assembler::noOverflow   /* overflow      = 0x0 */ ,
    Assembler::overflow     /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual   /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below        /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero      /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero         /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above        /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual   /* above         = 0x7 */ ,
    Assembler::positive     /* negative      = 0x8 */ ,
    Assembler::negative     /* positive      = 0x9 */ ,
    Assembler::noParity     /* parity        = 0xa */ ,
    Assembler::parity       /* noParity      = 0xb */ ,
    Assembler::greaterEqual /* less          = 0xc */ ,
    Assembler::less         /* greaterEqual  = 0xd */ ,
    Assembler::greater      /* lessEqual     = 0xe */ ,
    Assembler::lessEqual    /* greater       = 0xf, */

};
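
// The table above maps each condition to its logical negation, indexed by
// the condition's encoding, so reversing a conditional jump is a single
// table lookup; e.g. reverse[Assembler::zero] is Assembler::notZero, and
// jcc(reverse[cc], L) branches exactly when jcc(cc, L) would not.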

// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit
// Unless the difference is trivial (1 line or so).

#ifndef _LP64

// 32bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go into the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}
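
// For reference, the mark word layout this path depends on (per markOop.hpp,
// 32-bit widths shown):
//
//   unlocked:          [ hash:25             | age:4 | 0 | 01 ]
//   biased / biasable: [ thread:23 | epoch:2 | age:4 | 1 | 01 ]
//
// With the age bits masked off, xor-ing the mark with (thread | prototype
// header) yields zero when the object is already biased to this thread in
// the current epoch (the fast path above); a nonzero bias pattern in the
// result routes to try_revoke_bias, and nonzero epoch bits alone route to
// try_rebias.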

void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}
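
// A worked form of the identity used in lmul: writing x = x_hi * 2^32 + x_lo
// and y = y_hi * 2^32 + y_lo gives
//
//   x * y = x_hi * y_hi * 2^64
//         + (x_hi * y_lo + x_lo * y_hi) * 2^32
//         + x_lo * y_lo
//
// The 2^64 term vanishes modulo 2^64 and only the low halves of the two
// cross products can reach the high result word, so three 32x32-bit
// multiplies suffice; rbx accumulates the cross terms added into rdx.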

void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}
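
// Example of the decomposition used by lshl/lshr: for s = 40 and n = 32,
// x << 40 == (x << 32) << 8. The s >= n branch performs the << 32 by moving
// lo into hi and clearing lo; the final shld/shl (or shrd/shr) then shifts
// by s mod 32, which the hardware applies automatically because the shift
// instructions only consider the low five bits of rcx.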

void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}


void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}

void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}


void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::pushklass(Metadata* obj) {
  push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}

void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}

static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  // Don't assert holding the ttyLock
  assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);
  tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
  if ((WizardMode || Verbose) && PrintMiscellaneous) {
    tty->cr();
    findpc(eip);
    tty->cr();
  }
#endif
#define PRINT_REG(rax) \
  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
  PRINT_REG(rax);
  PRINT_REG(rbx);
  PRINT_REG(rcx);
  PRINT_REG(rdx);
  PRINT_REG(rdi);
  PRINT_REG(rsi);
  PRINT_REG(rbp);
  PRINT_REG(rsp);
#undef PRINT_REG
  // Print some words near top of stack.
  int* dump_sp = (int*) rsp;
  for (int col1 = 0; col1 < 8; col1++) {
    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  for (int row = 0; row < 16; row++) {
    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
    for (int col = 0; col < 8; col++) {
      tty->print(" 0x%08x", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)eip-64, (address)eip);
  tty->print_cr("--------");
  Disassembler::decode((address)eip, (address)eip+32);
}

void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                            // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}

void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}

void MacroAssembler::print_state() {
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                            // push registers

  push_CPU_state();
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
  pop_CPU_state();

  popa();
  addl(rsp, wordSize);
}

#else // _LP64

// 64 bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movq(swap_reg, mark_addr);
  }
  movq(tmp_reg, swap_reg);
  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  xorq(tmp_reg, swap_reg);
  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go into the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  andq(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  movq(tmp_reg, swap_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}
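
// A note on the alignment dance in call_VM_leaf_base below: the standard
// x86-64 ABI requires rsp to be 16-byte aligned at a call instruction, but
// this code can be reached with rsp at either alignment, so it probes the
// low four bits and conditionally pads the stack by one word around the
// call. On Windows it also pre-allocates the 32-byte register-argument
// shadow area the callee is entitled to scribble on.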

void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for its register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp,  frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  subq(rsp, 8);
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif

}

void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "should use cmpptr");

  if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
}

int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271. The function
  // returns the (pc) offset of the idivl instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}
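
// Why corrected_idivq needs the special case: the true quotient of
// min_long / -1 is 2^63, which is not representable, and the JVM spec
// defines the Java result as min_long with remainder 0. A raw idivq would
// instead raise a divide error (#DE) on that input, so it is filtered out
// before the divide.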

void MacroAssembler::decrementq(Register reg, int value) {
  if (value == min_jint) { subq(reg, value); return; }
  if (value <  0) { incrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(reg) ; return; }
  /* else */      { subq(reg, value)       ; return; }
}

void MacroAssembler::decrementq(Address dst, int value) {
  if (value == min_jint) { subq(dst, value); return; }
  if (value <  0) { incrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(dst) ; return; }
  /* else */      { subq(dst, value)       ; return; }
}

void MacroAssembler::incrementq(Register reg, int value) {
  if (value == min_jint) { addq(reg, value); return; }
  if (value <  0) { decrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(reg) ; return; }
  /* else */      { addq(reg, value)       ; return; }
}

void MacroAssembler::incrementq(Address dst, int value) {
  if (value == min_jint) { addq(dst, value); return; }
  if (value <  0) { decrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(dst) ; return; }
  /* else */      { addq(dst, value)       ; return; }
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}

void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}

void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_int8((unsigned char)0xC9); // LEAVE
}

void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}

void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1,0));
    }
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}
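
// The detour through rscratch1 above is forced by the instruction set:
// x86-64 stores to memory accept at most a sign-extended 32-bit immediate,
// so an arbitrary 64-bit constant must be materialized in a register first.
// The int32_t overloads below avoid the scratch register for the common
// case of storing NULL and other small constants.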

// These are mostly for initializing NULL
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}

void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}

void MacroAssembler::pushklass(Metadata* obj) {
  mov_metadata(rscratch1, obj);
  push(rscratch1);
}

void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}

void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha();            // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16);     // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}

void MacroAssembler::warn(const char* msg) {
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call
  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::print_state() {
  address rip = pc();
  pusha();            // get regs on stack
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call
  push_CPU_state();   // keeps alignment at 16 bytes

  lea(c_rarg0, InternalAddress(rip));
  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);

  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
  popa();
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state64(pc, regs);
      BREAKPOINT;
      assert(false, "start up GDB");
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}

void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);
  tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
  tty->cr();
  findpc(pc);
  tty->cr();
#endif
#define PRINT_REG(rax, value) \
  { tty->print("%s = ", #rax); os::print_location(tty, value); }
  PRINT_REG(rax, regs[15]);
  PRINT_REG(rbx, regs[12]);
  PRINT_REG(rcx, regs[14]);
  PRINT_REG(rdx, regs[13]);
  PRINT_REG(rdi, regs[8]);
  PRINT_REG(rsi, regs[9]);
  PRINT_REG(rbp, regs[10]);
  PRINT_REG(rsp, regs[11]);
  PRINT_REG(r8 , regs[7]);
  PRINT_REG(r9 , regs[6]);
  PRINT_REG(r10, regs[5]);
  PRINT_REG(r11, regs[4]);
  PRINT_REG(r12, regs[3]);
  PRINT_REG(r13, regs[2]);
  PRINT_REG(r14, regs[1]);
  PRINT_REG(r15, regs[0]);
#undef PRINT_REG
  // Print some words near top of stack.
  int64_t* rsp = (int64_t*) regs[11];
  int64_t* dump_sp = rsp;
  for (int col1 = 0; col1 < 8; col1++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  for (int row = 0; row < 25; row++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    for (int col = 0; col < 4; col++) {
      tty->print(" 0x%016lx", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)pc-64, (address)pc);
  tty->print_cr("--------");
  Disassembler::decode((address)pc, (address)pc+32);
}

#endif // _LP64

// Now versions that are common to 32/64 bit

void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::addsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::addsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    addss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    addss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andps(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}

// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages.  This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}
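
// For example, with 4K pages and a requested size of 20K, the loop above
// touches one word in each of five successive pages below rsp; the unrolled
// loop then touches StackShadowPages-1 further pages. One store per page is
// enough to trip the OS guard page that implements the overflow check.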

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}

// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  movptr(rax, (intptr_t)Universe::non_oop_word());
  call(AddressLiteral(entry, rh));
}

// Implementation of call_VM versions
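
// All of the register-argument call_VM variants below share one shape:
// call(C) immediately enters an out-of-line block at C, leaving on the
// stack a return address that points at the following jmp(E). Inside the
// block the arguments are marshalled and call_VM_helper does the real work;
// its ret(0) lands on the jmp(E), which hops back over the block. The
// pushed return address is what call_VM_helper later recovers as
// last_Java_pc (see the comment in call_VM_helper).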

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   int number_of_arguments,
                                   bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   bool check_exceptions) {
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   Register arg_3,
                                   bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
#ifdef ASSERT
  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
  // r12 is the heapbase.
  LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception, however it is
    // possible, if we relocate, that the branch will not reach. So we must jump
So we must jump 1696 // around it so that we can always reach the entry 1697 1698 Label ok; 1699 jcc(Assembler::equal, ok); 1700 jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 1701 bind(ok); 1702 #endif // LP64 1703 } 1704 1705 // get oop result if there is one and reset the value in the thread 1706 if (oop_result->is_valid()) { 1707 get_vm_result(oop_result, java_thread); 1708 } 1709 } 1710 1711 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 1712 1713 // Calculating the value for last_Java_sp is 1714 // somewhat subtle. call_VM does an intermediate call 1715 // which places a return address on the stack just under the 1716 // stack pointer as the user finished with it. This allows 1717 // us to retrieve last_Java_pc from last_Java_sp[-1]. 1718 // On 32bit we then have to push additional args on the stack to accomplish 1719 // the actual requested call. On 64bit call_VM can only use register args 1720 // so the only extra space is the return address that call_VM created. 1721 // This hopefully explains the calculations here. 1722 1723 #ifdef _LP64 1724 // We've pushed one address, correct last_Java_sp 1725 lea(rax, Address(rsp, wordSize)); 1726 #else 1727 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); 1728 #endif // LP64 1729 1730 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); 1731 1732 } 1733 1734 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 1735 call_VM_leaf_base(entry_point, number_of_arguments); 1736 } 1737 1738 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 1739 pass_arg0(this, arg_0); 1740 call_VM_leaf(entry_point, 1); 1741 } 1742 1743 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 1744 1745 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 1746 pass_arg1(this, arg_1); 1747 pass_arg0(this, arg_0); 1748 call_VM_leaf(entry_point, 2); 1749 } 1750 1751 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 1752 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 1753 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 1754 pass_arg2(this, arg_2); 1755 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 1756 pass_arg1(this, arg_1); 1757 pass_arg0(this, arg_0); 1758 call_VM_leaf(entry_point, 3); 1759 } 1760 1761 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 1762 pass_arg0(this, arg_0); 1763 MacroAssembler::call_VM_leaf_base(entry_point, 1); 1764 } 1765 1766 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 1767 1768 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 1769 pass_arg1(this, arg_1); 1770 pass_arg0(this, arg_0); 1771 MacroAssembler::call_VM_leaf_base(entry_point, 2); 1772 } 1773 1774 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 1775 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 1776 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 1777 pass_arg2(this, arg_2); 1778 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 1779 pass_arg1(this, arg_1); 1780 pass_arg0(this, arg_0); 1781 MacroAssembler::call_VM_leaf_base(entry_point, 3); 1782 } 1783 1784 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 1785 LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg")); 1786
LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 1787 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 1788 pass_arg3(this, arg_3); 1789 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 1790 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 1791 pass_arg2(this, arg_2); 1792 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 1793 pass_arg1(this, arg_1); 1794 pass_arg0(this, arg_0); 1795 MacroAssembler::call_VM_leaf_base(entry_point, 4); 1796 } 1797 1798 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 1799 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 1800 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); 1801 verify_oop(oop_result, "broken oop in call_VM_base"); 1802 } 1803 1804 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 1805 movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 1806 movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD); 1807 } 1808 1809 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 1810 } 1811 1812 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 1813 } 1814 1815 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { 1816 if (reachable(src1)) { 1817 cmpl(as_Address(src1), imm); 1818 } else { 1819 lea(rscratch1, src1); 1820 cmpl(Address(rscratch1, 0), imm); 1821 } 1822 } 1823 1824 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { 1825 assert(!src2.is_lval(), "use cmpptr"); 1826 if (reachable(src2)) { 1827 cmpl(src1, as_Address(src2)); 1828 } else { 1829 lea(rscratch1, src2); 1830 cmpl(src1, Address(rscratch1, 0)); 1831 } 1832 } 1833 1834 void MacroAssembler::cmp32(Register src1, int32_t imm) { 1835 Assembler::cmpl(src1, imm); 1836 } 1837 1838 void MacroAssembler::cmp32(Register src1, Address src2) { 1839 Assembler::cmpl(src1, src2); 1840 } 1841 1842 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 1843 ucomisd(opr1, opr2); 1844 1845 Label L; 1846 if (unordered_is_less) { 1847 movl(dst, -1); 1848 jcc(Assembler::parity, L); 1849 jcc(Assembler::below , L); 1850 movl(dst, 0); 1851 jcc(Assembler::equal , L); 1852 increment(dst); 1853 } else { // unordered is greater 1854 movl(dst, 1); 1855 jcc(Assembler::parity, L); 1856 jcc(Assembler::above , L); 1857 movl(dst, 0); 1858 jcc(Assembler::equal , L); 1859 decrementl(dst); 1860 } 1861 bind(L); 1862 } 1863 1864 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 1865 ucomiss(opr1, opr2); 1866 1867 Label L; 1868 if (unordered_is_less) { 1869 movl(dst, -1); 1870 jcc(Assembler::parity, L); 1871 jcc(Assembler::below , L); 1872 movl(dst, 0); 1873 jcc(Assembler::equal , L); 1874 increment(dst); 1875 } else { // unordered is greater 1876 movl(dst, 1); 1877 jcc(Assembler::parity, L); 1878 jcc(Assembler::above , L); 1879 movl(dst, 0); 1880 jcc(Assembler::equal , L); 1881 decrementl(dst); 1882 } 1883 bind(L); 1884 } 1885 1886 1887 void MacroAssembler::cmp8(AddressLiteral src1, int imm) { 1888 if (reachable(src1)) { 1889 cmpb(as_Address(src1), imm); 1890 } else { 1891 lea(rscratch1, src1); 1892 cmpb(Address(rscratch1, 0), imm); 1893 } 1894 } 1895 1896 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { 1897 #ifdef _LP64 1898 if (src2.is_lval()) { 1899 movptr(rscratch1, src2); 1900 Assembler::cmpq(src1, rscratch1); 1901 } else if (reachable(src2)) { 1902 cmpq(src1, 
as_Address(src2)); 1903 } else { 1904 lea(rscratch1, src2); 1905 Assembler::cmpq(src1, Address(rscratch1, 0)); 1906 } 1907 #else 1908 if (src2.is_lval()) { 1909 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 1910 } else { 1911 cmpl(src1, as_Address(src2)); 1912 } 1913 #endif // _LP64 1914 } 1915 1916 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { 1917 assert(src2.is_lval(), "not a mem-mem compare"); 1918 #ifdef _LP64 1919 // moves src2's literal address 1920 movptr(rscratch1, src2); 1921 Assembler::cmpq(src1, rscratch1); 1922 #else 1923 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 1924 #endif // _LP64 1925 } 1926 1927 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { 1928 if (reachable(adr)) { 1929 if (os::is_MP()) 1930 lock(); 1931 cmpxchgptr(reg, as_Address(adr)); 1932 } else { 1933 lea(rscratch1, adr); 1934 if (os::is_MP()) 1935 lock(); 1936 cmpxchgptr(reg, Address(rscratch1, 0)); 1937 } 1938 } 1939 1940 void MacroAssembler::cmpxchgptr(Register reg, Address adr) { 1941 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); 1942 } 1943 1944 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { 1945 if (reachable(src)) { 1946 Assembler::comisd(dst, as_Address(src)); 1947 } else { 1948 lea(rscratch1, src); 1949 Assembler::comisd(dst, Address(rscratch1, 0)); 1950 } 1951 } 1952 1953 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 1954 if (reachable(src)) { 1955 Assembler::comiss(dst, as_Address(src)); 1956 } else { 1957 lea(rscratch1, src); 1958 Assembler::comiss(dst, Address(rscratch1, 0)); 1959 } 1960 } 1961 1962 1963 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 1964 Condition negated_cond = negate_condition(cond); 1965 Label L; 1966 jcc(negated_cond, L); 1967 atomic_incl(counter_addr); 1968 bind(L); 1969 } 1970 1971 int MacroAssembler::corrected_idivl(Register reg) { 1972 // Full implementation of Java idiv and irem; checks for 1973 // special case as described in JVM spec., p.243 & p.271. 1974 // The function returns the (pc) offset of the idivl 1975 // instruction - may be needed for implicit exceptions. 
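// Note on the special case handled below: for min_int / -1 the true quotient
// (2^31) is not representable in 32 bits, so the idivl instruction would raise
// a hardware divide fault (#DE); the JVM spec instead requires the result
// min_int with remainder 0, which the code produces by skipping the divide.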
1976 // 1977 // normal case special case 1978 // 1979 // input : rax,: dividend min_int 1980 // reg: divisor (may not be rax,/rdx) -1 1981 // 1982 // output: rax,: quotient (= rax, idiv reg) min_int 1983 // rdx: remainder (= rax, irem reg) 0 1984 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); 1985 const int min_int = 0x80000000; 1986 Label normal_case, special_case; 1987 1988 // check for special case 1989 cmpl(rax, min_int); 1990 jcc(Assembler::notEqual, normal_case); 1991 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) 1992 cmpl(reg, -1); 1993 jcc(Assembler::equal, special_case); 1994 1995 // handle normal case 1996 bind(normal_case); 1997 cdql(); 1998 int idivl_offset = offset(); 1999 idivl(reg); 2000 2001 // normal and special case exit 2002 bind(special_case); 2003 2004 return idivl_offset; 2005 } 2006 2007 2008 2009 void MacroAssembler::decrementl(Register reg, int value) { 2010 if (value == min_jint) {subl(reg, value) ; return; } 2011 if (value < 0) { incrementl(reg, -value); return; } 2012 if (value == 0) { ; return; } 2013 if (value == 1 && UseIncDec) { decl(reg) ; return; } 2014 /* else */ { subl(reg, value) ; return; } 2015 } 2016 2017 void MacroAssembler::decrementl(Address dst, int value) { 2018 if (value == min_jint) {subl(dst, value) ; return; } 2019 if (value < 0) { incrementl(dst, -value); return; } 2020 if (value == 0) { ; return; } 2021 if (value == 1 && UseIncDec) { decl(dst) ; return; } 2022 /* else */ { subl(dst, value) ; return; } 2023 } 2024 2025 void MacroAssembler::division_with_shift (Register reg, int shift_value) { 2026 assert (shift_value > 0, "illegal shift value"); 2027 Label _is_positive; 2028 testl (reg, reg); 2029 jcc (Assembler::positive, _is_positive); 2030 int offset = (1 << shift_value) - 1 ; 2031 2032 if (offset == 1) { 2033 incrementl(reg); 2034 } else { 2035 addl(reg, offset); 2036 } 2037 2038 bind (_is_positive); 2039 sarl(reg, shift_value); 2040 } 2041 2042 void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { 2043 if (reachable(src)) { 2044 Assembler::divsd(dst, as_Address(src)); 2045 } else { 2046 lea(rscratch1, src); 2047 Assembler::divsd(dst, Address(rscratch1, 0)); 2048 } 2049 } 2050 2051 void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { 2052 if (reachable(src)) { 2053 Assembler::divss(dst, as_Address(src)); 2054 } else { 2055 lea(rscratch1, src); 2056 Assembler::divss(dst, Address(rscratch1, 0)); 2057 } 2058 } 2059 2060 // !defined(COMPILER2) is because of stupid core builds 2061 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) 2062 void MacroAssembler::empty_FPU_stack() { 2063 if (VM_Version::supports_mmx()) { 2064 emms(); 2065 } else { 2066 for (int i = 8; i-- > 0; ) ffree(i); 2067 } 2068 } 2069 #endif // !LP64 || C1 || !C2 2070 2071 2072 // Defines obj, preserves var_size_in_bytes 2073 void MacroAssembler::eden_allocate(Register obj, 2074 Register var_size_in_bytes, 2075 int con_size_in_bytes, 2076 Register t1, 2077 Label& slow_case) { 2078 assert(obj == rax, "obj must be in rax, for cmpxchg"); 2079 assert_different_registers(obj, var_size_in_bytes, t1); 2080 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 2081 jmp(slow_case); 2082 } else { 2083 Register end = t1; 2084 Label retry; 2085 bind(retry); 2086 ExternalAddress heap_top((address) Universe::heap()->top_addr()); 2087 movptr(obj, heap_top); 2088 if (var_size_in_bytes == noreg) { 2089 lea(end, Address(obj, con_size_in_bytes)); 2090 } else { 2091 lea(end, 
Address(obj, var_size_in_bytes, Address::times_1)); 2092 } 2093 // if end < obj then we wrapped around => object too long => slow case 2094 cmpptr(end, obj); 2095 jcc(Assembler::below, slow_case); 2096 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 2097 jcc(Assembler::above, slow_case); 2098 // Compare obj with the top addr, and if still equal, store the new top addr in 2099 // end at the address of the top addr pointer. Sets ZF if was equal, and clears 2100 // it otherwise. Use lock prefix for atomicity on MPs. 2101 locked_cmpxchgptr(end, heap_top); 2102 jcc(Assembler::notEqual, retry); 2103 } 2104 } 2105 2106 void MacroAssembler::enter() { 2107 push(rbp); 2108 mov(rbp, rsp); 2109 } 2110 2111 // A 5 byte nop that is safe for patching (see patch_verified_entry) 2112 void MacroAssembler::fat_nop() { 2113 if (UseAddressNop) { 2114 addr_nop_5(); 2115 } else { 2116 emit_int8(0x26); // es: 2117 emit_int8(0x2e); // cs: 2118 emit_int8(0x64); // fs: 2119 emit_int8(0x65); // gs: 2120 emit_int8((unsigned char)0x90); 2121 } 2122 } 2123 2124 void MacroAssembler::fcmp(Register tmp) { 2125 fcmp(tmp, 1, true, true); 2126 } 2127 2128 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 2129 assert(!pop_right || pop_left, "usage error"); 2130 if (VM_Version::supports_cmov()) { 2131 assert(tmp == noreg, "unneeded temp"); 2132 if (pop_left) { 2133 fucomip(index); 2134 } else { 2135 fucomi(index); 2136 } 2137 if (pop_right) { 2138 fpop(); 2139 } 2140 } else { 2141 assert(tmp != noreg, "need temp"); 2142 if (pop_left) { 2143 if (pop_right) { 2144 fcompp(); 2145 } else { 2146 fcomp(index); 2147 } 2148 } else { 2149 fcom(index); 2150 } 2151 // convert FPU condition into eflags condition via rax, 2152 save_rax(tmp); 2153 fwait(); fnstsw_ax(); 2154 sahf(); 2155 restore_rax(tmp); 2156 } 2157 // condition codes set as follows: 2158 // 2159 // CF (corresponds to C0) if x < y 2160 // PF (corresponds to C2) if unordered 2161 // ZF (corresponds to C3) if x = y 2162 } 2163 2164 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { 2165 fcmp2int(dst, unordered_is_less, 1, true, true); 2166 } 2167 2168 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { 2169 fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right); 2170 Label L; 2171 if (unordered_is_less) { 2172 movl(dst, -1); 2173 jcc(Assembler::parity, L); 2174 jcc(Assembler::below , L); 2175 movl(dst, 0); 2176 jcc(Assembler::equal , L); 2177 increment(dst); 2178 } else { // unordered is greater 2179 movl(dst, 1); 2180 jcc(Assembler::parity, L); 2181 jcc(Assembler::above , L); 2182 movl(dst, 0); 2183 jcc(Assembler::equal , L); 2184 decrementl(dst); 2185 } 2186 bind(L); 2187 } 2188 2189 void MacroAssembler::fld_d(AddressLiteral src) { 2190 fld_d(as_Address(src)); 2191 } 2192 2193 void MacroAssembler::fld_s(AddressLiteral src) { 2194 fld_s(as_Address(src)); 2195 } 2196 2197 void MacroAssembler::fld_x(AddressLiteral src) { 2198 Assembler::fld_x(as_Address(src)); 2199 } 2200 2201 void MacroAssembler::fldcw(AddressLiteral src) { 2202 Assembler::fldcw(as_Address(src)); 2203 } 2204 2205 void MacroAssembler::pow_exp_core_encoding() { 2206 // kills rax, rcx, rdx 2207 subptr(rsp,sizeof(jdouble)); 2208 // computes 2^X. Stack: X ... 2209 // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. 
Get int(X) and 2210 // keep it on the thread's stack to compute 2^int(X) later 2211 // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1) 2212 // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X)) 2213 fld_s(0); // Stack: X X ... 2214 frndint(); // Stack: int(X) X ... 2215 fsuba(1); // Stack: int(X) X-int(X) ... 2216 fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ... 2217 f2xm1(); // Stack: 2^(X-int(X))-1 ... 2218 fld1(); // Stack: 1 2^(X-int(X))-1 ... 2219 faddp(1); // Stack: 2^(X-int(X)) 2220 // computes 2^(int(X)): add exponent bias (1023) to int(X), then 2221 // shift int(X)+1023 to exponent position. 2222 // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11 2223 // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent 2224 // values so detect them and set result to NaN. 2225 movl(rax,Address(rsp,0)); 2226 movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding 2227 addl(rax, 1023); 2228 movl(rdx,rax); 2229 shll(rax,20); 2230 // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN. 2231 addl(rdx,1); 2232 // Check that 1 < int(X)+1023+1 < 2048 2233 // in 3 steps: 2234 // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048 2235 // 2- (int(X)+1023+1)&-2048 != 0 2236 // 3- (int(X)+1023+1)&-2048 != 1 2237 // Do 2- first because addl just updated the flags. 2238 cmov32(Assembler::equal,rax,rcx); 2239 cmpl(rdx,1); 2240 cmov32(Assembler::equal,rax,rcx); 2241 testl(rdx,rcx); 2242 cmov32(Assembler::notEqual,rax,rcx); 2243 movl(Address(rsp,4),rax); 2244 movl(Address(rsp,0),0); 2245 fmul_d(Address(rsp,0)); // Stack: 2^X ... 2246 addptr(rsp,sizeof(jdouble)); 2247 } 2248 2249 void MacroAssembler::increase_precision() { 2250 subptr(rsp, BytesPerWord); 2251 fnstcw(Address(rsp, 0)); 2252 movl(rax, Address(rsp, 0)); 2253 orl(rax, 0x300); 2254 push(rax); 2255 fldcw(Address(rsp, 0)); 2256 pop(rax); 2257 } 2258 2259 void MacroAssembler::restore_precision() { 2260 fldcw(Address(rsp, 0)); 2261 addptr(rsp, BytesPerWord); 2262 } 2263 2264 void MacroAssembler::fast_pow() { 2265 // computes X^Y = 2^(Y * log2(X)) 2266 // if fast computation is not possible, result is NaN. Requires 2267 // fallback from user of this macro. 2268 // increase precision for intermediate steps of the computation 2269 increase_precision(); 2270 fyl2x(); // Stack: (Y*log2(X)) ... 2271 pow_exp_core_encoding(); // Stack: exp(X) ... 2272 restore_precision(); 2273 } 2274 2275 void MacroAssembler::fast_exp() { 2276 // computes exp(X) = 2^(X * log2(e)) 2277 // if fast computation is not possible, result is NaN. Requires 2278 // fallback from user of this macro. 2279 // increase precision for intermediate steps of the computation 2280 increase_precision(); 2281 fldl2e(); // Stack: log2(e) X ... 2282 fmulp(1); // Stack: (X*log2(e)) ... 2283 pow_exp_core_encoding(); // Stack: exp(X) ... 2284 restore_precision(); 2285 } 2286 2287 void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) { 2288 // kills rax, rcx, rdx 2289 // pow and exp needs 2 extra registers on the fpu stack. 2290 Label slow_case, done; 2291 Register tmp = noreg; 2292 if (!VM_Version::supports_cmov()) { 2293 // fcmp needs a temporary so preserve rdx, 2294 tmp = rdx; 2295 } 2296 Register tmp2 = rax; 2297 Register tmp3 = rcx; 2298 2299 if (is_exp) { 2300 // Stack: X 2301 fld_s(0); // duplicate argument for runtime call. 
Stack: X X 2302 fast_exp(); // Stack: exp(X) X 2303 fcmp(tmp, 0, false, false); // Stack: exp(X) X 2304 // exp(X) not equal to itself: exp(X) is NaN go to slow case. 2305 jcc(Assembler::parity, slow_case); 2306 // get rid of duplicate argument. Stack: exp(X) 2307 if (num_fpu_regs_in_use > 0) { 2308 fxch(); 2309 fpop(); 2310 } else { 2311 ffree(1); 2312 } 2313 jmp(done); 2314 } else { 2315 // Stack: X Y 2316 Label x_negative, y_odd; 2317 2318 fldz(); // Stack: 0 X Y 2319 fcmp(tmp, 1, true, false); // Stack: X Y 2320 jcc(Assembler::above, x_negative); 2321 2322 // X >= 0 2323 2324 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y 2325 fld_s(1); // Stack: X Y X Y 2326 fast_pow(); // Stack: X^Y X Y 2327 fcmp(tmp, 0, false, false); // Stack: X^Y X Y 2328 // X^Y not equal to itself: X^Y is NaN go to slow case. 2329 jcc(Assembler::parity, slow_case); 2330 // get rid of duplicate arguments. Stack: X^Y 2331 if (num_fpu_regs_in_use > 0) { 2332 fxch(); fpop(); 2333 fxch(); fpop(); 2334 } else { 2335 ffree(2); 2336 ffree(1); 2337 } 2338 jmp(done); 2339 2340 // X <= 0 2341 bind(x_negative); 2342 2343 fld_s(1); // Stack: Y X Y 2344 frndint(); // Stack: int(Y) X Y 2345 fcmp(tmp, 2, false, false); // Stack: int(Y) X Y 2346 jcc(Assembler::notEqual, slow_case); 2347 2348 subptr(rsp, 8); 2349 2350 // For X^Y, when X < 0, Y has to be an integer and the final 2351 // result depends on whether it's odd or even. We just checked 2352 // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit 2353 // integer to test its parity. If int(Y) is huge and doesn't fit 2354 // in the 64 bit integer range, the integer indefinite value will 2355 // end up in the gp registers. Huge numbers are all even, the 2356 // integer indefinite number is even so it's fine. 2357 2358 #ifdef ASSERT 2359 // Let's check we don't end up with an integer indefinite number 2360 // when not expected. First test for huge numbers: check whether 2361 // int(Y)+1 == int(Y) which is true for very large numbers and 2362 // those are all even. A 64 bit integer is guaranteed to not 2363 // overflow for numbers where y+1 != y (when precision is set to 2364 // double precision). 2365 Label y_not_huge; 2366 2367 fld1(); // Stack: 1 int(Y) X Y 2368 fadd(1); // Stack: 1+int(Y) int(Y) X Y 2369 2370 #ifdef _LP64 2371 // trip to memory to force the precision down from double extended 2372 // precision 2373 fstp_d(Address(rsp, 0)); 2374 fld_d(Address(rsp, 0)); 2375 #endif 2376 2377 fcmp(tmp, 1, true, false); // Stack: int(Y) X Y 2378 #endif 2379 2380 // move int(Y) as 64 bit integer to thread's stack 2381 fistp_d(Address(rsp,0)); // Stack: X Y 2382 2383 #ifdef ASSERT 2384 jcc(Assembler::notEqual, y_not_huge); 2385 2386 // Y is huge so we know it's even. It may not fit in a 64 bit 2387 // integer and we don't want the debug code below to see the 2388 // integer indefinite value so overwrite int(Y) on the thread's 2389 // stack with 0. 2390 movl(Address(rsp, 0), 0); 2391 movl(Address(rsp, 4), 0); 2392 2393 bind(y_not_huge); 2394 #endif 2395 2396 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y 2397 fld_s(1); // Stack: X Y X Y 2398 fabs(); // Stack: abs(X) Y X Y 2399 fast_pow(); // Stack: abs(X)^Y X Y 2400 fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y 2401 // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case. 2402 2403 pop(tmp2); 2404 NOT_LP64(pop(tmp3)); 2405 jcc(Assembler::parity, slow_case); 2406 2407 #ifdef ASSERT 2408 // Check that int(Y) is not integer indefinite value (int 2409 // overflow). 
Shouldn't happen because for values that would 2410 // overflow, 1+int(Y)==Y which was tested earlier. 2411 #ifndef _LP64 2412 { 2413 Label integer; 2414 testl(tmp2, tmp2); 2415 jcc(Assembler::notZero, integer); 2416 cmpl(tmp3, 0x80000000); 2417 jcc(Assembler::notZero, integer); 2418 STOP("integer indefinite value shouldn't be seen here"); 2419 bind(integer); 2420 } 2421 #else 2422 { 2423 Label integer; 2424 mov(tmp3, tmp2); // preserve tmp2 for parity check below 2425 shlq(tmp3, 1); 2426 jcc(Assembler::carryClear, integer); 2427 jcc(Assembler::notZero, integer); 2428 STOP("integer indefinite value shouldn't be seen here"); 2429 bind(integer); 2430 } 2431 #endif 2432 #endif 2433 2434 // get rid of duplicate arguments. Stack: X^Y 2435 if (num_fpu_regs_in_use > 0) { 2436 fxch(); fpop(); 2437 fxch(); fpop(); 2438 } else { 2439 ffree(2); 2440 ffree(1); 2441 } 2442 2443 testl(tmp2, 1); 2444 jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y 2445 // X <= 0, Y odd: X^Y = -abs(X)^Y 2446 2447 fchs(); // Stack: -abs(X)^Y Y 2448 jmp(done); 2449 } 2450 2451 // slow case: runtime call 2452 bind(slow_case); 2453 2454 fpop(); // pop incorrect result or int(Y) 2455 2456 fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2457 is_exp ? 1 : 2, num_fpu_regs_in_use); 2458 2459 // Come here with result in F-TOS 2460 bind(done); 2461 } 2462 2463 void MacroAssembler::fpop() { 2464 ffree(); 2465 fincstp(); 2466 } 2467 2468 void MacroAssembler::fremr(Register tmp) { 2469 save_rax(tmp); 2470 { Label L; 2471 bind(L); 2472 fprem(); 2473 fwait(); fnstsw_ax(); 2474 #ifdef _LP64 2475 testl(rax, 0x400); 2476 jcc(Assembler::notEqual, L); 2477 #else 2478 sahf(); 2479 jcc(Assembler::parity, L); 2480 #endif // _LP64 2481 } 2482 restore_rax(tmp); 2483 // Result is in ST0.
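// (The loop above retries fprem while the FPU's C2 condition flag reports an
// incomplete partial remainder: C2 is bit 10 (0x400) of the status word tested
// directly on 64bit, and maps to PF after sahf on 32bit.)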
2484 // Note: fxch & fpop to get rid of ST1 2485 // (otherwise FPU stack could overflow eventually) 2486 fxch(1); 2487 fpop(); 2488 } 2489 2490 2491 void MacroAssembler::incrementl(AddressLiteral dst) { 2492 if (reachable(dst)) { 2493 incrementl(as_Address(dst)); 2494 } else { 2495 lea(rscratch1, dst); 2496 incrementl(Address(rscratch1, 0)); 2497 } 2498 } 2499 2500 void MacroAssembler::incrementl(ArrayAddress dst) { 2501 incrementl(as_Address(dst)); 2502 } 2503 2504 void MacroAssembler::incrementl(Register reg, int value) { 2505 if (value == min_jint) {addl(reg, value) ; return; } 2506 if (value < 0) { decrementl(reg, -value); return; } 2507 if (value == 0) { ; return; } 2508 if (value == 1 && UseIncDec) { incl(reg) ; return; } 2509 /* else */ { addl(reg, value) ; return; } 2510 } 2511 2512 void MacroAssembler::incrementl(Address dst, int value) { 2513 if (value == min_jint) {addl(dst, value) ; return; } 2514 if (value < 0) { decrementl(dst, -value); return; } 2515 if (value == 0) { ; return; } 2516 if (value == 1 && UseIncDec) { incl(dst) ; return; } 2517 /* else */ { addl(dst, value) ; return; } 2518 } 2519 2520 void MacroAssembler::jump(AddressLiteral dst) { 2521 if (reachable(dst)) { 2522 jmp_literal(dst.target(), dst.rspec()); 2523 } else { 2524 lea(rscratch1, dst); 2525 jmp(rscratch1); 2526 } 2527 } 2528 2529 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { 2530 if (reachable(dst)) { 2531 InstructionMark im(this); 2532 relocate(dst.reloc()); 2533 const int short_size = 2; 2534 const int long_size = 6; 2535 int offs = (intptr_t)dst.target() - ((intptr_t)pc()); 2536 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { 2537 // 0111 tttn #8-bit disp 2538 emit_int8(0x70 | cc); 2539 emit_int8((offs - short_size) & 0xFF); 2540 } else { 2541 // 0000 1111 1000 tttn #32-bit disp 2542 emit_int8(0x0F); 2543 emit_int8((unsigned char)(0x80 | cc)); 2544 emit_int32(offs - long_size); 2545 } 2546 } else { 2547 #ifdef ASSERT 2548 warning("reversing conditional branch"); 2549 #endif /* ASSERT */ 2550 Label skip; 2551 jccb(reverse[cc], skip); 2552 lea(rscratch1, dst); 2553 Assembler::jmp(rscratch1); 2554 bind(skip); 2555 } 2556 } 2557 2558 void MacroAssembler::ldmxcsr(AddressLiteral src) { 2559 if (reachable(src)) { 2560 Assembler::ldmxcsr(as_Address(src)); 2561 } else { 2562 lea(rscratch1, src); 2563 Assembler::ldmxcsr(Address(rscratch1, 0)); 2564 } 2565 } 2566 2567 int MacroAssembler::load_signed_byte(Register dst, Address src) { 2568 int off; 2569 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 2570 off = offset(); 2571 movsbl(dst, src); // movsxb 2572 } else { 2573 off = load_unsigned_byte(dst, src); 2574 shll(dst, 24); 2575 sarl(dst, 24); 2576 } 2577 return off; 2578 } 2579 2580 // Note: load_signed_short used to be called load_signed_word. 2581 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler 2582 // manual, which means 16 bits, that usage is found nowhere in HotSpot code. 2583 // The term "word" in HotSpot means a 32- or 64-bit machine word. 2584 int MacroAssembler::load_signed_short(Register dst, Address src) { 2585 int off; 2586 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 2587 // This is dubious to me since it seems safe to do a signed 16 => 64 bit 2588 // version but this is what 64bit has always done. This seems to imply 2589 // that users are only using 32bits worth. 
2590 off = offset(); 2591 movswl(dst, src); // movsxw 2592 } else { 2593 off = load_unsigned_short(dst, src); 2594 shll(dst, 16); 2595 sarl(dst, 16); 2596 } 2597 return off; 2598 } 2599 2600 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 2601 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 2602 // and "3.9 Partial Register Penalties", p. 22). 2603 int off; 2604 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { 2605 off = offset(); 2606 movzbl(dst, src); // movzxb 2607 } else { 2608 xorl(dst, dst); 2609 off = offset(); 2610 movb(dst, src); 2611 } 2612 return off; 2613 } 2614 2615 // Note: load_unsigned_short used to be called load_unsigned_word. 2616 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 2617 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 2618 // and "3.9 Partial Register Penalties", p. 22). 2619 int off; 2620 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { 2621 off = offset(); 2622 movzwl(dst, src); // movzxw 2623 } else { 2624 xorl(dst, dst); 2625 off = offset(); 2626 movw(dst, src); 2627 } 2628 return off; 2629 } 2630 2631 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { 2632 switch (size_in_bytes) { 2633 #ifndef _LP64 2634 case 8: 2635 assert(dst2 != noreg, "second dest register required"); 2636 movl(dst, src); 2637 movl(dst2, src.plus_disp(BytesPerInt)); 2638 break; 2639 #else 2640 case 8: movq(dst, src); break; 2641 #endif 2642 case 4: movl(dst, src); break; 2643 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 2644 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 2645 default: ShouldNotReachHere(); 2646 } 2647 } 2648 2649 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 2650 switch (size_in_bytes) { 2651 #ifndef _LP64 2652 case 8: 2653 assert(src2 != noreg, "second source register required"); 2654 movl(dst, src); 2655 movl(dst.plus_disp(BytesPerInt), src2); 2656 break; 2657 #else 2658 case 8: movq(dst, src); break; 2659 #endif 2660 case 4: movl(dst, src); break; 2661 case 2: movw(dst, src); break; 2662 case 1: movb(dst, src); break; 2663 default: ShouldNotReachHere(); 2664 } 2665 } 2666 2667 void MacroAssembler::mov32(AddressLiteral dst, Register src) { 2668 if (reachable(dst)) { 2669 movl(as_Address(dst), src); 2670 } else { 2671 lea(rscratch1, dst); 2672 movl(Address(rscratch1, 0), src); 2673 } 2674 } 2675 2676 void MacroAssembler::mov32(Register dst, AddressLiteral src) { 2677 if (reachable(src)) { 2678 movl(dst, as_Address(src)); 2679 } else { 2680 lea(rscratch1, src); 2681 movl(dst, Address(rscratch1, 0)); 2682 } 2683 } 2684 2685 // C++ bool manipulation 2686 2687 void MacroAssembler::movbool(Register dst, Address src) { 2688 if(sizeof(bool) == 1) 2689 movb(dst, src); 2690 else if(sizeof(bool) == 2) 2691 movw(dst, src); 2692 else if(sizeof(bool) == 4) 2693 movl(dst, src); 2694 else 2695 // unsupported 2696 ShouldNotReachHere(); 2697 } 2698 2699 void MacroAssembler::movbool(Address dst, bool boolconst) { 2700 if(sizeof(bool) == 1) 2701 movb(dst, (int) boolconst); 2702 else if(sizeof(bool) == 2) 2703 movw(dst, (int) boolconst); 2704 else if(sizeof(bool) == 4) 2705 movl(dst, (int) boolconst); 2706 else 2707 // unsupported 2708 ShouldNotReachHere(); 2709 } 2710 2711 void MacroAssembler::movbool(Address dst, Register src) { 2712 if(sizeof(bool) == 1) 2713 
movb(dst, src); 2714 else if(sizeof(bool) == 2) 2715 movw(dst, src); 2716 else if(sizeof(bool) == 4) 2717 movl(dst, src); 2718 else 2719 // unsupported 2720 ShouldNotReachHere(); 2721 } 2722 2723 void MacroAssembler::movbyte(ArrayAddress dst, int src) { 2724 movb(as_Address(dst), src); 2725 } 2726 2727 void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { 2728 if (reachable(src)) { 2729 movdl(dst, as_Address(src)); 2730 } else { 2731 lea(rscratch1, src); 2732 movdl(dst, Address(rscratch1, 0)); 2733 } 2734 } 2735 2736 void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { 2737 if (reachable(src)) { 2738 movq(dst, as_Address(src)); 2739 } else { 2740 lea(rscratch1, src); 2741 movq(dst, Address(rscratch1, 0)); 2742 } 2743 } 2744 2745 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 2746 if (reachable(src)) { 2747 if (UseXmmLoadAndClearUpper) { 2748 movsd (dst, as_Address(src)); 2749 } else { 2750 movlpd(dst, as_Address(src)); 2751 } 2752 } else { 2753 lea(rscratch1, src); 2754 if (UseXmmLoadAndClearUpper) { 2755 movsd (dst, Address(rscratch1, 0)); 2756 } else { 2757 movlpd(dst, Address(rscratch1, 0)); 2758 } 2759 } 2760 } 2761 2762 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 2763 if (reachable(src)) { 2764 movss(dst, as_Address(src)); 2765 } else { 2766 lea(rscratch1, src); 2767 movss(dst, Address(rscratch1, 0)); 2768 } 2769 } 2770 2771 void MacroAssembler::movptr(Register dst, Register src) { 2772 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 2773 } 2774 2775 void MacroAssembler::movptr(Register dst, Address src) { 2776 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 2777 } 2778 2779 // src should NEVER be a real pointer. Use AddressLiteral for true pointers 2780 void MacroAssembler::movptr(Register dst, intptr_t src) { 2781 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 2782 } 2783 2784 void MacroAssembler::movptr(Address dst, Register src) { 2785 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 2786 } 2787 2788 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { 2789 if (reachable(src)) { 2790 Assembler::movdqu(dst, as_Address(src)); 2791 } else { 2792 lea(rscratch1, src); 2793 Assembler::movdqu(dst, Address(rscratch1, 0)); 2794 } 2795 } 2796 2797 void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) { 2798 if (reachable(src)) { 2799 Assembler::movdqa(dst, as_Address(src)); 2800 } else { 2801 lea(rscratch1, src); 2802 Assembler::movdqa(dst, Address(rscratch1, 0)); 2803 } 2804 } 2805 2806 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { 2807 if (reachable(src)) { 2808 Assembler::movsd(dst, as_Address(src)); 2809 } else { 2810 lea(rscratch1, src); 2811 Assembler::movsd(dst, Address(rscratch1, 0)); 2812 } 2813 } 2814 2815 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 2816 if (reachable(src)) { 2817 Assembler::movss(dst, as_Address(src)); 2818 } else { 2819 lea(rscratch1, src); 2820 Assembler::movss(dst, Address(rscratch1, 0)); 2821 } 2822 } 2823 2824 void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { 2825 if (reachable(src)) { 2826 Assembler::mulsd(dst, as_Address(src)); 2827 } else { 2828 lea(rscratch1, src); 2829 Assembler::mulsd(dst, Address(rscratch1, 0)); 2830 } 2831 } 2832 2833 void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { 2834 if (reachable(src)) { 2835 Assembler::mulss(dst, as_Address(src)); 2836 } else { 2837 lea(rscratch1, src); 2838 Assembler::mulss(dst, Address(rscratch1, 0)); 2839 } 2840 } 2841 2842 
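// The AddressLiteral wrappers above (and those that follow) all share one
// shape: use the literal directly when it is reachable from the code being
// emitted, otherwise materialize the address in the scratch register first.
// A minimal sketch of the pattern (illustrative only, not a new entry point):
//
//   void MacroAssembler::op(XMMRegister dst, AddressLiteral src) {
//     if (reachable(src)) {
//       Assembler::op(dst, as_Address(src));       // direct (e.g. rip-relative)
//     } else {
//       lea(rscratch1, src);                        // address into scratch reg
//       Assembler::op(dst, Address(rscratch1, 0)); // indirect through scratch
//     }
//   }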
void MacroAssembler::null_check(Register reg, int offset) { 2843 if (needs_explicit_null_check(offset)) { 2844 // provoke OS NULL exception if reg = NULL by 2845 // accessing M[reg] w/o changing any (non-CC) registers 2846 // NOTE: cmpl is plenty here to provoke a segv 2847 cmpptr(rax, Address(reg, 0)); 2848 // Note: should probably use testl(rax, Address(reg, 0)); 2849 // may be shorter code (however, this version of 2850 // testl needs to be implemented first) 2851 } else { 2852 // nothing to do, (later) access of M[reg + offset] 2853 // will provoke OS NULL exception if reg = NULL 2854 } 2855 } 2856 2857 void MacroAssembler::os_breakpoint() { 2858 // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability 2859 // (e.g., MSVC can't call ps() otherwise) 2860 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 2861 } 2862 2863 void MacroAssembler::pop_CPU_state() { 2864 pop_FPU_state(); 2865 pop_IU_state(); 2866 } 2867 2868 void MacroAssembler::pop_FPU_state() { 2869 NOT_LP64(frstor(Address(rsp, 0));) 2870 LP64_ONLY(fxrstor(Address(rsp, 0));) 2871 addptr(rsp, FPUStateSizeInWords * wordSize); 2872 } 2873 2874 void MacroAssembler::pop_IU_state() { 2875 popa(); 2876 LP64_ONLY(addq(rsp, 8)); 2877 popf(); 2878 } 2879 2880 // Save Integer and Float state 2881 // Warning: Stack must be 16 byte aligned (64bit) 2882 void MacroAssembler::push_CPU_state() { 2883 push_IU_state(); 2884 push_FPU_state(); 2885 } 2886 2887 void MacroAssembler::push_FPU_state() { 2888 subptr(rsp, FPUStateSizeInWords * wordSize); 2889 #ifndef _LP64 2890 fnsave(Address(rsp, 0)); 2891 fwait(); 2892 #else 2893 fxsave(Address(rsp, 0)); 2894 #endif // LP64 2895 } 2896 2897 void MacroAssembler::push_IU_state() { 2898 // Push flags first because pusha kills them 2899 pushf(); 2900 // Make sure rsp stays 16-byte aligned 2901 LP64_ONLY(subq(rsp, 8)); 2902 pusha(); 2903 } 2904 2905 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 2906 // determine java_thread register 2907 if (!java_thread->is_valid()) { 2908 java_thread = rdi; 2909 get_thread(java_thread); 2910 } 2911 // we must set sp to zero to clear frame 2912 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 2913 if (clear_fp) { 2914 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 2915 } 2916 2917 if (clear_pc) 2918 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 2919 2920 } 2921 2922 void MacroAssembler::restore_rax(Register tmp) { 2923 if (tmp == noreg) pop(rax); 2924 else if (tmp != rax) mov(rax, tmp); 2925 } 2926 2927 void MacroAssembler::round_to(Register reg, int modulus) { 2928 addptr(reg, modulus - 1); 2929 andptr(reg, -modulus); 2930 } 2931 2932 void MacroAssembler::save_rax(Register tmp) { 2933 if (tmp == noreg) push(rax); 2934 else if (tmp != rax) mov(tmp, rax); 2935 } 2936 2937 // Write serialization page so VM thread can do a pseudo remote membar. 2938 // We use the current thread pointer to calculate a thread specific 2939 // offset to write to within the page. This minimizes bus traffic 2940 // due to cache line collision.
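// For example, assuming a 4096-byte page: the mask used below is
// vm_page_size() - sizeof(int) = 0xffc, so each thread stores to an
// int-aligned slot within the page, selected by bits of its thread pointer
// after the serialize-page shift, spreading the stores across cache lines.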
2941 void MacroAssembler::serialize_memory(Register thread, Register tmp) { 2942 movl(tmp, thread); 2943 shrl(tmp, os::get_serialize_page_shift_count()); 2944 andl(tmp, (os::vm_page_size() - sizeof(int))); 2945 2946 Address index(noreg, tmp, Address::times_1); 2947 ExternalAddress page(os::get_memory_serialize_page()); 2948 2949 // Size of store must match masking code above 2950 movl(as_Address(ArrayAddress(page, index)), tmp); 2951 } 2952 2953 // Calls to C land 2954 // 2955 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded 2956 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 2957 // has to be reset to 0. This is required to allow proper stack traversal. 2958 void MacroAssembler::set_last_Java_frame(Register java_thread, 2959 Register last_java_sp, 2960 Register last_java_fp, 2961 address last_java_pc) { 2962 // determine java_thread register 2963 if (!java_thread->is_valid()) { 2964 java_thread = rdi; 2965 get_thread(java_thread); 2966 } 2967 // determine last_java_sp register 2968 if (!last_java_sp->is_valid()) { 2969 last_java_sp = rsp; 2970 } 2971 2972 // last_java_fp is optional 2973 2974 if (last_java_fp->is_valid()) { 2975 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); 2976 } 2977 2978 // last_java_pc is optional 2979 2980 if (last_java_pc != NULL) { 2981 lea(Address(java_thread, 2982 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), 2983 InternalAddress(last_java_pc)); 2984 2985 } 2986 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 2987 } 2988 2989 void MacroAssembler::shlptr(Register dst, int imm8) { 2990 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); 2991 } 2992 2993 void MacroAssembler::shrptr(Register dst, int imm8) { 2994 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); 2995 } 2996 2997 void MacroAssembler::sign_extend_byte(Register reg) { 2998 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { 2999 movsbl(reg, reg); // movsxb 3000 } else { 3001 shll(reg, 24); 3002 sarl(reg, 24); 3003 } 3004 } 3005 3006 void MacroAssembler::sign_extend_short(Register reg) { 3007 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 3008 movswl(reg, reg); // movsxw 3009 } else { 3010 shll(reg, 16); 3011 sarl(reg, 16); 3012 } 3013 } 3014 3015 void MacroAssembler::testl(Register dst, AddressLiteral src) { 3016 assert(reachable(src), "Address should be reachable"); 3017 testl(dst, as_Address(src)); 3018 } 3019 3020 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { 3021 if (reachable(src)) { 3022 Assembler::sqrtsd(dst, as_Address(src)); 3023 } else { 3024 lea(rscratch1, src); 3025 Assembler::sqrtsd(dst, Address(rscratch1, 0)); 3026 } 3027 } 3028 3029 void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { 3030 if (reachable(src)) { 3031 Assembler::sqrtss(dst, as_Address(src)); 3032 } else { 3033 lea(rscratch1, src); 3034 Assembler::sqrtss(dst, Address(rscratch1, 0)); 3035 } 3036 } 3037 3038 void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { 3039 if (reachable(src)) { 3040 Assembler::subsd(dst, as_Address(src)); 3041 } else { 3042 lea(rscratch1, src); 3043 Assembler::subsd(dst, Address(rscratch1, 0)); 3044 } 3045 } 3046 3047 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { 3048 if (reachable(src)) { 3049 Assembler::subss(dst, as_Address(src)); 3050 } else { 3051 lea(rscratch1, src); 3052 Assembler::subss(dst, Address(rscratch1, 0)); 3053 } 3054 } 3055 
3056 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 3057 if (reachable(src)) { 3058 Assembler::ucomisd(dst, as_Address(src)); 3059 } else { 3060 lea(rscratch1, src); 3061 Assembler::ucomisd(dst, Address(rscratch1, 0)); 3062 } 3063 } 3064 3065 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 3066 if (reachable(src)) { 3067 Assembler::ucomiss(dst, as_Address(src)); 3068 } else { 3069 lea(rscratch1, src); 3070 Assembler::ucomiss(dst, Address(rscratch1, 0)); 3071 } 3072 } 3073 3074 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 3075 // Used in sign-bit flipping with aligned address. 3076 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 3077 if (reachable(src)) { 3078 Assembler::xorpd(dst, as_Address(src)); 3079 } else { 3080 lea(rscratch1, src); 3081 Assembler::xorpd(dst, Address(rscratch1, 0)); 3082 } 3083 } 3084 3085 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 3086 // Used in sign-bit flipping with aligned address. 3087 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 3088 if (reachable(src)) { 3089 Assembler::xorps(dst, as_Address(src)); 3090 } else { 3091 lea(rscratch1, src); 3092 Assembler::xorps(dst, Address(rscratch1, 0)); 3093 } 3094 } 3095 3096 void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { 3097 // Used in sign-bit flipping with aligned address. 3098 bool aligned_adr = (((intptr_t)src.target() & 15) == 0); 3099 assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes"); 3100 if (reachable(src)) { 3101 Assembler::pshufb(dst, as_Address(src)); 3102 } else { 3103 lea(rscratch1, src); 3104 Assembler::pshufb(dst, Address(rscratch1, 0)); 3105 } 3106 } 3107 3108 // AVX 3-operands instructions 3109 3110 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3111 if (reachable(src)) { 3112 vaddsd(dst, nds, as_Address(src)); 3113 } else { 3114 lea(rscratch1, src); 3115 vaddsd(dst, nds, Address(rscratch1, 0)); 3116 } 3117 } 3118 3119 void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3120 if (reachable(src)) { 3121 vaddss(dst, nds, as_Address(src)); 3122 } else { 3123 lea(rscratch1, src); 3124 vaddss(dst, nds, Address(rscratch1, 0)); 3125 } 3126 } 3127 3128 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3129 if (reachable(src)) { 3130 vandpd(dst, nds, as_Address(src), vector256); 3131 } else { 3132 lea(rscratch1, src); 3133 vandpd(dst, nds, Address(rscratch1, 0), vector256); 3134 } 3135 } 3136 3137 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3138 if (reachable(src)) { 3139 vandps(dst, nds, as_Address(src), vector256); 3140 } else { 3141 lea(rscratch1, src); 3142 vandps(dst, nds, Address(rscratch1, 0), vector256); 3143 } 3144 } 3145 3146 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3147 if (reachable(src)) { 3148 vdivsd(dst, nds, as_Address(src)); 3149 } else { 3150 lea(rscratch1, src); 3151 vdivsd(dst, nds, Address(rscratch1, 0)); 3152 } 3153 } 3154 3155 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3156 if (reachable(src)) { 3157 vdivss(dst, nds, as_Address(src)); 3158 } else { 3159 lea(rscratch1, src); 3160 vdivss(dst, nds, Address(rscratch1, 0)); 3161 } 3162 } 3163 3164 void 
MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3165 if (reachable(src)) { 3166 vmulsd(dst, nds, as_Address(src)); 3167 } else { 3168 lea(rscratch1, src); 3169 vmulsd(dst, nds, Address(rscratch1, 0)); 3170 } 3171 } 3172 3173 void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3174 if (reachable(src)) { 3175 vmulss(dst, nds, as_Address(src)); 3176 } else { 3177 lea(rscratch1, src); 3178 vmulss(dst, nds, Address(rscratch1, 0)); 3179 } 3180 } 3181 3182 void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3183 if (reachable(src)) { 3184 vsubsd(dst, nds, as_Address(src)); 3185 } else { 3186 lea(rscratch1, src); 3187 vsubsd(dst, nds, Address(rscratch1, 0)); 3188 } 3189 } 3190 3191 void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 3192 if (reachable(src)) { 3193 vsubss(dst, nds, as_Address(src)); 3194 } else { 3195 lea(rscratch1, src); 3196 vsubss(dst, nds, Address(rscratch1, 0)); 3197 } 3198 } 3199 3200 void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3201 if (reachable(src)) { 3202 vxorpd(dst, nds, as_Address(src), vector256); 3203 } else { 3204 lea(rscratch1, src); 3205 vxorpd(dst, nds, Address(rscratch1, 0), vector256); 3206 } 3207 } 3208 3209 void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 3210 if (reachable(src)) { 3211 vxorps(dst, nds, as_Address(src), vector256); 3212 } else { 3213 lea(rscratch1, src); 3214 vxorps(dst, nds, Address(rscratch1, 0), vector256); 3215 } 3216 } 3217 3218 3219 ////////////////////////////////////////////////////////////////////////////////// 3220 #if INCLUDE_ALL_GCS 3221 3222 void MacroAssembler::g1_write_barrier_pre(Register obj, 3223 Register pre_val, 3224 Register thread, 3225 Register tmp, 3226 bool tosca_live, 3227 bool expand_call) { 3228 3229 // If expand_call is true then we expand the call_VM_leaf macro 3230 // directly to skip generating the check by 3231 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 3232 3233 #ifdef _LP64 3234 assert(thread == r15_thread, "must be"); 3235 #endif // _LP64 3236 3237 Label done; 3238 Label runtime; 3239 3240 assert(pre_val != noreg, "check this code"); 3241 3242 if (obj != noreg) { 3243 assert_different_registers(obj, pre_val, tmp); 3244 assert(pre_val != rax, "check this code"); 3245 } 3246 3247 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 3248 PtrQueue::byte_offset_of_active())); 3249 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 3250 PtrQueue::byte_offset_of_index())); 3251 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 3252 PtrQueue::byte_offset_of_buf())); 3253 3254 3255 // Is marking active? 3256 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { 3257 cmpl(in_progress, 0); 3258 } else { 3259 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); 3260 cmpb(in_progress, 0); 3261 } 3262 jcc(Assembler::equal, done); 3263 3264 // Do we need to load the previous value? 3265 if (obj != noreg) { 3266 load_heap_oop(pre_val, Address(obj, 0)); 3267 } 3268 3269 // Is the previous value null? 3270 cmpptr(pre_val, (int32_t) NULL_WORD); 3271 jcc(Assembler::equal, done); 3272 3273 // Can we store original value in the thread's buffer? 3274 // Is index == 0? 3275 // (The index field is typed as size_t.) 
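// In pseudo-C, the fast path that follows is (illustrative sketch; the index
// counts down in bytes from the end of the buffer):
//
//   if (index == 0) goto runtime;   // buffer is full, hand off to the VM
//   index -= wordSize;
//   *(buffer + index) = pre_val;    // record the previous value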
3276 3277 movptr(tmp, index); // tmp := *index_adr 3278 cmpptr(tmp, 0); // tmp == 0? 3279 jcc(Assembler::equal, runtime); // If yes, goto runtime 3280 3281 subptr(tmp, wordSize); // tmp := tmp - wordSize 3282 movptr(index, tmp); // *index_adr := tmp 3283 addptr(tmp, buffer); // tmp := tmp + *buffer_adr 3284 3285 // Record the previous value 3286 movptr(Address(tmp, 0), pre_val); 3287 jmp(done); 3288 3289 bind(runtime); 3290 // save the live input values 3291 if(tosca_live) push(rax); 3292 3293 if (obj != noreg && obj != rax) 3294 push(obj); 3295 3296 if (pre_val != rax) 3297 push(pre_val); 3298 3299 // Calling the runtime using the regular call_VM_leaf mechanism generates 3300 // code (generated by InterpreterMacroAssembler::call_VM_leaf_base) 3301 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 3302 // 3303 // If we are generating the pre-barrier without a frame (e.g. in the 3304 // intrinsified Reference.get() routine) then ebp might be pointing to 3305 // the caller frame and so this check will most likely fail at runtime. 3306 // 3307 // Expanding the call directly bypasses the generation of the check. 3308 // So when we do not have a full interpreter frame on the stack 3309 // expand_call should be passed true. 3310 3311 NOT_LP64( push(thread); ) 3312 3313 if (expand_call) { 3314 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 3315 pass_arg1(this, thread); 3316 pass_arg0(this, pre_val); 3317 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); 3318 } else { 3319 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); 3320 } 3321 3322 NOT_LP64( pop(thread); ) 3323 3324 // restore the live input values 3325 if (pre_val != rax) 3326 pop(pre_val); 3327 3328 if (obj != noreg && obj != rax) 3329 pop(obj); 3330 3331 if(tosca_live) pop(rax); 3332 3333 bind(done); 3334 } 3335 3336 void MacroAssembler::g1_write_barrier_post(Register store_addr, 3337 Register new_val, 3338 Register thread, 3339 Register tmp, 3340 Register tmp2) { 3341 #ifdef _LP64 3342 assert(thread == r15_thread, "must be"); 3343 #endif // _LP64 3344 3345 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 3346 PtrQueue::byte_offset_of_index())); 3347 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 3348 PtrQueue::byte_offset_of_buf())); 3349 3350 BarrierSet* bs = Universe::heap()->barrier_set(); 3351 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 3352 Label done; 3353 Label runtime; 3354 3355 // Does store cross heap regions? 3356 3357 movptr(tmp, store_addr); 3358 xorptr(tmp, new_val); 3359 shrptr(tmp, HeapRegion::LogOfHRGrainBytes); 3360 jcc(Assembler::equal, done); 3361 3362 // crosses regions, storing NULL? 3363 3364 cmpptr(new_val, (int32_t) NULL_WORD); 3365 jcc(Assembler::equal, done); 3366 3367 // storing region crossing non-NULL, is card already dirty?
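// (In this barrier a card is dirty when its byte is 0: the code below computes
// card_addr = byte_map_base + (store_addr >> card_shift), is done if the byte
// is already 0, and otherwise stores 0 to dirty the card and logs it.)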
3368 3369 ExternalAddress cardtable((address) ct->byte_map_base); 3370 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 3371 #ifdef _LP64 3372 const Register card_addr = tmp; 3373 3374 movq(card_addr, store_addr); 3375 shrq(card_addr, CardTableModRefBS::card_shift); 3376 3377 lea(tmp2, cardtable); 3378 3379 // get the address of the card 3380 addq(card_addr, tmp2); 3381 #else 3382 const Register card_index = tmp; 3383 3384 movl(card_index, store_addr); 3385 shrl(card_index, CardTableModRefBS::card_shift); 3386 3387 Address index(noreg, card_index, Address::times_1); 3388 const Register card_addr = tmp; 3389 lea(card_addr, as_Address(ArrayAddress(cardtable, index))); 3390 #endif 3391 cmpb(Address(card_addr, 0), 0); 3392 jcc(Assembler::equal, done); 3393 3394 // storing a region crossing, non-NULL oop, card is clean. 3395 // dirty card and log. 3396 3397 movb(Address(card_addr, 0), 0); 3398 3399 cmpl(queue_index, 0); 3400 jcc(Assembler::equal, runtime); 3401 subl(queue_index, wordSize); 3402 movptr(tmp2, buffer); 3403 #ifdef _LP64 3404 movslq(rscratch1, queue_index); 3405 addq(tmp2, rscratch1); 3406 movq(Address(tmp2, 0), card_addr); 3407 #else 3408 addl(tmp2, queue_index); 3409 movl(Address(tmp2, 0), card_index); 3410 #endif 3411 jmp(done); 3412 3413 bind(runtime); 3414 // save the live input values 3415 push(store_addr); 3416 push(new_val); 3417 #ifdef _LP64 3418 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); 3419 #else 3420 push(thread); 3421 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); 3422 pop(thread); 3423 #endif 3424 pop(new_val); 3425 pop(store_addr); 3426 3427 bind(done); 3428 } 3429 3430 #endif // INCLUDE_ALL_GCS 3431 ////////////////////////////////////////////////////////////////////////////////// 3432 3433 3434 void MacroAssembler::store_check(Register obj) { 3435 // Does a store check for the oop in register obj. The content of 3436 // register obj is destroyed afterwards. 3437 store_check_part_1(obj); 3438 store_check_part_2(obj); 3439 } 3440 3441 void MacroAssembler::store_check(Register obj, Address dst) { 3442 store_check(obj); 3443 } 3444 3445 3446 // split the store check operation so that other instructions can be scheduled inbetween 3447 void MacroAssembler::store_check_part_1(Register obj) { 3448 BarrierSet* bs = Universe::heap()->barrier_set(); 3449 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 3450 shrptr(obj, CardTableModRefBS::card_shift); 3451 } 3452 3453 void MacroAssembler::store_check_part_2(Register obj) { 3454 BarrierSet* bs = Universe::heap()->barrier_set(); 3455 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 3456 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 3457 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 3458 3459 // The calculation for byte_map_base is as follows: 3460 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); 3461 // So this essentially converts an address to a displacement and 3462 // it will never need to be relocated. 
On 64bit, however, the value may be too 3463 // large for a 32bit displacement. 3464 3465 intptr_t disp = (intptr_t) ct->byte_map_base; 3466 if (is_simm32(disp)) { 3467 Address cardtable(noreg, obj, Address::times_1, disp); 3468 movb(cardtable, 0); 3469 } else { 3470 // By doing it as an ExternalAddress disp could be converted to a rip-relative 3471 // displacement and done in a single instruction given favorable mapping and 3472 // a smarter version of as_Address. Worst case it is two instructions which 3473 // is no worse off than loading disp into a register and doing as a simple 3474 // Address() as above. 3475 // We can't use ExternalAddress as the only style since if disp == 0 we'll 3476 // assert since NULL isn't acceptable in a relocInfo (see 6644928). In any case 3477 // in some cases we'll get a single instruction version. 3478 3479 ExternalAddress cardtable((address)disp); 3480 Address index(noreg, obj, Address::times_1); 3481 movb(as_Address(ArrayAddress(cardtable, index)), 0); 3482 } 3483 } 3484 3485 void MacroAssembler::subptr(Register dst, int32_t imm32) { 3486 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 3487 } 3488 3489 // Force generation of a 4 byte immediate value even if it fits into 8bit 3490 void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { 3491 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); 3492 } 3493 3494 void MacroAssembler::subptr(Register dst, Register src) { 3495 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 3496 } 3497 3498 // C++ bool manipulation 3499 void MacroAssembler::testbool(Register dst) { 3500 if(sizeof(bool) == 1) 3501 testb(dst, 0xff); 3502 else if(sizeof(bool) == 2) { 3503 // testw implementation needed for two byte bools 3504 ShouldNotReachHere(); 3505 } else if(sizeof(bool) == 4) 3506 testl(dst, dst); 3507 else 3508 // unsupported 3509 ShouldNotReachHere(); 3510 } 3511 3512 void MacroAssembler::testptr(Register dst, Register src) { 3513 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 3514 } 3515 3516 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 3517 void MacroAssembler::tlab_allocate(Register obj, 3518 Register var_size_in_bytes, 3519 int con_size_in_bytes, 3520 Register t1, 3521 Register t2, 3522 Label& slow_case) { 3523 assert_different_registers(obj, t1, t2); 3524 assert_different_registers(obj, var_size_in_bytes, t1); 3525 Register end = t2; 3526 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); 3527 3528 verify_tlab(); 3529 3530 NOT_LP64(get_thread(thread)); 3531 3532 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); 3533 if (var_size_in_bytes == noreg) { 3534 lea(end, Address(obj, con_size_in_bytes)); 3535 } else { 3536 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 3537 } 3538 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); 3539 jcc(Assembler::above, slow_case); 3540 3541 // update the tlab top pointer 3542 movptr(Address(thread, JavaThread::tlab_top_offset()), end); 3543 3544 // recover var_size_in_bytes if necessary 3545 if (var_size_in_bytes == end) { 3546 subptr(var_size_in_bytes, obj); 3547 } 3548 verify_tlab(); 3549 } 3550 3551 // Preserves rbx, and rdx.
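// tlab_refill either retains the current tlab (when the space left in it is
// still above the refill waste limit) and allocates the object directly in
// eden, or discards it: the dead tlab is filled with a dummy int[] so the heap
// stays parsable, and a fresh tlab is then allocated from eden.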
// Preserves rbx and rdx.
Register MacroAssembler::tlab_refill(Label& retry,
                                     Label& try_eden,
                                     Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // If tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with an array object.
  testptr(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  // Store klass last: concurrent GCs assume the length is valid if the
  // klass field is not null.
  store_klass(top, t1);

  movptr(t1, top);
  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  incr_allocated_bytes(thread_reg, t1, 0);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, slow_case);
  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    STOP("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);

  return thread_reg; // for use by caller
}

void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    get_thread(thread);
#endif
  }

#ifdef _LP64
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  // The counter is 64 bits wide; on 32-bit, propagate the carry into the high word.
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}

void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
  pusha();

  // if we are coming from c1, xmm registers may be live
  int off = 0;
  if (UseSSE == 1) {
    subptr(rsp, sizeof(jdouble)*8);
    movflt(Address(rsp, off++*sizeof(jdouble)), xmm0);
    movflt(Address(rsp, off++*sizeof(jdouble)), xmm1);
    movflt(Address(rsp, off++*sizeof(jdouble)), xmm2);
    movflt(Address(rsp, off++*sizeof(jdouble)), xmm3);
    movflt(Address(rsp, off++*sizeof(jdouble)), xmm4);
    movflt(Address(rsp, off++*sizeof(jdouble)), xmm5);
    movflt(Address(rsp, off++*sizeof(jdouble)), xmm6);
    movflt(Address(rsp, off++*sizeof(jdouble)), xmm7);
  } else if (UseSSE >= 2) {
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      assert(UseAVX > 0, "256bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
      vextractf128h(Address(rsp,  0), xmm0);
      vextractf128h(Address(rsp, 16), xmm1);
      vextractf128h(Address(rsp, 32), xmm2);
      vextractf128h(Address(rsp, 48), xmm3);
      vextractf128h(Address(rsp, 64), xmm4);
      vextractf128h(Address(rsp, 80), xmm5);
      vextractf128h(Address(rsp, 96), xmm6);
      vextractf128h(Address(rsp,112), xmm7);
#ifdef _LP64
      vextractf128h(Address(rsp,128), xmm8);
      vextractf128h(Address(rsp,144), xmm9);
      vextractf128h(Address(rsp,160), xmm10);
      vextractf128h(Address(rsp,176), xmm11);
      vextractf128h(Address(rsp,192), xmm12);
      vextractf128h(Address(rsp,208), xmm13);
      vextractf128h(Address(rsp,224), xmm14);
      vextractf128h(Address(rsp,240), xmm15);
#endif
    }
#endif
    // Save whole 128-bit (16 bytes) XMM registers
    subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    movdqu(Address(rsp, off++*16), xmm0);
    movdqu(Address(rsp, off++*16), xmm1);
    movdqu(Address(rsp, off++*16), xmm2);
    movdqu(Address(rsp, off++*16), xmm3);
    movdqu(Address(rsp, off++*16), xmm4);
    movdqu(Address(rsp, off++*16), xmm5);
    movdqu(Address(rsp, off++*16), xmm6);
    movdqu(Address(rsp, off++*16), xmm7);
#ifdef _LP64
    movdqu(Address(rsp, off++*16), xmm8);
    movdqu(Address(rsp, off++*16), xmm9);
    movdqu(Address(rsp, off++*16), xmm10);
    movdqu(Address(rsp, off++*16), xmm11);
    movdqu(Address(rsp, off++*16), xmm12);
    movdqu(Address(rsp, off++*16), xmm13);
    movdqu(Address(rsp, off++*16), xmm14);
    movdqu(Address(rsp, off++*16), xmm15);
#endif
  }

  // Preserve registers across runtime call
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument(s) to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin, dcos etc.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    for (int i = nb_args-1; i >= 0; i--) {
      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
    }
  }

  subptr(rsp, nb_args*sizeof(jdouble));
  for (int i = 0; i < nb_args; i++) {
    fstp_d(Address(rsp, i*sizeof(jdouble)));
  }

#ifdef _LP64
  if (nb_args > 0) {
    movdbl(xmm0, Address(rsp, 0));
  }
  if (nb_args > 1) {
    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
  }
  assert(nb_args <= 2, "unsupported number of args");
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level

  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);

#ifdef _LP64
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble) * nb_args);
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU
    // stack except incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble) * nb_args);
  }

  off = 0;
  if (UseSSE == 1) {
    movflt(xmm0, Address(rsp, off++*sizeof(jdouble)));
    movflt(xmm1, Address(rsp, off++*sizeof(jdouble)));
    movflt(xmm2, Address(rsp, off++*sizeof(jdouble)));
    movflt(xmm3, Address(rsp, off++*sizeof(jdouble)));
    movflt(xmm4, Address(rsp, off++*sizeof(jdouble)));
    movflt(xmm5, Address(rsp, off++*sizeof(jdouble)));
    movflt(xmm6, Address(rsp, off++*sizeof(jdouble)));
    movflt(xmm7, Address(rsp, off++*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2) {
    // Restore whole 128-bit (16 bytes) XMM registers
    movdqu(xmm0,  Address(rsp, off++*16));
    movdqu(xmm1,  Address(rsp, off++*16));
    movdqu(xmm2,  Address(rsp, off++*16));
    movdqu(xmm3,  Address(rsp, off++*16));
    movdqu(xmm4,  Address(rsp, off++*16));
    movdqu(xmm5,  Address(rsp, off++*16));
    movdqu(xmm6,  Address(rsp, off++*16));
    movdqu(xmm7,  Address(rsp, off++*16));
#ifdef _LP64
    movdqu(xmm8,  Address(rsp, off++*16));
    movdqu(xmm9,  Address(rsp, off++*16));
    movdqu(xmm10, Address(rsp, off++*16));
    movdqu(xmm11, Address(rsp, off++*16));
    movdqu(xmm12, Address(rsp, off++*16));
    movdqu(xmm13, Address(rsp, off++*16));
    movdqu(xmm14, Address(rsp, off++*16));
    movdqu(xmm15, Address(rsp, off++*16));
#endif
    addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      vinsertf128h(xmm0,  Address(rsp,  0));
      vinsertf128h(xmm1,  Address(rsp, 16));
      vinsertf128h(xmm2,  Address(rsp, 32));
      vinsertf128h(xmm3,  Address(rsp, 48));
      vinsertf128h(xmm4,  Address(rsp, 64));
      vinsertf128h(xmm5,  Address(rsp, 80));
      vinsertf128h(xmm6,  Address(rsp, 96));
      vinsertf128h(xmm7,  Address(rsp,112));
#ifdef _LP64
      vinsertf128h(xmm8,  Address(rsp,128));
      vinsertf128h(xmm9,  Address(rsp,144));
      vinsertf128h(xmm10, Address(rsp,160));
      vinsertf128h(xmm11, Address(rsp,176));
      vinsertf128h(xmm12, Address(rsp,192));
      vinsertf128h(xmm13, Address(rsp,208));
      vinsertf128h(xmm14, Address(rsp,224));
      vinsertf128h(xmm15, Address(rsp,240));
#endif
      addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    }
#endif
  }
  popa();
}

static const double pi_4 = 0.7853981633974483;

void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.
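  // Summary of the dispatch below (restating the code, not from the
  // original source): if |x| <= pi/4 the result is computed in-line
  // with fsin/fcos/ftan, where no argument reduction is needed;
  // otherwise, or when the pi/4 constant is not reachable, control
  // falls to the SharedRuntime::dsin/dcos/dtan C implementations via
  // fp_runtime_fallback().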
  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  switch(trig) {
  case 's':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
    }
    break;
  case 'c':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
    }
    break;
  case 't':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}


// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step = itableOffsetEntry::size() * wordSize;
  int vte_size = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for InstanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }
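  // Layout reminder (paraphrasing InstanceKlass, not from the original
  // source): the itable sits right after the vtable and begins with a
  // NULL-terminated sequence of itableOffsetEntry pairs <interface
  // klass, offset>, where the offset locates that interface's block of
  // itableMethodEntry slots within the same klass. scan_temp now points
  // at the first itableOffsetEntry.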
  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel) break;

    bind(search);

    // Check that the previous entry is non-null. A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}


// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  Address vtable_entry_addr(recv_klass,
                            vtable_index, Address::times_ptr,
                            base + vtableEntry::method_offset_in_bytes());
  movptr(method_result, vtable_entry_addr);
}


void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}


void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                                   RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset  = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);
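  // Reading guide (restating the logic below, not from the original
  // source): super_check_offset is either the offset of super_klass in
  // sub_klass's primary-super display, in which case one compare
  // decides the whole check, or sc_offset, in which case only the
  // secondary-super cache can match here and a miss must take the
  // slow path.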
  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, Array<Klass*>::base_offset_in_bytes());

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.

  testptr(rax, rax); // Set Z = 0
  repne_scan();

  // Unspill the temp. registers:
  if (pushed_rdi) pop(rdi);
  if (pushed_rcx) pop(rcx);
  if (pushed_rax) pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}


void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop: %s: %s", reg->name(), s);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop {");
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
  BLOCK_COMMENT("} verify_oop");
}


RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      const char* buf = NULL;
      {
        ResourceMark rm;
        stringStream ss;
        ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);
        buf = code_string(ss.as_string());
      }
      jcc(Assembler::notZero, L);
      STOP(buf);
    } else {
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}


Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
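  // Illustrative example (not from the original source): for a constant
  // arg_slot k the returned address works out to
  //   rsp + wordSize (skip the return PC)
  //       + expr_offset_in_bytes(extra_slot_offset) + k * stackElementSize;
  // for a register slot the same sum is formed with a scaled-index
  // address instead of folding the slot into the displacement.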
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  Register scale_reg    = noreg;
  Address::ScaleFactor scale_factor = Address::no_scale;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
  } else {
    scale_reg    = arg_slot.as_register();
    scale_factor = Address::times(stackElementSize);
  }
  offset += wordSize;           // return PC is on stack
  return Address(rsp, scale_reg, scale_factor, offset);
}


void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop_addr: %s", s);
    b = code_string(ss.as_string());
  }
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did (and on 64 bit we do two pushes)
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    lea(rax, addr);
    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    STOP("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    STOP("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}

class ControlWord {
 public:
  int32_t _value;

  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  int  precision_control() const       { return  (_value >>  8) & 3      ; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // rounding control
    const char* rc;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up  "; break;
      case 3: rc = "chop      "; break;
    };
    // precision control
    const char* pc;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    };
    // flags
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};

class StatusWord {
 public:
  int32_t _value;

  bool busy() const                    { return ((_value >> 15) & 1) != 0; }
  bool C3() const                      { return ((_value >> 14) & 1) != 0; }
  bool C2() const                      { return ((_value >> 10) & 1) != 0; }
  bool C1() const                      { return ((_value >>  9) & 1) != 0; }
  bool C0() const                      { return ((_value >>  8) & 1) != 0; }
  int  top() const                     { return  (_value >> 11) & 7      ; }
  bool error_status() const            { return ((_value >>  7) & 1) != 0; }
  bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // condition codes
    char c[5];
    c[0] = (C3()) ? '3' : '-';
    c[1] = (C2()) ? '2' : '-';
    c[2] = (C1()) ? '1' : '-';
    c[3] = (C0()) ? '0' : '-';
    c[4] = '\x0';
    // flags
    char f[9];
    f[0] = (error_status()) ? 'E' : '-';
    f[1] = (stack_fault ()) ? 'S' : '-';
    f[2] = (precision   ()) ? 'P' : '-';
    f[3] = (underflow   ()) ? 'U' : '-';
    f[4] = (overflow    ()) ? 'O' : '-';
    f[5] = (zero_divide ()) ? 'Z' : '-';
    f[6] = (denormalized()) ? 'D' : '-';
    f[7] = (invalid     ()) ? 'I' : '-';
    f[8] = '\x0';
    // output
    printf("%04x  flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};

class TagWord {
 public:
  int32_t _value;

  int tag_at(int i) const              { return (_value >> (i*2)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};

class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  bool is_indefinite() const {
    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  void print() const {
    char  sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  };

};

class FPU_State {
 public:
  enum {
    register_size       = 10,
    number_of_registers =  8,
    register_mask       =  7
  };

  ControlWord  _control_word;
  StatusWord   _status_word;
  TagWord      _tag_word;
  int32_t      _error_offset;
  int32_t      _error_selector;
  int32_t      _data_offset;
  int32_t      _data_selector;
  int8_t       _register[register_size * number_of_registers];

  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }

  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();
    return NULL;
  }

  void print() const {
    // print computation registers
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word    .print(); printf("\n");
  }

};

class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const                { return ((_value >> 11) & 1) != 0; }
  bool direction() const               { return ((_value >> 10) & 1) != 0; }
  bool sign() const                    { return ((_value >>  7) & 1) != 0; }
  bool zero() const                    { return ((_value >>  6) & 1) != 0; }
  bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
  bool parity() const                  { return ((_value >>  2) & 1) != 0; }
  bool carry() const                   { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // flags
    char f[8];
    f[0] = (overflow       ()) ? 'O' : '-';
    f[1] = (direction      ()) ? 'D' : '-';
    f[2] = (sign           ()) ? 'S' : '-';
    f[3] = (zero           ()) ? 'Z' : '-';
    f[4] = (auxiliary_carry()) ? 'A' : '-';
    f[5] = (parity         ()) ? 'P' : '-';
    f[6] = (carry          ()) ? 'C' : '-';
    f[7] = '\x0';
    // output
    printf("%08x  flags = %s", _value, f);
  }

};

class IU_Register {
 public:
  int32_t _value;

  void print() const {
    printf("%08x  %11d", _value, _value);
  }

};

class IU_State {
 public:
  Flag_Register _eflags;
  IU_Register   _rdi;
  IU_Register   _rsi;
  IU_Register   _rbp;
  IU_Register   _rsp;
  IU_Register   _rbx;
  IU_Register   _rdx;
  IU_Register   _rcx;
  IU_Register   _rax;

  void print() const {
    // computation registers
    printf("rax  = "); _rax.print(); printf("\n");
    printf("rbx  = "); _rbx.print(); printf("\n");
    printf("rcx  = "); _rcx.print(); printf("\n");
    printf("rdx  = "); _rdx.print(); printf("\n");
    printf("rdi  = "); _rdi.print(); printf("\n");
    printf("rsi  = "); _rsi.print(); printf("\n");
    printf("rbp  = "); _rbp.print(); printf("\n");
    printf("rsp  = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};


class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State  _iu_state;

  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};


static void _print_CPU_state(CPU_State* state) {
  state->print();
};


void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}


static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  static int counter = 0;
  FPU_State* fs = &state->_fpu_state;
  counter++;
  // For leaf calls, only verify that the top few elements remain empty.
  // We only need 1 empty at the top for C2 code.
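  // Reminder (see tag_as_string above): the x87 tag values are
  // 0 = valid, 1 = zero, 2 = special, 3 = empty, so the checks below
  // compare tags against 3 to test for empty stack slots.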
  if (stack_depth < 0) {
    if (fs->tag_for_st(7) != 3) {
      printf("FPR7 not empty\n");
      state->print();
      assert(false, "error");
      return false;
    }
    return true; // All other stack states do not matter
  }

  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
         "bad FPU control word");

  // compute stack depth
  int i = 0;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
  int d = i;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  // verify findings
  if (i != FPU_State::number_of_registers) {
    // stack not contiguous
    printf("%s: stack not contiguous at ST%d\n", s, i);
    state->print();
    assert(false, "error");
    return false;
  }
  // check if computed stack depth corresponds to expected stack depth
  if (stack_depth < 0) {
    // expected stack depth is -stack_depth or less
    if (d > -stack_depth) {
      // too many elements on the stack
      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  } else {
    // expected stack depth is stack_depth
    if (d != stack_depth) {
      // wrong stack depth
      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  }
  // everything is cool
  return true;
}


void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}

void MacroAssembler::restore_cpu_control_state_after_jni() {
  // Either restore the MXCSR register after returning from the JNI Call
  // or verify that it wasn't changed (with -Xcheck:jni flag).
  if (VM_Version::supports_sse()) {
    if (RestoreMXCSROnJNICalls) {
      ldmxcsr(ExternalAddress(StubRoutines::addr_mxcsr_std()));
    } else if (CheckJNICalls) {
      call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry()));
    }
  }
  if (VM_Version::supports_avx()) {
    // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty.
    vzeroupper();
  }

#ifndef _LP64
  // Either restore the x87 floating point control word after returning
  // from the JNI call or verify that it wasn't changed.
  if (CheckJNICalls) {
    call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry()));
  }
#endif // _LP64
}


void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedKlassPointers) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}

void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedKlassPointers) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
    } else {
      movq(dst, Address(dst, Klass::prototype_header_offset()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset()));
  }
}

void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedKlassPointers) {
    encode_klass_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}

void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  // FIXME: Must change all places where we try to load the klass.
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}

// Doesn't do verification, generates fixed size code
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, src);
}

void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}

void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
  assert_different_registers(src1, tmp);
#ifdef _LP64
  if (UseCompressedOops) {
    bool did_push = false;
    if (tmp == noreg) {
      tmp = rax;
      push(tmp);
      did_push = true;
      assert(!src2.uses(rsp), "can't push");
    }
    load_heap_oop(tmp, src2);
    cmpptr(src1, tmp);
    if (did_push) pop(tmp);
  } else
#endif
    cmpptr(src1, src2);
}
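// Note on the compressed-oop helpers below (a summary of the code, not
// from the original source): a narrow oop is computed as
//   narrow = (oop - narrow_oop_base) >> narrow_oop_shift
// with base == NULL and/or shift == 0 degenerating to cheaper forms,
// and NULL mapping to 0 (the encode path uses a cmov for that).
// Decoding is the inverse: oop = (narrow << shift) + base, where the
// base lives in r12_heapbase.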
// Used for storing NULLs.
void MacroAssembler::store_heap_oop_null(Address dst) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, (int32_t)NULL_WORD);
  } else {
    movslq(dst, (int32_t)NULL_WORD);
  }
#else
  movl(dst, (int32_t)NULL_WORD);
#endif
}

#ifdef _LP64
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedKlassPointers) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}

#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
    jcc(Assembler::equal, ok);
    STOP(msg);
    bind(ok);
    pop(rscratch1);
  }
}
#endif

// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}

void MacroAssembler::encode_heap_oop_not_null(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    STOP("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}

void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    STOP("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}

void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    Label done;
    shlq(r, LogMinObjAlignmentInBytes);
    jccb(Assembler::equal, done);
    addq(r, r12_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}

void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shlq(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}

void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
    } else {
      if (dst != src) {
        movq(dst, src);
      }
      shlq(dst, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        addq(dst, r12_heapbase);
      }
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      movq(dst, src);
    }
  }
}

void MacroAssembler::encode_klass_not_null(Register r) {
  assert(Metaspace::is_initialized(), "metaspace should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?");
#endif
  if (Universe::narrow_klass_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shrq(r, LogKlassAlignmentInBytes);
  }
}

void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  assert(Metaspace::is_initialized(), "metaspace should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?");
#endif
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_klass_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shrq(dst, LogKlassAlignmentInBytes);
  }
}

void MacroAssembler::decode_klass_not_null(Register r) {
  assert(Metaspace::is_initialized(), "metaspace should be initialized");
  // Note: it will change flags
  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shlq(r, LogKlassAlignmentInBytes);
    if (Universe::narrow_klass_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    assert (Universe::narrow_klass_base() == NULL, "sanity");
  }
}

void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  assert(Metaspace::is_initialized(), "metaspace should be initialized");
  // Note: it will change flags
  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
  } else {
    assert (Universe::narrow_klass_base() == NULL, "sanity");
    if (dst != src) {
      movq(dst, src);
    }
  }
}

void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}

void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}

void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
}

void MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
}

void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
headers"); 5164 assert (Universe::heap() != NULL, "java heap should be initialized"); 5165 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5166 int oop_index = oop_recorder()->find_index(obj); 5167 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5168 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 5169 } 5170 5171 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 5172 assert (UseCompressedOops, "should only be used for compressed headers"); 5173 assert (Universe::heap() != NULL, "java heap should be initialized"); 5174 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5175 int oop_index = oop_recorder()->find_index(obj); 5176 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5177 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 5178 } 5179 5180 void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { 5181 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5182 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5183 int klass_index = oop_recorder()->find_index(k); 5184 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5185 Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 5186 } 5187 5188 void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { 5189 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 5190 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5191 int klass_index = oop_recorder()->find_index(k); 5192 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5193 Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 5194 } 5195 5196 void MacroAssembler::reinit_heapbase() { 5197 if (UseCompressedOops || UseCompressedKlassPointers) { 5198 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); 5199 } 5200 } 5201 #endif // _LP64 5202 5203 5204 // C2 compiled method's prolog code. 5205 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { 5206 5207 // WARNING: Initial instruction MUST be 5 bytes or longer so that 5208 // NativeJump::patch_verified_entry will be able to patch out the entry 5209 // code safely. The push to verify stack depth is ok at 5 bytes, 5210 // the frame allocation can be either 3 or 6 bytes. So if we don't do 5211 // stack bang then we must use the 6 byte frame allocation even if 5212 // we have no frame. :-( 5213 5214 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 5215 // Remove word for return addr 5216 framesize -= wordSize; 5217 5218 // Calls to C2R adapters often do not accept exceptional returns. 5219 // We require that their callers must bang for them. But be careful, because 5220 // some VM calls (such as call site linkage) can use several kilobytes of 5221 // stack. But the stack safety zone should account for that. 5222 // See bugs 4446381, 4468289, 4497237. 5223 if (stack_bang) { 5224 generate_stack_overflow_check(framesize); 5225 5226 // We always push rbp, so that on return to interpreter rbp, will be 5227 // restored correctly and we can correct the stack. 5228 push(rbp); 5229 // Remove word for ebp 5230 framesize -= wordSize; 5231 5232 // Create frame 5233 if (framesize) { 5234 subptr(rsp, framesize); 5235 } 5236 } else { 5237 // Create frame (force generation of a 4 byte immediate value) 5238 subptr_imm32(rsp, framesize); 5239 5240 // Save RBP register now. 

void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
  // cnt - number of qwords (8-byte words).
  // base - start address, qword aligned.
  assert(base==rdi, "base register must be edi for rep stos");
  assert(tmp==rax, "tmp register must be eax for rep stos");
  assert(cnt==rcx, "cnt register must be ecx for rep stos");

  xorptr(tmp, tmp);
  if (UseFastStosb) {
    shlptr(cnt, 3); // convert to number of bytes
    rep_stosb();
  } else {
    NOT_LP64(shlptr(cnt, 1);) // convert to number of dwords for 32-bit VM
    rep_stos();
  }
}
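
// Functionally, clear_mem is a qword memset-to-zero. A C sketch of the
// contract (a reference model, not the emitted instruction sequence):
//
//   void clear_mem(uint64_t* base, size_t cnt) { // cnt in qwords
//     for (size_t i = 0; i < cnt; i++) base[i] = 0;
//   }
//
// UseFastStosb switches to byte-granular "rep stosb", which is typically
// the faster choice on CPUs reporting ERMS (enhanced rep movsb/stosb).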

// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2, Register result,
                                      XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses the pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code is used only for cnt2 >= 8 chars");

  // Load substring.
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not, so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is the number of remaining substring elements and
    // cnt1 is the number of remaining string elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring

    addptr(result, 2);

  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR);
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when the whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only the string if it does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8);  negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jcc(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index
  bind(EXIT);

} // string_indexofC8
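
// For reference, the scalar equivalent of what string_indexofC8 (and
// string_indexof below) computes; a sketch assuming UTF-16 chars and the
// checks inline_string_indexOf() already emitted:
//
//   int index_of(const jchar* s, int s_len, const jchar* sub, int sub_len) {
//     for (int i = 0; i + sub_len <= s_len; i++) {
//       int j = 0;
//       while (j < sub_len && s[i + j] == sub[j]) j++;
//       if (j == sub_len) return i;  // match starts at index i
//     }
//     return -1;                     // not found
//   }
//
// The SSE4.2 code examines 8 chars (16 bytes) per pcmpestri instead of one
// char per iteration.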

// Small strings are loaded through stack if they cross page boundary.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2, Register result,
                                    XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is the length of a small (< 8 chars) constant substring
  // or (-1) for a non-constant substring, in which case its length
  // is in the cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses the pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0));  // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*TypeArrayKlass::header_size() >= 12, "sanity");
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow loading 16 bytes into vec.
      subptr(rsp, 16);
      int stk_offset = wordSize-2;
      push(cnt2);

      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp);  // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp);  // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2);       // substr count
      push(str2);       // substr addr
      push(str1);       // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp); // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2 < 0) { // Only for non constant substring
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not, so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof
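
// Both indexOf flavors drive pcmpestri with imm8 == 0x0d. As a reference
// (per the Intel SDM encoding of the imm8 control byte):
//
//   imm8[1:0] = 01 -> source data are unsigned words (16-bit chars)
//   imm8[3:2] = 11 -> "equal ordered" aggregation, i.e. substring search
//   imm8[5:4] = 00 -> positive polarity (result not negated)
//   imm8[6]   = 0  -> rcx gets the least significant matching index
//
// hence 0b0000_1101 == 0x0d. CF is set when any match was found and OF
// reports a match at element 0, which is what the below/overflow jccs
// above test.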

// Compare strings.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  ShortBranchVerifier sbv(this);
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);
  cmov32(Assembler::lessEqual, cnt2, result);

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Compare first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  cmpl(cnt2, 1);
  jcc(Assembler::equal, LENGTH_DIFF_LABEL);

  // Check if the strings start at the same location.
  cmpptr(str1, str2);
  jcc(Assembler::equal, LENGTH_DIFF_LABEL);

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;

  if (UseAVX >= 2 && UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
    Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
    Label COMPARE_TAIL_LONG;
    int pcmpmask = 0x19;

    // Setup to compare 16-chars (32-bytes) vectors,
    // start from first character again because it has aligned address.
    int stride2 = 16;
    int adr_stride  = stride  << scale;
    int adr_stride2 = stride2 << scale;

    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
    // rax and rdx are used by pcmpestri as elements counters
    movl(result, cnt2);
    andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count
    jcc(Assembler::zero, COMPARE_TAIL_LONG);

    // fast path : compare first 2 8-char vectors.
    bind(COMPARE_16_CHARS);
    movdqu(vec1, Address(str1, 0));
    pcmpestri(vec1, Address(str2, 0), pcmpmask);
    jccb(Assembler::below, COMPARE_INDEX_CHAR);

    movdqu(vec1, Address(str1, adr_stride));
    pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
    jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
    addl(cnt1, stride);

    // Compare the characters at index in cnt1
    bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
    load_unsigned_short(result, Address(str1, cnt1, scale));
    load_unsigned_short(cnt2, Address(str2, cnt1, scale));
    subl(result, cnt2);
    jmp(POP_LABEL);

    // Setup the registers to start vector comparison loop
    bind(COMPARE_WIDE_VECTORS);
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    subl(result, stride2);
    subl(cnt2, stride2);
    jccb(Assembler::zero, COMPARE_WIDE_TAIL);
    negptr(result);

    // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
    bind(COMPARE_WIDE_VECTORS_LOOP);
    vmovdqu(vec1, Address(str1, result, scale));
    vpxor(vec1, Address(str2, result, scale));
    vptest(vec1, vec1);
    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
    addptr(result, stride2);
    subl(cnt2, stride2);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
    // clean upper bits of YMM registers
    vzeroupper();

    // compare wide vectors tail
    bind(COMPARE_WIDE_TAIL);
    testptr(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    movl(result, stride2);
    movl(cnt2, result);
    negptr(result);
    jmpb(COMPARE_WIDE_VECTORS_LOOP);

    // Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors.
    bind(VECTOR_NOT_EQUAL);
    // clean upper bits of YMM registers
    vzeroupper();
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    jmp(COMPARE_16_CHARS);

    // Compare tail chars, length between 1 and 15 chars
    bind(COMPARE_TAIL_LONG);
    movl(cnt2, result);
    cmpl(cnt2, stride);
    jccb(Assembler::less, COMPARE_SMALL_STR);

    movdqu(vec1, Address(str1, 0));
    pcmpestri(vec1, Address(str2, 0), pcmpmask);
    jcc(Assembler::below, COMPARE_INDEX_CHAR);
    subptr(cnt2, stride);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(cnt2);
    jmpb(WHILE_HEAD_LABEL);

    bind(COMPARE_SMALL_STR);
  } else if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 8-char (16-byte) vectors,
    // start from first character again because it has aligned address.
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    //   inputs:
    //     vec1 - substring
    //     rax  - negative string length (elements count)
    //     mem  - scanned string
    //     rdx  - string length (elements count)
    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
    //                + 00 (unsigned bytes) or + 01 (unsigned shorts)
    //   outputs:
    //     rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail
    testptr(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(cnt1, result);
    load_unsigned_short(result, Address(str1, cnt1, scale));
    load_unsigned_short(cnt2, Address(str2, cnt1, scale));
    subl(result, cnt2);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }
  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale));
  lea(str2, Address(str2, cnt2, scale));
  decrementl(cnt2); // first character was compared already
  negptr(cnt2);

  // Compare the rest of the elements
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length. Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}
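
// Semantically string_compare returns what this scalar sketch computes
// (first differing char decides, otherwise the length difference):
//
//   int string_compare(const jchar* s1, int len1, const jchar* s2, int len2) {
//     int min_len = (len1 < len2) ? len1 : len2;
//     for (int i = 0; i < min_len; i++) {
//       if (s1[i] != s2[i]) return (int)s1[i] - (int)s2[i];
//     }
//     return len1 - len2;
//   }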

// Compare char[] arrays aligned to 4 bytes or substrings.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  ShortBranchVerifier sbv(this);
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0
  movl(result, limit); // copy

  if (UseAVX >= 2) {
    // With AVX2, use 32-byte vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 32-byte vectors
    andl(result, 0x0000001e); //   tail count (in bytes)
    andl(limit, 0xffffffe0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    vmovdqu(vec1, Address(ary1, limit, Address::times_1));
    vmovdqu(vec2, Address(ary2, limit, Address::times_1));
    vpxor(vec1, vec2);

    vptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 32);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
    vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
    vpxor(vec1, vec2);

    vptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  } else if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e); //   tail count (in bytes)
    andl(limit, 0xfffffff0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);      // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
  if (UseAVX >= 2) {
    // clean upper bits of YMM registers
    vzeroupper();
  }
}
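
// A scalar reference for the routine above (a sketch; is_array_equ adds
// the null and length checks, while the substring flavor receives element
// pointers and a count directly):
//
//   bool char_arrays_equals(const jchar* a, const jchar* b, int n) {
//     for (int i = 0; i < n; i++) {
//       if (a[i] != b[i]) return false;
//     }
//     return true;
//   }
//
// The emitted code performs the same comparison 32, 16 or 4 bytes at a
// time before checking the single trailing char.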

void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  ShortBranchVerifier sbv(this);
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      movdl(xtmp, value);
      if (UseAVX >= 2 && UseUnalignedLoadStores) {
        // Fill 64-byte chunks
        Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
        vpbroadcastd(xtmp, xtmp);

        subl(count, 16 << shift);
        jcc(Assembler::less, L_check_fill_32_bytes);
        align(16);

        BIND(L_fill_64_bytes_loop);
        vmovdqu(Address(to, 0), xtmp);
        vmovdqu(Address(to, 32), xtmp);
        addptr(to, 64);
        subl(count, 16 << shift);
        jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);

        BIND(L_check_fill_32_bytes);
        addl(count, 8 << shift);
        jccb(Assembler::less, L_check_fill_8_bytes);
        vmovdqu(Address(to, 0), xtmp);
        addptr(to, 32);
        subl(count, 8 << shift);

        BIND(L_check_fill_8_bytes);
        // clean upper bits of YMM registers
        vzeroupper();
      } else {
        // Fill 32-byte chunks
        pshufd(xtmp, xtmp, 0);

        subl(count, 8 << shift);
        jcc(Assembler::less, L_check_fill_8_bytes);
        align(16);

        BIND(L_fill_32_bytes_loop);

        if (UseUnalignedLoadStores) {
          movdqu(Address(to, 0), xtmp);
          movdqu(Address(to, 16), xtmp);
        } else {
          movq(Address(to, 0), xtmp);
          movq(Address(to, 8), xtmp);
          movq(Address(to, 16), xtmp);
          movq(Address(to, 24), xtmp);
        }

        addptr(to, 32);
        subl(count, 8 << shift);
        jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);

        BIND(L_check_fill_8_bytes);
      }
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
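
// The element-replication step at the top of generate_fill widens the fill
// value to a 32-bit pattern; a C sketch of that step (shift is chosen so
// that (n << shift) converts n 4-byte words into an element count):
//
//   uint32_t widen(BasicType t, uint32_t v) {
//     if (t == T_BYTE)  { v &= 0xff;   v |= (v << 8); }
//     if (t == T_SHORT) { v &= 0xffff; }
//     if (t != T_INT)   { v |= (v << 16); }  // four bytes / two shorts
//     return v;
//   }
//
// so every loop above can store the same 32-bit (or SIMD-broadcast)
// pattern regardless of the element type.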

// encode char[] to byte[] in ISO_8859_1
void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
                                      XMMRegister tmp1Reg, XMMRegister tmp2Reg,
                                      XMMRegister tmp3Reg, XMMRegister tmp4Reg,
                                      Register tmp5, Register result) {
  // rsi: src
  // rdi: dst
  // rdx: len
  // rcx: tmp5
  // rax: result
  ShortBranchVerifier sbv(this);
  assert_different_registers(src, dst, len, tmp5, result);
  Label L_done, L_copy_1_char, L_copy_1_char_exit;

  // set result
  xorl(result, result);
  // check for zero length
  testl(len, len);
  jcc(Assembler::zero, L_done);
  movl(result, len);

  // Setup pointers
  lea(src, Address(src, len, Address::times_2)); // char[]
  lea(dst, Address(dst, len, Address::times_1)); // byte[]
  negptr(len);

  if (UseSSE42Intrinsics || UseAVX >= 2) {
    Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
    Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;

    if (UseAVX >= 2) {
      Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
      movdl(tmp1Reg, tmp5);
      vpbroadcastd(tmp1Reg, tmp1Reg);
      jmpb(L_chars_32_check);

      bind(L_copy_32_chars);
      vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
      vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
      vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
      vptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
      jccb(Assembler::notZero, L_copy_32_chars_exit);
      vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
      vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true);
      vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);

      bind(L_chars_32_check);
      addptr(len, 32);
      jccb(Assembler::lessEqual, L_copy_32_chars);

      bind(L_copy_32_chars_exit);
      subptr(len, 16);
      jccb(Assembler::greater, L_copy_16_chars_exit);

    } else if (UseSSE42Intrinsics) {
      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
      movdl(tmp1Reg, tmp5);
      pshufd(tmp1Reg, tmp1Reg, 0);
      jmpb(L_chars_16_check);
    }

    bind(L_copy_16_chars);
    if (UseAVX >= 2) {
      vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
      vptest(tmp2Reg, tmp1Reg);
      jccb(Assembler::notZero, L_copy_16_chars_exit);
      vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
      vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true);
    } else {
      if (UseAVX > 0) {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
      } else {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        por(tmp2Reg, tmp3Reg);
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        por(tmp2Reg, tmp4Reg);
      }
      ptest(tmp2Reg, tmp1Reg);        // check for Unicode chars in vector
      jccb(Assembler::notZero, L_copy_16_chars_exit);
      packuswb(tmp3Reg, tmp4Reg);
    }
    movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg);

    bind(L_chars_16_check);
    addptr(len, 16);
    jccb(Assembler::lessEqual, L_copy_16_chars);

    bind(L_copy_16_chars_exit);
    if (UseAVX >= 2) {
      // clean upper bits of YMM registers
      vzeroupper();
    }
    subptr(len, 8);
    jccb(Assembler::greater, L_copy_8_chars_exit);

    bind(L_copy_8_chars);
    movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
    ptest(tmp3Reg, tmp1Reg);
    jccb(Assembler::notZero, L_copy_8_chars_exit);
    packuswb(tmp3Reg, tmp1Reg);
    movq(Address(dst, len, Address::times_1, -8), tmp3Reg);
    addptr(len, 8);
    jccb(Assembler::lessEqual, L_copy_8_chars);

    bind(L_copy_8_chars_exit);
    subptr(len, 8);
    jccb(Assembler::zero, L_done);
  }

  bind(L_copy_1_char);
  load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
  testl(tmp5, 0xff00);      // check if Unicode char
  jccb(Assembler::notZero, L_copy_1_char_exit);
  movb(Address(dst, len, Address::times_1, 0), tmp5);
  addptr(len, 1);
  jccb(Assembler::less, L_copy_1_char);

  bind(L_copy_1_char_exit);
  addptr(result, len); // len holds the negative count of unprocessed elements
  bind(L_done);
}
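
// A scalar sketch of the contract above: copy chars to bytes until the
// first char that does not fit in ISO-8859-1, returning how many chars
// were encoded:
//
//   int encode_iso_array(const jchar* src, jbyte* dst, int len) {
//     int i = 0;
//     for (; i < len; i++) {
//       if (src[i] > 0xFF) break;  // non-Latin-1 char stops the copy
//       dst[i] = (jbyte)src[i];
//     }
//     return i;
//   }
//
// The vector loops test 8..32 chars at a time against the 0xff00ff00 mask
// and drop back to the byte loop at the first vector containing a char
// above 0xFF.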

/**
 * Emits code to update CRC-32 with a byte value according to constants in table
 *
 * @param [in,out] crc   Register containing the crc.
 * @param [in]     val   Register containing the byte to fold into the CRC.
 * @param [in]     table Register containing the table of crc constants.
 *
 * uint32_t crc;
 * val = crc_table[(val ^ crc) & 0xFF];
 * crc = val ^ (crc >> 8);
 *
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  xorl(val, crc);
  andl(val, 0xFF);
  shrl(crc, 8); // unsigned shift
  xorl(crc, Address(table, val, Address::times_4, 0));
}

/**
 * Fold 128-bit data chunk
 */
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
  vpclmulhdq(xtmp, xK, xcrc); // [123:64]
  vpclmulldq(xcrc, xK, xcrc); // [63:0]
  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
  pxor(xcrc, xtmp);
}

void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
  vpclmulhdq(xtmp, xK, xcrc);
  vpclmulldq(xcrc, xK, xcrc);
  pxor(xcrc, xbuf);
  pxor(xcrc, xtmp);
}

/**
 * 8-bit folds to compute 32-bit CRC
 *
 * uint64_t xcrc;
 * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
 */
void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
  movdl(tmp, xcrc);
  andl(tmp, 0xFF);
  movdl(xtmp, Address(table, tmp, Address::times_4, 0));
  psrldq(xcrc, 1); // unsigned shift one byte
  pxor(xcrc, xtmp);
}

/**
 * uint32_t crc;
 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
 */
void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
  movl(tmp, crc);
  andl(tmp, 0xFF);
  shrl(crc, 8);
  xorl(crc, Address(table, tmp, Address::times_4, 0));
}
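
// Putting update_byte_crc32 in context: it implements one step of the
// classic table-driven CRC-32 byte loop, sketched here in C (crc_table
// stands for the 256-entry table at StubRoutines::crc_table_addr()):
//
//   uint32_t crc32_bytes(uint32_t crc, const uint8_t* buf, size_t len) {
//     crc = ~crc;  // matches the notl(crc) in kernel_crc32 below
//     for (size_t i = 0; i < len; i++) {
//       crc = crc_table[(crc ^ buf[i]) & 0xFF] ^ (crc >> 8);
//     }
//     return ~crc;
//   }
//
// kernel_crc32 below computes the same result but folds 512 bits per
// iteration using carry-less multiplication (vpclmulqdq) and only finishes
// with these byte folds.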

/**
 * @param crc   register containing existing CRC (32-bit)
 * @param buf   register pointing to input byte buffer (byte*)
 * @param len   register containing number of bytes
 * @param table register that will contain address of CRC table
 * @param tmp   scratch register
 */
void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) {
  assert_different_registers(crc, buf, len, table, tmp, rax);

  Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
  Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;

  lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
  notl(crc); // ~crc
  cmpl(len, 16);
  jcc(Assembler::less, L_tail);

  // Align buffer to 16 bytes
  movl(tmp, buf);
  andl(tmp, 0xF);
  jccb(Assembler::zero, L_aligned);
  subl(tmp, 16);
  addl(len, tmp);

  align(4);
  BIND(L_align_loop);
  movsbl(rax, Address(buf, 0)); // load byte with sign extension
  update_byte_crc32(crc, rax, table);
  increment(buf);
  incrementl(tmp);
  jccb(Assembler::less, L_align_loop);

  BIND(L_aligned);
  movl(tmp, len); // save
  shrl(len, 4);
  jcc(Assembler::zero, L_tail_restore);

  // Fold crc into first bytes of vector
  movdqa(xmm1, Address(buf, 0));
  movdl(rax, xmm1);
  xorl(crc, rax);
  pinsrd(xmm1, crc, 0);
  addptr(buf, 16);
  subl(len, 4); // len > 0
  jcc(Assembler::less, L_fold_tail);

  movdqa(xmm2, Address(buf,  0));
  movdqa(xmm3, Address(buf, 16));
  movdqa(xmm4, Address(buf, 32));
  addptr(buf, 48);
  subl(len, 3);
  jcc(Assembler::lessEqual, L_fold_512b);

  // Fold total 512 bits of polynomial on each iteration,
  // 128 bits per each of 4 parallel streams.
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));

  align(32);
  BIND(L_fold_512b_loop);
  fold_128bit_crc32(xmm1, xmm0, xmm5, buf,  0);
  fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16);
  fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32);
  fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48);
  addptr(buf, 64);
  subl(len, 4);
  jcc(Assembler::greater, L_fold_512b_loop);

  // Fold 512 bits to 128 bits.
  BIND(L_fold_512b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);

  // Fold the rest of 128 bits data chunks
  BIND(L_fold_tail);
  addl(len, 3);
  jccb(Assembler::lessEqual, L_fold_128b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));

  BIND(L_fold_tail_loop);
  fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0);
  addptr(buf, 16);
  decrementl(len);
  jccb(Assembler::greater, L_fold_tail_loop);

  // Fold 128 bits in xmm1 down into 32 bits in crc register.
  BIND(L_fold_128b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
  psrldq(xmm1, 8);
  psrldq(xmm2, 4);
  pxor(xmm0, xmm1);
  pxor(xmm0, xmm2);

  // 8 8-bit folds to compute 32-bit CRC.
  for (int j = 0; j < 4; j++) {
    fold_8bit_crc32(xmm0, table, xmm1, rax);
  }
  movdl(crc, xmm0); // mov 32 bits to general register
  for (int j = 0; j < 4; j++) {
    fold_8bit_crc32(crc, table, rax);
  }

  BIND(L_tail_restore);
  movl(len, tmp); // restore
  BIND(L_tail);
  andl(len, 0xf);
  jccb(Assembler::zero, L_exit);

  // Fold the rest of bytes
  align(4);
  BIND(L_tail_loop);
  movsbl(rax, Address(buf, 0)); // load byte with sign extension
  update_byte_crc32(crc, rax, table);
  increment(buf);
  decrementl(len);
  jccb(Assembler::greater, L_tail_loop);

  BIND(L_exit);
  notl(crc); // ~crc
}

#undef BIND
#undef BLOCK_COMMENT


Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}

SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
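
// SkipIfEqual is a scoped guard: the code emitted between its construction
// and destruction is skipped at runtime whenever *flag_addr == value. A
// usage sketch (the flag name is illustrative only):
//
//   {
//     SkipIfEqual skip(this, &SomeDiagnosticFlag, false);
//     // ... code emitted here runs only when the flag is true ...
//   } // destructor binds the skip-target label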